diff --git a/expts/neurips2023_configs/base_config/large.yaml b/expts/neurips2023_configs/base_config/large.yaml
new file mode 100644
index 000000000..da9760ab3
--- /dev/null
+++ b/expts/neurips2023_configs/base_config/large.yaml
@@ -0,0 +1,423 @@
+# @package _global_
+
+constants:
+  seed: &seed 42
+  raise_train_error: true # Whether the code should raise an error if it crashes during training
+  entity: multitask-gnn
+
+accelerator:
+  type: ipu # cpu or ipu or gpu
+  config_override:
+    datamodule:
+      args:
+        ipu_dataloader_training_opts:
+          mode: async
+          max_num_nodes_per_graph: 30 # train max nodes: 20, max_edges: 54
+          max_num_edges_per_graph: 100
+        ipu_dataloader_inference_opts:
+          mode: async
+          max_num_nodes_per_graph: 35 # valid max nodes: 51, max_edges: 118
+          max_num_edges_per_graph: 100
+        # Data handling-related
+        batch_size_training: 30
+        batch_size_inference: 30
+    predictor:
+      metrics_every_n_train_steps: 1000
+      optim_kwargs:
+        loss_scaling: 1024
+    trainer:
+      trainer:
+        precision: 16-true
+        accumulate_grad_batches: 2
+
+  ipu_config:
+    - deviceIterations(30) # The IPU processes this many batches per step, so large batches must be ready for the model.
+    - replicationFactor(16)
+    # - enableProfiling("graph_analyser") # The folder where the profile will be stored
+    # - enableExecutableCaching("pop_compiler_cache")
+    - TensorLocations.numIOTiles(128)
+    - _Popart.set("defaultBufferingDepth", 96)
+    - Precision.enableStochasticRounding(True)
+    # - Precision.enableFloatingPointExceptions(True)
+
+  ipu_inference_config:
+    # set device iterations and replication factor to 1 during inference
+    # gradient accumulation was set to 1 in the code
+    - deviceIterations(1)
+    - replicationFactor(1)
+    - Precision.enableStochasticRounding(False)
+
+# accelerator:
+#   type: cpu # cpu or ipu or gpu
+#   config_override:
+#     datamodule:
+#       args:
+#         batch_size_training: 64
+#         batch_size_inference: 256
+#     trainer:
+#       trainer:
+#         precision: 32
+#         accumulate_grad_batches: 1
+
+datamodule:
+  module_type: "MultitaskFromSmilesDataModule"
+  # module_type: "FakeDataModule" # Option to use generated data
+  args: # Matches the setup in the test_multitask_datamodule.py test case.
+    task_specific_args: # To be replaced by a new class "DatasetParams"
+      l1000_vcap:
+        df: null
+        df_path: graphium/data/neurips2023/large-dataset/LINCS_L1000_VCAP_0-4.csv.gz
+        # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_VCAP_0-4.csv.gz
+        # or set path as the URL directly
+        smiles_col: "SMILES"
+        label_cols: geneID-* # geneID-* means all columns starting with "geneID-"
+        # sample_size: 2000 # use sample_size for test
+        task_level: graph
+        splits_path: graphium/data/neurips2023/large-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt`
+        epoch_sampling_fraction: 1.0
+
+      l1000_mcf7:
+        df: null
+        df_path: graphium/data/neurips2023/large-dataset/LINCS_L1000_MCF7_0-4.csv.gz
+        # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz
+        # or set path as the URL directly
+        smiles_col: "SMILES"
+        label_cols: geneID-* # geneID-* means all columns starting with "geneID-"
+        # sample_size: 2000 # use sample_size for test
+        task_level: graph
+        splits_path: graphium/data/neurips2023/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt`
+        epoch_sampling_fraction: 1.0
+
+      pcba_1328:
+        df: null
+        df_path: graphium/data/neurips2023/large-dataset/PCBA_1328_1564k.parquet
+        # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCBA_1328_1564k.parquet
+        # or set path as the URL directly
+        smiles_col: "SMILES"
+        label_cols: assayID-* # assayID-* means all columns starting with "assayID-"
+        # sample_size: 2000 # use sample_size for test
+        task_level: graph
+        splits_path: graphium/data/neurips2023/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt`
+        epoch_sampling_fraction: 1.0
+
+      pcqm4m_g25:
+        df: null
+        df_path: graphium/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet
+        # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet
+        # or set path as the URL directly
+        smiles_col: "ordered_smiles"
+        label_cols: graph_* # graph_* means all columns starting with "graph_"
+        # sample_size: 2000 # use sample_size for test
+        task_level: graph
+        splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt`
+        label_normalization:
+          normalize_val_test: True
+          method: "normal"
+        epoch_sampling_fraction: 1.0
+
+      pcqm4m_n4:
+        df: null
+        df_path: graphium/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet
+        # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet
+        # or set path as the URL directly
+        smiles_col: "ordered_smiles"
+        label_cols: node_* # node_* means all columns starting with "node_"
+        # sample_size: 2000 # use sample_size for test
+        task_level: node
+        splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt`
+        seed: *seed
+        label_normalization:
+          normalize_val_test: True
+          method: "normal"
+        epoch_sampling_fraction: 1.0
+
+    # Featurization
+    prepare_dict_or_graph: pyg:graph
+    featurization_n_jobs: 30
+    featurization_progress: True
+    featurization_backend: "loky"
+    processed_graph_data_path: "../datacache/neurips2023-large/"
+    featurization:
+      # OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence),
+      # 'possible_number_radical_e', 'possible_is_aromatic', 'possible_is_in_ring',
+      # 'num_chiral_centers (not included yet)']
+      atom_property_list_onehot: [atomic-number, group, period, total-valence]
+      atom_property_list_float: [degree, formal-charge, radical-electron, aromatic, in-ring]
+      # OGB: ['possible_bond_type', 'possible_bond_stereo', 'possible_is_in_ring']
+      edge_property_list: [bond-type-onehot, stereo, in-ring]
+      add_self_loop: False
+      explicit_H: False # whether explicit hydrogens are included
+      use_bonds_weights: False
+      pos_encoding_as_features: # encoder dropout 0.18
+        pos_types:
+          lap_eigvec:
+            pos_level: node
+            pos_type: laplacian_eigvec
+            num_pos: 8
+            normalization: "none" # normalization already applied to the eigenvectors
+            disconnected_comp: True # whether to include eigenvalues/eigenvectors for disconnected graphs
+          lap_eigval:
+            pos_level: node
+            pos_type: laplacian_eigval
+            num_pos: 8
+            normalization: "none" # normalization already applied to the eigenvectors
+            disconnected_comp: True # whether to include eigenvalues/eigenvectors for disconnected graphs
+          rw_pos: # use same name as pe_encoder
+            pos_level: node
+            pos_type: rw_return_probs
+            ksteps: 16
+
+    # cache_data_path: .
+    num_workers: 32 # -1 to use all
+    persistent_workers: True # whether to keep dataloader workers alive between epochs.
+    # Setting persistent_workers to false can make the start of each epoch very slow.
+
+
+architecture:
+  model_type: FullGraphMultiTaskNetwork
+  mup_base_path: null
+  pre_nn: # Set as null to avoid a pre-nn network
+    out_dim: 64
+    hidden_dims: 256
+    depth: 2
+    activation: relu
+    last_activation: none
+    dropout: &dropout 0.1
+    normalization: &normalization layer_norm
+    last_normalization: *normalization
+    residual_type: none
+
+  pre_nn_edges: null
+
+  pe_encoders:
+    out_dim: 32
+    pool: "sum" #"mean" "max"
+    last_norm: None #"batch_norm", "layer_norm"
+    encoders: #la_pos | rw_pos
+      la_pos: # Set as null to avoid a pre-nn network
+        encoder_type: "laplacian_pe"
+        input_keys: ["laplacian_eigvec", "laplacian_eigval"]
+        output_keys: ["feat"]
+        hidden_dim: 64
+        out_dim: 32
+        model_type: 'DeepSet' #'Transformer' or 'DeepSet'
+        num_layers: 2
+        num_layers_post: 1 # Num. layers to apply after pooling
+        dropout: 0.1
+        first_normalization: "none" #"batch_norm" or "layer_norm"
+      rw_pos:
+        encoder_type: "mlp"
+        input_keys: ["rw_return_probs"]
+        output_keys: ["feat"]
+        hidden_dim: 64
+        out_dim: 32
+        num_layers: 2
+        dropout: 0.1
+        normalization: "layer_norm" #"batch_norm" or "layer_norm"
+        first_normalization: "layer_norm" #"batch_norm" or "layer_norm"
+
+
+
+  gnn: # Set as null to avoid a post-nn network
+    in_dim: 64 # or otherwise the correct value
+    out_dim: &gnn_dim 768
+    hidden_dims: *gnn_dim
+    depth: 4
+    activation: gelu
+    last_activation: none
+    dropout: 0.1
+    normalization: "layer_norm"
+    last_normalization: *normalization
+    residual_type: simple
+    virtual_node: 'none'
+
+
+
+  graph_output_nn:
+    graph:
+      pooling: [sum]
+      out_dim: *gnn_dim
+      hidden_dims: *gnn_dim
+      depth: 1
+      activation: relu
+      last_activation: none
+      dropout: *dropout
+      normalization: *normalization
+      last_normalization: "none"
+      residual_type: none
+    node:
+      pooling: [sum]
+      out_dim: *gnn_dim
+      hidden_dims: *gnn_dim
+      depth: 1
+      activation: relu
+      last_activation: none
+      dropout: *dropout
+      normalization: *normalization
+      last_normalization: "none"
+      residual_type: none
+
+  task_heads:
+    l1000_vcap:
+      task_level: graph
+      out_dim: 4890
+      hidden_dims: 128
+      depth: 2
+      activation: none
+      last_activation: none
+      dropout: *dropout
+      normalization: *normalization
+      last_normalization: "none"
+      residual_type: none
+    l1000_mcf7:
+      task_level: graph
+      out_dim: 4890
+      hidden_dims: 128
+      depth: 2
+      activation: none
+      last_activation: none
+      dropout: *dropout
+      normalization: *normalization
+      last_normalization: "none"
+      residual_type: none
+    pcba_1328:
+      task_level: graph
+      out_dim: 1328
+      hidden_dims: 64
+      depth: 2
+      activation: relu
+      last_activation: none
+      dropout: *dropout
+      normalization: *normalization
+      last_normalization: "none"
+      residual_type: none
+    pcqm4m_g25:
+      task_level: graph
+      out_dim: 25
+      hidden_dims: 32
+      depth: 2
+      activation: relu
+      last_activation: none
+      dropout: *dropout
+      normalization: *normalization
+      last_normalization: "none"
+      residual_type: none
+    pcqm4m_n4:
+      task_level: node
+      out_dim: 4
+      hidden_dims: 32
+      depth: 2
+      activation: relu
+      last_activation: none
+      dropout: *dropout
+      normalization: *normalization
+      last_normalization: "none"
+      residual_type: none
+
+#Task-specific
+predictor:
+  metrics_on_progress_bar:
+    l1000_vcap: []
+    l1000_mcf7: []
+    pcba_1328: []
+    pcqm4m_g25: []
+    pcqm4m_n4: []
+  metrics_on_training_set:
+    l1000_vcap: []
+    l1000_mcf7: []
+    pcba_1328: []
+    pcqm4m_g25: []
+    pcqm4m_n4: []
+  loss_fun:
+    l1000_vcap:
+      name: hybrid_ce_ipu
+      n_brackets: 5
+      alpha: 0.5
+    l1000_mcf7:
+      name: hybrid_ce_ipu
+      n_brackets: 5
+      alpha: 0.5
+    pcba_1328: bce_logits_ipu
+    pcqm4m_g25: mae_ipu
+    pcqm4m_n4: mae_ipu
+  random_seed: *seed
+  optim_kwargs:
+    lr: 1.e-4 # warmup can be scheduled using torch_scheduler_kwargs
+    # weight_decay: 1.e-7
+  torch_scheduler_kwargs:
+    module_type: WarmUpLinearLR
+    max_num_epochs: &max_epochs 100
+    warmup_epochs: 10
+    verbose: False
+  scheduler_kwargs:
+    # monitor: &monitor qm9/mae/train
+    # mode: min
+    # frequency: 1
+  target_nan_mask: null # null: no mask, 0: 0 mask, ignore-flatten, ignore-mean-per-label
+  multitask_handling: flatten # flatten, mean-per-label
+
+# Task-specific
+metrics:
+  l1000_vcap: &classif_metrics
+    - name: auroc
+      metric: auroc_ipu
+      num_classes: 5
+      task: multiclass
+      multitask_handling: mean-per-label
+      threshold_kwargs: null
+    - name: avpr
+      metric: average_precision_ipu
+      num_classes: 5
+      task: multiclass
+      target_to_int: True
+      target_nan_mask: -1000
+      ignore_index: -1000
+      multitask_handling: mean-per-label
+      threshold_kwargs: null
+  l1000_mcf7: *classif_metrics
+  pcba_1328:
+    - name: auroc
+      metric: auroc_ipu
+      task: binary
+      multitask_handling: mean-per-label
+      threshold_kwargs: null
+    - name: avpr
+      metric: average_precision_ipu
+      task: binary
+      multitask_handling: mean-per-label
+      threshold_kwargs: null
+  pcqm4m_g25: &pcqm_metrics
+    - name: mae
+      metric: mae_ipu
+      target_nan_mask: null
+      multitask_handling: mean-per-label
+      threshold_kwargs: null
+    - name: pearsonr
+      metric: pearsonr_ipu
+      threshold_kwargs: null
+      target_nan_mask: null
+      multitask_handling: mean-per-label
+    - name: r2
+      metric: r2_score_ipu
+      threshold_kwargs: null
+      target_nan_mask: null
+      multitask_handling: mean-per-label
+  pcqm4m_n4: *pcqm_metrics
+
+trainer:
+  seed: *seed
+  logger:
+    save_dir: logs/neurips2023-large/
+    name: ${constants.name}
+    project: ${constants.name}
+  model_checkpoint:
+    dirpath: models_checkpoints/${constants.name}/
+    filename: ${constants.name}
+    # monitor: *monitor
+    # mode: *mode
+    # save_top_k: 1
+    save_last: True
+  trainer:
+    max_epochs: *max_epochs
+    min_epochs: 1
+    check_val_every_n_epoch: 20
diff --git a/expts/neurips2023_configs/base_config/small.yaml b/expts/neurips2023_configs/base_config/small.yaml
new file mode 100644
index 000000000..2e63477a1
--- /dev/null
+++ b/expts/neurips2023_configs/base_config/small.yaml
@@ -0,0 +1,343 @@
+# @package _global_
+
+constants:
+  seed: &seed 42
+  raise_train_error: true # Whether the code should raise an error if it crashes during training
+  entity: multitask-gnn
+
+accelerator:
+  type: ipu # cpu or ipu or gpu
+  config_override:
+    datamodule:
+      args:
+        ipu_dataloader_training_opts:
+          mode: async
+          max_num_nodes_per_graph: 44 # train max nodes: 20, max_edges: 54
+          max_num_edges_per_graph: 80
+        ipu_dataloader_inference_opts:
+          mode: async
+          max_num_nodes_per_graph: 44 # valid max nodes: 51, max_edges: 118
+          max_num_edges_per_graph: 80
+        # Data handling-related
+        batch_size_training: 50
+        batch_size_inference: 50
+    predictor:
+      optim_kwargs:
+        loss_scaling: 1024
+    trainer:
+      trainer:
+        precision: 16
+        accumulate_grad_batches: 4
+
+  ipu_config:
+    - deviceIterations(5) # The IPU processes this many batches per step, so large batches must be ready for the model.
+    - replicationFactor(16)
+    # - enableProfiling("graph_analyser") # The folder where the profile will be stored
+    # - enableExecutableCaching("pop_compiler_cache")
+    - TensorLocations.numIOTiles(128)
+    - _Popart.set("defaultBufferingDepth", 128)
+    - Precision.enableStochasticRounding(True)
+
+# accelerator:
+#   type: cpu # cpu or ipu or gpu
+#   config_override:
+#     datamodule:
+#       batch_size_training: 64
+#       batch_size_inference: 256
+#     trainer:
+#       trainer:
+#         precision: 32
+#         accumulate_grad_batches: 1
+
+datamodule:
+  module_type: "MultitaskFromSmilesDataModule"
+  # module_type: "FakeDataModule" # Option to use generated data
+  args: # Matches the setup in the test_multitask_datamodule.py test case.
+    task_specific_args: # To be replaced by a new class "DatasetParams"
+      qm9:
+        df: null
+        df_path: data/neurips2023/small-dataset/qm9.csv.gz
+        # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/qm9.csv.gz
+        # or set path as the URL directly
+        smiles_col: "smiles"
+        label_cols: ["A", "B", "C", "mu", "alpha", "homo", "lumo", "gap", "r2", "zpve", "u0", "u298", "h298", "g298", "cv", "u0_atom", "u298_atom", "h298_atom", "g298_atom"]
+        # sample_size: 2000 # use sample_size for test
+        splits_path: data/neurips2023/small-dataset/qm9_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/qm9_random_splits.pt`
+        seed: *seed
+        task_level: graph
+        label_normalization:
+          normalize_val_test: True
+          method: "normal"
+
+      tox21:
+        df: null
+        df_path: data/neurips2023/small-dataset/Tox21-7k-12-labels.csv.gz
+        # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/Tox21-7k-12-labels.csv.gz
+        # or set path as the URL directly
+        smiles_col: "smiles"
+        label_cols: ["NR-AR", "NR-AR-LBD", "NR-AhR", "NR-Aromatase", "NR-ER", "NR-ER-LBD", "NR-PPAR-gamma", "SR-ARE", "SR-ATAD5", "SR-HSE", "SR-MMP", "SR-p53"]
+        # sample_size: 2000 # use sample_size for test
+        splits_path: data/neurips2023/small-dataset/Tox21_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/Tox21_random_splits.pt`
+        seed: *seed
+        task_level: graph
+
+      zinc:
+        df: null
+        df_path: data/neurips2023/small-dataset/ZINC12k.csv.gz
+        # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/ZINC12k.csv.gz
+        # or set path as the URL directly
+        smiles_col: "smiles"
+        label_cols: ["SA", "logp", "score"]
+        # sample_size: 2000 # use sample_size for test
+        splits_path: data/neurips2023/small-dataset/ZINC12k_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/ZINC12k_random_splits.pt`
+        seed: *seed
+        task_level: graph
+        label_normalization:
+          normalize_val_test: True
+          method: "normal"
+
+    # Featurization
+    prepare_dict_or_graph: pyg:graph
+    featurization_n_jobs: 30
+    featurization_progress: True
+    featurization_backend: "loky"
+    processed_graph_data_path: "../datacache/neurips2023-small/"
+    featurization:
+      # OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence),
+      # 'possible_number_radical_e', 'possible_is_aromatic', 'possible_is_in_ring',
+      # 'num_chiral_centers (not included yet)']
+      atom_property_list_onehot: [atomic-number, group, period, total-valence]
+      atom_property_list_float: [degree, formal-charge, radical-electron, aromatic, in-ring]
+      # OGB: ['possible_bond_type', 'possible_bond_stereo', 'possible_is_in_ring']
+      edge_property_list: [bond-type-onehot, stereo, in-ring]
+      add_self_loop: False
+      explicit_H: False # whether explicit hydrogens are included
+      use_bonds_weights: False
+      pos_encoding_as_features: # encoder dropout 0.18
+        pos_types:
+          lap_eigvec:
+            pos_level: node
+            pos_type: laplacian_eigvec
+            num_pos: 8
+            normalization: "none" # normalization already applied to the eigenvectors
+            disconnected_comp: True # whether to include eigenvalues/eigenvectors for disconnected graphs
+          lap_eigval:
+            pos_level: node
+            pos_type: laplacian_eigval
+            num_pos: 8
+            normalization: "none" # normalization already applied to the eigenvectors
+            disconnected_comp: True # whether to include eigenvalues/eigenvectors for disconnected graphs
+          rw_pos: # use same name as pe_encoder
+            pos_level: node
+            pos_type: rw_return_probs
+            ksteps: 16
+
+    # cache_data_path: .
+    num_workers: 30 # -1 to use all
+    persistent_workers: False # whether to keep dataloader workers alive between epochs.
+    # Setting persistent_workers to false can make the start of each epoch very slow.
+
+
+architecture:
+  model_type: FullGraphMultiTaskNetwork
+  mup_base_path: null
+  pre_nn: # Set as null to avoid a pre-nn network
+    out_dim: 64
+    hidden_dims: 256
+    depth: 2
+    activation: relu
+    last_activation: none
+    dropout: &dropout 0.18
+    normalization: &normalization layer_norm
+    last_normalization: *normalization
+    residual_type: none
+
+  pre_nn_edges: null # Set as null to avoid a pre-nn network
+
+  pe_encoders:
+    out_dim: 32
+    pool: "sum" #"mean" "max"
+    last_norm: None #"batch_norm", "layer_norm"
+    encoders: #la_pos | rw_pos
+      la_pos: # Set as null to avoid a pre-nn network
+        encoder_type: "laplacian_pe"
+        input_keys: ["laplacian_eigvec", "laplacian_eigval"]
+        output_keys: ["feat"]
+        hidden_dim: 64
+        out_dim: 32
+        model_type: 'DeepSet' #'Transformer' or 'DeepSet'
+        num_layers: 2
+        num_layers_post: 1 # Num. layers to apply after pooling
+        dropout: 0.1
+        first_normalization: "none" #"batch_norm" or "layer_norm"
+      rw_pos:
+        encoder_type: "mlp"
+        input_keys: ["rw_return_probs"]
+        output_keys: ["feat"]
+        hidden_dim: 64
+        out_dim: 32
+        num_layers: 2
+        dropout: 0.1
+        normalization: "layer_norm" #"batch_norm" or "layer_norm"
+        first_normalization: "layer_norm" #"batch_norm" or "layer_norm"
+
+
+
+  gnn: # Set as null to avoid a post-nn network
+    in_dim: 64 # or otherwise the correct value
+    out_dim: &gnn_dim 96
+    hidden_dims: *gnn_dim
+    depth: 4
+    activation: gelu
+    last_activation: none
+    dropout: 0.1
+    normalization: "layer_norm"
+    last_normalization: *normalization
+    residual_type: simple
+    virtual_node: 'none'
+    layer_kwargs: null # Parameters for the model itself. You could define dropout_attn: 0.1
+
+
+  graph_output_nn:
+    graph:
+      pooling: [sum]
+      out_dim: *gnn_dim
+      hidden_dims: *gnn_dim
+      depth: 1
+      activation: relu
+      last_activation: none
+      dropout: *dropout
+      normalization: *normalization
+      last_normalization: "none"
+      residual_type: none
+
+  task_heads:
+    qm9:
+      task_level: graph
+      out_dim: 19
+      hidden_dims: 128
+      depth: 2
+      activation: relu
+      last_activation: none
+      dropout: *dropout
+      normalization: *normalization
+      last_normalization: "none"
+      residual_type: none
+    tox21:
+      task_level: graph
+      out_dim: 12
+      hidden_dims: 64
+      depth: 2
+      activation: relu
+      last_activation: sigmoid
+      dropout: *dropout
+      normalization: *normalization
+      last_normalization: "none"
+      residual_type: none
+    zinc:
+      task_level: graph
+      out_dim: 3
+      hidden_dims: 32
+      depth: 2
+      activation: relu
+      last_activation: none
+      dropout: *dropout
+      normalization: *normalization
+      last_normalization: "none"
+      residual_type: none
+
+#Task-specific
+predictor:
+  metrics_on_progress_bar:
+    qm9: ["mae"]
+    tox21: ["auroc"]
+    zinc: ["mae"]
+  loss_fun:
+    qm9: mae_ipu
+    tox21: bce_ipu
+    zinc: mae_ipu
+  random_seed: *seed
+  optim_kwargs:
+    lr: 4.e-5 # warmup can be scheduled using torch_scheduler_kwargs
+    # weight_decay: 1.e-7
+  torch_scheduler_kwargs:
+    module_type: WarmUpLinearLR
+    max_num_epochs: &max_epochs 100
+    warmup_epochs: 10
+    verbose: False
+  scheduler_kwargs:
+    # monitor: &monitor qm9/mae/train
+    # mode: min
+    # frequency: 1
+  target_nan_mask: null # null: no mask, 0: 0 mask, ignore-flatten, ignore-mean-per-label
+  multitask_handling: flatten # flatten, mean-per-label
+
+# Task-specific
+metrics:
+  qm9: &qm9_metrics
+    - name: mae
+      metric: mae_ipu
+      target_nan_mask: null
+      multitask_handling: flatten
+      threshold_kwargs: null
+    - name: pearsonr
+      metric: pearsonr_ipu
+      threshold_kwargs: null
+      target_nan_mask: null
+      multitask_handling: mean-per-label
+    - name: r2_score
+      metric: r2_score_ipu
+      target_nan_mask: null
+      multitask_handling: mean-per-label
+      threshold_kwargs: null
+  tox21:
+    - name: auroc
+      metric: auroc_ipu
+      task: binary
+      multitask_handling: mean-per-label
+      threshold_kwargs: null
+    - name: avpr
+      metric: average_precision_ipu
+      task: binary
+      multitask_handling: mean-per-label
+      threshold_kwargs: null
+    - name: f1 > 0.5
+      metric: f1
+      multitask_handling: mean-per-label
+      target_to_int: True
+      num_classes: 2
+      average: micro
+      threshold_kwargs: &threshold_05
+        operator: greater
+        threshold: 0.5
+        th_on_preds: True
+        th_on_target: True
+    - name: precision > 0.5
+      metric: precision
+      multitask_handling: mean-per-label
+      average: micro
+      threshold_kwargs: *threshold_05
+  zinc: *qm9_metrics
+
+trainer:
+  seed: *seed
+  logger:
+    save_dir: logs/neurips2023-small/
+    name: ${constants.name}
+    project: ${constants.name}
+  #early_stopping:
+  #  monitor: *monitor
+  #  min_delta: 0
+  #  patience: 10
+  #  mode: &mode min
+  model_checkpoint:
+    dirpath: models_checkpoints/${constants.name}/
+    filename: ${constants.name}
+    # monitor: *monitor
+    # mode: *mode
+    # save_top_k: 1
+    save_last: True
+  trainer:
+    max_epochs: *max_epochs
+    min_epochs: 1
+    check_val_every_n_epoch: 20
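The two base files above are pulled into the per-model configs below through Hydra's defaults list: `# @package _global_` makes the base config merge at the root of the composed config, and `_self_` makes the derived file's own keys apply on top of the base. A minimal sketch of that composition, assuming Hydra >= 1.1 and a `config_path` relative to the calling script (the exact entry point Graphium uses may differ):

from hydra import initialize, compose

# Sketch only: compose config_large_gcn.yaml on top of base_config/large.yaml.
with initialize(version_base=None, config_path="expts/neurips2023_configs"):
    # `defaults: [base_config: large, _self_]` loads base_config/large.yaml
    # first, then merges this file's own keys over it.
    cfg = compose(config_name="config_large_gcn")

print(cfg.architecture.gnn.layer_type)  # 'pyg:gcn', set by the override file
print(cfg.architecture.gnn.out_dim)     # 768, inherited from base_config/large.yaml
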
diff --git a/expts/neurips2023_configs/config_large_gcn.yaml b/expts/neurips2023_configs/config_large_gcn.yaml
index 033b8a5f5..1dc397998 100644
--- a/expts/neurips2023_configs/config_large_gcn.yaml
+++ b/expts/neurips2023_configs/config_large_gcn.yaml
@@ -1,424 +1,12 @@
 # Running the gcn model with the largemix dataset on IPU.
-constants:
-  name: &name neurips2023_large_data_gcn
-  seed: &seed 42
-  raise_train_error: true # Whether the code should raise an error if it crashes during training
-  entity: multitask-gnn
-
-accelerator:
-  type: ipu # cpu or ipu or gpu
-  config_override:
-    datamodule:
-      args:
-        ipu_dataloader_training_opts:
-          mode: async
-          max_num_nodes_per_graph: 30 # train max nodes: 20, max_edges: 54
-          max_num_edges_per_graph: 100
-        ipu_dataloader_inference_opts:
-          mode: async
-          max_num_nodes_per_graph: 35 # valid max nodes: 51, max_edges: 118
-          max_num_edges_per_graph: 100
-        # Data handling-related
-        batch_size_training: 30
-        batch_size_inference: 30
-    predictor:
-      metrics_every_n_train_steps: 1000
-      optim_kwargs:
-        loss_scaling: 1024
-    trainer:
-      trainer:
-        precision: 16-true
-        accumulate_grad_batches: 2
-
-  ipu_config:
-    - deviceIterations(30) # IPU would require large batches to be ready for the model.
-    - replicationFactor(16)
-    # - enableProfiling("graph_analyser") # The folder where the profile will be stored
-    # - enableExecutableCaching("pop_compiler_cache")
-    - TensorLocations.numIOTiles(128)
-    - _Popart.set("defaultBufferingDepth", 96)
-    - Precision.enableStochasticRounding(True)
-    # - Precision.enableFloatingPointExceptions(True)
-
-  ipu_inference_config:
-    # set device iteration and replication factor to 1 during inference
-    # gradient accumulation was set to 1 in the code
-    - deviceIterations(1)
-    - replicationFactor(1)
-    - Precision.enableStochasticRounding(False)
-
-# accelerator:
-#   type: cpu # cpu or ipu or gpu
-#   config_override:
-#     datamodule:
-#       args:
-#         batch_size_training: 64
-#         batch_size_inference: 256
-#     trainer:
-#       trainer:
-#         precision: 32
-#         accumulate_grad_batches: 1
-
-datamodule:
-  module_type: "MultitaskFromSmilesDataModule"
-  # module_type: "FakeDataModule" # Option to use generated data
-  args: # Matches that in the test_multitask_datamodule.py case.
-    task_specific_args: # To be replaced by a new class "DatasetParams"
-      l1000_vcap:
-        df: null
-        df_path: graphium/data/neurips2023/large-dataset/LINCS_L1000_VCAP_0-4.csv.gz
-        # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_VCAP_0-4.csv.gz
-        # or set path as the URL directly
-        smiles_col: "SMILES"
-        label_cols: geneID-* # geneID-* means all columns starting with "geneID-"
-        # sample_size: 2000 # use sample_size for test
-        task_level: graph
-        splits_path: graphium/data/neurips2023/large-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt`
-        epoch_sampling_fraction: 1.0
-      l1000_mcf7:
-        df: null
-        df_path: graphium/data/neurips2023/large-dataset/LINCS_L1000_MCF7_0-4.csv.gz
-        # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz
-        # or set path as the URL directly
-        smiles_col: "SMILES"
-        label_cols: geneID-* # geneID-* means all columns starting with "geneID-"
-        # sample_size: 2000 # use sample_size for test
-        task_level: graph
-        splits_path: graphium/data/neurips2023/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt`
-        epoch_sampling_fraction: 1.0
-
-      pcba_1328:
-        df: null
-        df_path: graphium/data/neurips2023/large-dataset/PCBA_1328_1564k.parquet
-        # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCBA_1328_1564k.parquet
-        # or set path as the URL directly
-        smiles_col: "SMILES"
-        label_cols: assayID-* # assayID-* means all columns starting with "assayID-"
-        # sample_size: 2000 # use sample_size for test
-        task_level: graph
-        splits_path: graphium/data/neurips2023/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt`
-        epoch_sampling_fraction: 1.0
-
-      pcqm4m_g25:
-        df: null
-        df_path: graphium/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet
-        # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet
-        # or set path as the URL directly
-        smiles_col: "ordered_smiles"
-        label_cols: graph_* # graph_* means all columns starting with "graph_"
-        # sample_size: 2000 # use sample_size for test
-        task_level: graph
-        splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt`
-        label_normalization:
-          normalize_val_test: True
-          method: "normal"
-        epoch_sampling_fraction: 1.0
-
-      pcqm4m_n4:
-        df: null
-        df_path: graphium/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet
-        # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet
-        # or set path as the URL directly
-        smiles_col: "ordered_smiles"
-        label_cols: node_* # node_* means all columns starting with "node_"
-        # sample_size: 2000 # use sample_size for test
-        task_level: node
-        splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt`
-        seed: *seed
-        label_normalization:
-          normalize_val_test: True
-          method: "normal"
-        epoch_sampling_fraction: 1.0
-
-    # Featurization
-    prepare_dict_or_graph: pyg:graph
-    featurization_n_jobs: 30
-    featurization_progress: True
-    featurization_backend: "loky"
-    processed_graph_data_path: "../datacache/neurips2023-large/"
-    featurization:
-      # OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence),
-      # 'possible_number_radical_e', 'possible_is_aromatic', 'possible_is_in_ring',
-      # 'num_chiral_centers (not included yet)']
-      atom_property_list_onehot: [atomic-number, group, period, total-valence]
-      atom_property_list_float: [degree, formal-charge, radical-electron, aromatic, in-ring]
-      # OGB: ['possible_bond_type', 'possible_bond_stereo', 'possible_is_in_ring']
-      edge_property_list: [bond-type-onehot, stereo, in-ring]
-      add_self_loop: False
-      explicit_H: False # if H is included
-      use_bonds_weights: False
-      pos_encoding_as_features: # encoder dropout 0.18
-        pos_types:
-          lap_eigvec:
-            pos_level: node
-            pos_type: laplacian_eigvec
-            num_pos: 8
-            normalization: "none" # nomrlization already applied on the eigen vectors
-            disconnected_comp: True # if eigen values/vector for disconnected graph are included
-          lap_eigval:
-            pos_level: node
-            pos_type: laplacian_eigval
-            num_pos: 8
-            normalization: "none" # nomrlization already applied on the eigen vectors
-            disconnected_comp: True # if eigen values/vector for disconnected graph are included
-          rw_pos: # use same name as pe_encoder
-            pos_level: node
-            pos_type: rw_return_probs
-            ksteps: 16
-
-    # cache_data_path: .
-    num_workers: 32 # -1 to use all
-    persistent_workers: True # if use persistent worker at the start of each epoch.
-    # Using persistent_workers false might make the start of each epoch very long.
+defaults:
+  - base_config: large
+  - _self_
+
+constants:
+  name: neurips2023_large_data_gcn
 
 architecture:
-  model_type: FullGraphMultiTaskNetwork
-  mup_base_path: null
-  pre_nn: # Set as null to avoid a pre-nn network
-    out_dim: 64
-    hidden_dims: 256
-    depth: 2
-    activation: relu
-    last_activation: none
-    dropout: &dropout 0.1
-    normalization: &normalization layer_norm
-    last_normalization: *normalization
-    residual_type: none
-
-  pre_nn_edges: null
-
-  pe_encoders:
-    out_dim: 32
-    pool: "sum" #"mean" "max"
-    last_norm: None #"batch_norm", "layer_norm"
-    encoders: #la_pos | rw_pos
-      la_pos: # Set as null to avoid a pre-nn network
-        encoder_type: "laplacian_pe"
-        input_keys: ["laplacian_eigvec", "laplacian_eigval"]
-        output_keys: ["feat"]
-        hidden_dim: 64
-        out_dim: 32
-        model_type: 'DeepSet' #'Transformer' or 'DeepSet'
-        num_layers: 2
-        num_layers_post: 1 # Num. layers to apply after pooling
-        dropout: 0.1
-        first_normalization: "none" #"batch_norm" or "layer_norm"
-      rw_pos:
-        encoder_type: "mlp"
-        input_keys: ["rw_return_probs"]
-        output_keys: ["feat"]
-        hidden_dim: 64
-        out_dim: 32
-        num_layers: 2
-        dropout: 0.1
-        normalization: "layer_norm" #"batch_norm" or "layer_norm"
-        first_normalization: "layer_norm" #"batch_norm" or "layer_norm"
-
-
   gnn: # Set as null to avoid a post-nn network
-    in_dim: 64 # or otherwise the correct value
-    out_dim: &gnn_dim 768
-    hidden_dims: *gnn_dim
-    depth: 4
-    activation: gelu
-    last_activation: none
-    dropout: 0.1
-    normalization: "layer_norm"
-    last_normalization: *normalization
-    residual_type: simple
-    virtual_node: 'none'
-    layer_type: 'pyg:gcn' #pyg:gine #'pyg:gps' # pyg:gated-gcn, pyg:gine,pyg:gps
-
-
-  graph_output_nn:
-    graph:
-      pooling: [sum]
-      out_dim: *gnn_dim
-      hidden_dims: *gnn_dim
-      depth: 1
-      activation: relu
-      last_activation: none
-      dropout: *dropout
-      normalization: *normalization
-      last_normalization: "none"
-      residual_type: none
-    node:
-      pooling: [sum]
-      out_dim: *gnn_dim
-      hidden_dims: *gnn_dim
-      depth: 1
-      activation: relu
-      last_activation: none
-      dropout: *dropout
-      normalization: *normalization
-      last_normalization: "none"
-      residual_type: none
-
-  task_heads:
-    l1000_vcap:
-      task_level: graph
-      out_dim: 4890
-      hidden_dims: 128
-      depth: 2
-      activation: none
-      last_activation: none
-      dropout: *dropout
-      normalization: *normalization
-      last_normalization: "none"
-      residual_type: none
-    l1000_mcf7:
-      task_level: graph
-      out_dim: 4890
-      hidden_dims: 128
-      depth: 2
-      activation: none
-      last_activation: none
-      dropout: *dropout
-      normalization: *normalization
-      last_normalization: "none"
-      residual_type: none
-    pcba_1328:
-      task_level: graph
-      out_dim: 1328
-      hidden_dims: 64
-      depth: 2
-      activation: relu
-      last_activation: none
-      dropout: *dropout
-      normalization: *normalization
-      last_normalization: "none"
-      residual_type: none
-    pcqm4m_g25:
-      task_level: graph
-      out_dim: 25
-      hidden_dims: 32
-      depth: 2
-      activation: relu
-      last_activation: none
-      dropout: *dropout
-      normalization: *normalization
-      last_normalization: "none"
-      residual_type: none
-    pcqm4m_n4:
-      task_level: node
-      out_dim: 4
-      hidden_dims: 32
-      depth: 2
-      activation: relu
-      last_activation: none
-      dropout: *dropout
-      normalization: *normalization
-      last_normalization: "none"
-      residual_type: none
-
-#Task-specific
-predictor:
-  metrics_on_progress_bar:
-    l1000_vcap: []
-    l1000_mcf7: []
-    pcba_1328: []
-    pcqm4m_g25: []
-    pcqm4m_n4: []
-  metrics_on_training_set:
-    l1000_vcap: []
-    l1000_mcf7: []
-    pcba_1328: []
-    pcqm4m_g25: []
-    pcqm4m_n4: []
-  loss_fun:
-    l1000_vcap:
-      name: hybrid_ce_ipu
-      n_brackets: 5
-      alpha: 0.5
-    l1000_mcf7:
-      name: hybrid_ce_ipu
-      n_brackets: 5
-      alpha: 0.5
-    pcba_1328: bce_logits_ipu
-    pcqm4m_g25: mae_ipu
-    pcqm4m_n4: mae_ipu
-  random_seed: *seed
-  optim_kwargs:
-    lr: 1.e-4 # warmup can be scheduled using torch_scheduler_kwargs
-    # weight_decay: 1.e-7
-  torch_scheduler_kwargs:
-    module_type: WarmUpLinearLR
-    max_num_epochs: &max_epochs 100
-    warmup_epochs: 10
-    verbose: False
-  scheduler_kwargs:
-    # monitor: &monitor qm9/mae/train
-    # mode: min
-    # frequency: 1
-  target_nan_mask: null # null: no mask, 0: 0 mask, ignore-flatten, ignore-mean-per-label
-  multitask_handling: flatten # flatten, mean-per-label
-
-# Task-specific
-metrics:
-  l1000_vcap: &classif_metrics
-    - name: auroc
-      metric: auroc_ipu
-      num_classes: 5
-      task: multiclass
-      multitask_handling: mean-per-label
-      threshold_kwargs: null
-    - name: avpr
-      metric: average_precision_ipu
-      num_classes: 5
-      task: multiclass
-      target_to_int: True
-      target_nan_mask: -1000
-      ignore_index: -1000
-      multitask_handling: mean-per-label
-      threshold_kwargs: null
-  l1000_mcf7: *classif_metrics
-  pcba_1328:
-    - name: auroc
-      metric: auroc_ipu
-      task: binary
-      multitask_handling: mean-per-label
-      threshold_kwargs: null
-    - name: avpr
-      metric: average_precision_ipu
-      task: binary
-      multitask_handling: mean-per-label
-      threshold_kwargs: null
-  pcqm4m_g25: &pcqm_metrics
-    - name: mae
-      metric: mae_ipu
-      target_nan_mask: null
-      multitask_handling: mean-per-label
-      threshold_kwargs: null
-    - name: pearsonr
-      metric: pearsonr_ipu
-      threshold_kwargs: null
-      target_nan_mask: null
-      multitask_handling: mean-per-label
-    - name: r2
-      metric: r2_score_ipu
-      threshold_kwargs: null
-      target_nan_mask: null
-      multitask_handling: mean-per-label
-  pcqm4m_n4: *pcqm_metrics
-
-trainer:
-  seed: *seed
-  logger:
-    save_dir: logs/neurips2023-large/
-    name: *name
-    project: *name
-  model_checkpoint:
-    dirpath: models_checkpoints/neurips2023-large-gcn/
-    filename: *name
-    # monitor: *monitor
-    # mode: *mode
-    # save_top_k: 1
-    save_last: True
-  trainer:
-    max_epochs: *max_epochs
-    min_epochs: 1
-    check_val_every_n_epoch: 20
+    layer_type: 'pyg:gcn' #pyg:gine #'pyg:gps' # pyg:gated-gcn, pyg:gine,pyg:gps
\ No newline at end of file
diff --git a/expts/neurips2023_configs/config_large_gcn_gpu.yaml b/expts/neurips2023_configs/config_large_gcn_gpu.yaml
index d0fa82a94..2830530aa 100644
--- a/expts/neurips2023_configs/config_large_gcn_gpu.yaml
+++ b/expts/neurips2023_configs/config_large_gcn_gpu.yaml
@@ -1,7 +1,15 @@
 # Testing GCN on LargeMix with FP16/32 on GPU
+
+defaults:
+  - base_config: large
+  - _self_
+
 constants:
-  name: &name neurips2023_large_data_gcn_gpu
-  config_override: "expts/neurips2023_configs/config_large_gcn.yaml"
+  name: neurips2023_large_data_gcn_gpu
+
+architecture:
+  gnn: # Set as null to avoid a post-nn network
+    layer_type: 'pyg:gcn' #pyg:gine #'pyg:gps' # pyg:gated-gcn, pyg:gine,pyg:gps
 
 accelerator:
   type: gpu
diff --git a/expts/neurips2023_configs/config_large_gin.yaml b/expts/neurips2023_configs/config_large_gin.yaml
index eb2a612d2..c7d37c58c 100644
--- a/expts/neurips2023_configs/config_large_gin.yaml
+++ b/expts/neurips2023_configs/config_large_gin.yaml
@@ -1,26 +1,11 @@
 # Running the gin model with the largemix dataset on IPU.
+defaults:
+  - base_config: large
+  - _self_
+
 constants:
-  name: &name neurips2023_large_data_gin
-  config_override: "expts/neurips2023_configs/config_large_gcn.yaml"
+  name: neurips2023_large_data_gin
 
 architecture:
   gnn: # Set as null to avoid a post-nn network
-    out_dim: &gnn_dim 704
-    layer_type: 'pyg:gin' #pyg:gine #'pyg:gps' # pyg:gated-gcn, pyg:gine,pyg:gps
-    hidden_dims: *gnn_dim
-
-  graph_output_nn:
-    graph:
-      out_dim: *gnn_dim
-      hidden_dims: *gnn_dim
-    node:
-      out_dim: *gnn_dim
-      hidden_dims: *gnn_dim
-
-trainer:
-  logger:
-    name: *name
-    project: *name
-  model_checkpoint:
-    dirpath: models_checkpoints/neurips2023-large-gin/
-    filename: *name
+    layer_type: 'pyg:gin'
\ No newline at end of file
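The gine and mpnn overrides below replace file-local `&normalization`/`&dropout` anchors with OmegaConf interpolations such as `${architecture.pre_nn.dropout}`, which are resolved against the fully composed config. A small self-contained sketch of that behaviour (the values here are illustrative, not the experiment's):

from omegaconf import OmegaConf

cfg = OmegaConf.create(
    """
architecture:
  pre_nn:
    dropout: 0.1
  pre_nn_edges:
    dropout: ${architecture.pre_nn.dropout}
"""
)

print(cfg.architecture.pre_nn_edges.dropout)  # 0.1, resolved lazily on access
cfg.architecture.pre_nn.dropout = 0.2
print(cfg.architecture.pre_nn_edges.dropout)  # 0.2, interpolations track the source
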
diff --git a/expts/neurips2023_configs/config_large_gine.yaml b/expts/neurips2023_configs/config_large_gine.yaml
index 18915d5bb..2278f8422 100644
--- a/expts/neurips2023_configs/config_large_gine.yaml
+++ b/expts/neurips2023_configs/config_large_gine.yaml
@@ -1,7 +1,11 @@
 # Running the gine model with the largemix dataset on IPU.
+
+defaults:
+  - base_config: large
+  - _self_
+
 constants:
-  name: &name neurips2023_large_data_gine
-  config_override: "expts/neurips2023_configs/config_large_gcn.yaml"
+  name: neurips2023_large_data_gine
 
 architecture:
   pre_nn_edges: # Set as null to avoid a pre-nn network
@@ -10,15 +14,15 @@ architecture:
     depth: 2
     activation: relu
     last_activation: none
-    dropout: 0.1
-    normalization: &normalization layer_norm
-    last_normalization: *normalization
+    dropout: ${architecture.pre_nn.dropout}
+    normalization: ${architecture.pre_nn.normalization}
+    last_normalization: ${architecture.pre_nn.normalization}
     residual_type: none
 
-  gnn: # Set as null to avoid a post-nn network
+  gnn:
     out_dim: &gnn_dim 704
     hidden_dims: *gnn_dim
-    layer_type: 'pyg:gine' #pyg:gine #'pyg:gps' # pyg:gated-gcn, pyg:gine,pyg:gps
+    layer_type: 'pyg:gine'
 
   graph_output_nn:
@@ -26,12 +30,4 @@ architecture:
     graph:
       out_dim: *gnn_dim
       hidden_dims: *gnn_dim
     node:
       out_dim: *gnn_dim
-      hidden_dims: *gnn_dim
-
-trainer:
-  logger:
-    name: *name
-    project: *name
-  model_checkpoint:
-    dirpath: models_checkpoints/neurips2023-large-gine/
-    filename: *name
+      hidden_dims: *gnn_dim
\ No newline at end of file
diff --git a/expts/neurips2023_configs/config_large_mpnn.yaml b/expts/neurips2023_configs/config_large_mpnn.yaml
index 213c75e97..b56f8dd2d 100644
--- a/expts/neurips2023_configs/config_large_mpnn.yaml
+++ b/expts/neurips2023_configs/config_large_mpnn.yaml
@@ -1,12 +1,12 @@
 # Testing the mpnn only model with the PCQMv2 dataset on IPU.
+
+defaults:
+  - base_config: large
+
 constants:
-  name: &name neurips2023_large_data_mpnn
-  config_override: "expts/neurips2023_configs/config_large_gcn.yaml"
+  name: neurips2023_large_data_mpnn
 
 architecture:
-  model_type: FullGraphMultiTaskNetwork
-  mup_base_path: null
-
   pre_nn_edges: # Set as null to avoid a pre-nn network
     out_dim: 32
     hidden_dims: 128
@@ -14,32 +14,14 @@ architecture:
     activation: relu
     last_activation: none
     dropout: 0.18
-    normalization: layer_norm
-    last_normalization: layer_norm
+    normalization: ${architecture.pre_nn.normalization}
+    last_normalization: ${architecture.pre_nn.normalization}
     residual_type: none
 
   gnn: # Set as null to avoid a post-nn network
     out_dim: &gnn_dim 64
     hidden_dims: *gnn_dim
-    layer_type: 'pyg:mpnnplus' #pyg:gine #'pyg:gps' # pyg:gated-gcn, pyg:gine,pyg:gps
+    layer_type: 'pyg:gps' #pyg:gine #'pyg:gps' # pyg:gated-gcn, pyg:gine,pyg:gps
     layer_kwargs: # Parameters for the model itself. You could define dropout_attn: 0.1
-      node_residual: false
       mpnn_type: 'pyg:mpnnplus'
-      in_dim_edges: 32
       out_dim_edges: 32
-
-  graph_output_nn:
-    graph:
-      out_dim: *gnn_dim
-      hidden_dims: *gnn_dim
-    node:
-      out_dim: *gnn_dim
-      hidden_dims: *gnn_dim
-
-trainer:
-  logger:
-    name: *name
-    project: *name
-  model_checkpoint:
-    dirpath: models_checkpoints/neurips2023-large-mpnn/
-    filename: *name
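One reason for that switch: YAML anchors such as `&gnn_dim` are expanded by the YAML parser within a single file, so an override file cannot alias a value defined in the base config, and edits after parsing do not propagate. A sketch of the difference (values illustrative):

from omegaconf import OmegaConf

cfg = OmegaConf.create(
    """
gnn:
  out_dim: &gnn_dim 704
  hidden_dims: *gnn_dim
"""
)

# The alias was expanded at parse time, so OmegaConf only stores plain values;
# changing out_dim afterwards does NOT propagate to hidden_dims.
cfg.gnn.out_dim = 768
print(cfg.gnn.hidden_dims)  # still 704
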
diff --git a/expts/neurips2023_configs/config_small_gated_gcn.yaml b/expts/neurips2023_configs/config_small_gated_gcn.yaml
index db3d08ba4..8e00d26f6 100644
--- a/expts/neurips2023_configs/config_small_gated_gcn.yaml
+++ b/expts/neurips2023_configs/config_small_gated_gcn.yaml
@@ -1,7 +1,11 @@
 # Testing the gated_gcn model with the PCQMv2 dataset on IPU.
+
+defaults:
+  - base_config: small
+  - _self_
+
 constants:
-  name: &name neurips2023_small_data_gated_gcn
-  config_override: "expts/neurips2023_configs/config_small_gcn.yaml"
+  name: neurips2023_small_data_gated_gcn
 
 architecture:
   pre_nn_edges: # Set as null to avoid a pre-nn network
@@ -10,19 +14,10 @@ architecture:
     depth: 2
     activation: relu
     last_activation: none
-    dropout: 0.18
-    normalization: layer_norm
-    last_normalization: layer_norm
+    dropout: ${architecture.pre_nn.dropout}
+    normalization: ${architecture.pre_nn.normalization}
+    last_normalization: ${architecture.pre_nn.normalization}
     residual_type: none
-
   gnn: # Set as null to avoid a post-nn network
     layer_type: 'pyg:gated-gcn' #pyg:gine #'pyg:gps' # pyg:gated-gcn, pyg:gine,pyg:gps
-
-trainer:
-  logger:
-    name: *name
-    project: *name
-  model_checkpoint:
-    dirpath: models_checkpoints/neurips2023-small-gated-gcn/
-    filename: *name
diff --git a/expts/neurips2023_configs/config_small_gcn.yaml b/expts/neurips2023_configs/config_small_gcn.yaml
index fffffbefd..114ce26dc 100644
--- a/expts/neurips2023_configs/config_small_gcn.yaml
+++ b/expts/neurips2023_configs/config_small_gcn.yaml
@@ -1,345 +1,12 @@
-# Testing the gcn model with the PCQMv2 dataset on IPU.
-constants:
-  name: &name neurips2023_small_data_gcn
-  seed: &seed 42
-  raise_train_error: true # Whether the code should raise an error if it crashes during training
-
-accelerator:
-  type: ipu # cpu or ipu or gpu
-  config_override:
-    datamodule:
-      args:
-        ipu_dataloader_training_opts:
-          mode: async
-          max_num_nodes_per_graph: 44 # train max nodes: 20, max_edges: 54
-          max_num_edges_per_graph: 80
-        ipu_dataloader_inference_opts:
-          mode: async
-          max_num_nodes_per_graph: 44 # valid max nodes: 51, max_edges: 118
-          max_num_edges_per_graph: 80
-        # Data handling-related
-        batch_size_training: 50
-        batch_size_inference: 50
-    predictor:
-      optim_kwargs:
-        loss_scaling: 1024
-    trainer:
-      trainer:
-        precision: 16
-        accumulate_grad_batches: 4
-
-  ipu_config:
-    - deviceIterations(5) # IPU would require large batches to be ready for the model.
-    - replicationFactor(1)
-    # - enableProfiling("graph_analyser") # The folder where the profile will be stored
-    # - enableExecutableCaching("pop_compiler_cache")
-    - TensorLocations.numIOTiles(128)
-    - _Popart.set("defaultBufferingDepth", 128)
-    - Precision.enableStochasticRounding(True)
-
-# accelerator:
-#   type: cpu # cpu or ipu or gpu
-#   config_override:
-#     datamodule:
-#       batch_size_training: 64
-#       batch_size_inference: 256
-#     trainer:
-#       trainer:
-#         precision: 32
-#         accumulate_grad_batches: 1
-
-datamodule:
-  module_type: "MultitaskFromSmilesDataModule"
-  # module_type: "FakeDataModule" # Option to use generated data
-  args: # Matches that in the test_multitask_datamodule.py case.
-    task_specific_args: # To be replaced by a new class "DatasetParams"
-      qm9:
-        df: null
-        df_path: data/neurips2023/small-dataset/qm9.csv.gz
-        # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/qm9.csv.gz
-        # or set path as the URL directly
-        smiles_col: "smiles"
-        label_cols: ["A", "B", "C", "mu", "alpha", "homo", "lumo", "gap", "r2", "zpve", "u0", "u298", "h298", "g298", "cv", "u0_atom", "u298_atom", "h298_atom", "g298_atom"]
-        # sample_size: 2000 # use sample_size for test
-        splits_path: data/neurips2023/small-dataset/qm9_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/qm9_random_splits.pt`
-        seed: *seed
-        task_level: graph
-        label_normalization:
-          normalize_val_test: True
-          method: "normal"
+# Testing the gcn model with the toymix dataset on IPU.
-      tox21:
-        df: null
-        df_path: data/neurips2023/small-dataset/Tox21-7k-12-labels.csv.gz
-        # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/Tox21-7k-12-labels.csv.gz
-        # or set path as the URL directly
-        smiles_col: "smiles"
-        label_cols: ["NR-AR", "NR-AR-LBD", "NR-AhR", "NR-Aromatase", "NR-ER", "NR-ER-LBD", "NR-PPAR-gamma", "SR-ARE", "SR-ATAD5", "SR-HSE", "SR-MMP", "SR-p53"]
-        # sample_size: 2000 # use sample_size for test
-        splits_path: data/neurips2023/small-dataset/Tox21_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/Tox21_random_splits.pt`
-        seed: *seed
-        task_level: graph
-
-      zinc:
-        df: null
-        df_path: data/neurips2023/small-dataset/ZINC12k.csv.gz
-        # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/ZINC12k.csv.gz
-        # or set path as the URL directly
-        smiles_col: "smiles"
-        label_cols: ["SA", "logp", "score"]
-        # sample_size: 2000 # use sample_size for test
-        splits_path: data/neurips2023/small-dataset/ZINC12k_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/ZINC12k_random_splits.pt`
-        seed: *seed
-        task_level: graph
-        label_normalization:
-          normalize_val_test: True
-          method: "normal"
-
-    # Featurization
-    prepare_dict_or_graph: pyg:graph
-    featurization_n_jobs: 30
-    featurization_progress: True
-    featurization_backend: "loky"
-    processed_graph_data_path: "../datacache/neurips2023-small/"
-    featurization:
-      # OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence),
-      # 'possible_number_radical_e', 'possible_is_aromatic', 'possible_is_in_ring',
-      # 'num_chiral_centers (not included yet)']
-      atom_property_list_onehot: [atomic-number, group, period, total-valence]
-      atom_property_list_float: [degree, formal-charge, radical-electron, aromatic, in-ring]
-      # OGB: ['possible_bond_type', 'possible_bond_stereo', 'possible_is_in_ring']
-      edge_property_list: [bond-type-onehot, stereo, in-ring]
-      add_self_loop: False
-      explicit_H: False # if H is included
-      use_bonds_weights: False
-      pos_encoding_as_features: # encoder dropout 0.18
-        pos_types:
-          lap_eigvec:
-            pos_level: node
-            pos_type: laplacian_eigvec
-            num_pos: 8
-            normalization: "none" # nomrlization already applied on the eigen vectors
-            disconnected_comp: True # if eigen values/vector for disconnected graph are included
-          lap_eigval:
-            pos_level: node
-            pos_type: laplacian_eigval
-            num_pos: 8
-            normalization: "none" # nomrlization already applied on the eigen vectors
-            disconnected_comp: True # if eigen values/vector for disconnected graph are included
-          rw_pos: # use same name as pe_encoder
-            pos_level: node
-            pos_type: rw_return_probs
-            ksteps: 16
-
-    # cache_data_path: .
-    num_workers: 30 # -1 to use all
-    persistent_workers: False # if use persistent worker at the start of each epoch.
-    # Using persistent_workers false might make the start of each epoch very long.
-    featurization_backend: "loky"
+defaults:
+  - base_config: small
+  - _self_
+constants:
+  name: neurips2023_small_data_gcn
 
 architecture:
-  model_type: FullGraphMultiTaskNetwork
-  mup_base_path: null
-  pre_nn: # Set as null to avoid a pre-nn network
-    out_dim: 64
-    hidden_dims: 256
-    depth: 2
-    activation: relu
-    last_activation: none
-    dropout: &dropout 0.18
-    normalization: &normalization layer_norm
-    last_normalization: *normalization
-    residual_type: none
-
-  pre_nn_edges: null # Set as null to avoid a pre-nn network
-
-  pe_encoders:
-    out_dim: 32
-    pool: "sum" #"mean" "max"
-    last_norm: None #"batch_norm", "layer_norm"
-    encoders: #la_pos | rw_pos
-      la_pos: # Set as null to avoid a pre-nn network
-        encoder_type: "laplacian_pe"
-        input_keys: ["laplacian_eigvec", "laplacian_eigval"]
-        output_keys: ["feat"]
-        hidden_dim: 64
-        out_dim: 32
-        model_type: 'DeepSet' #'Transformer' or 'DeepSet'
-        num_layers: 2
-        num_layers_post: 1 # Num. layers to apply after pooling
-        dropout: 0.1
-        first_normalization: "none" #"batch_norm" or "layer_norm"
-      rw_pos:
-        encoder_type: "mlp"
-        input_keys: ["rw_return_probs"]
-        output_keys: ["feat"]
-        hidden_dim: 64
-        out_dim: 32
-        num_layers: 2
-        dropout: 0.1
-        normalization: "layer_norm" #"batch_norm" or "layer_norm"
-        first_normalization: "layer_norm" #"batch_norm" or "layer_norm"
-
-
   gnn: # Set as null to avoid a post-nn network
-    in_dim: 64 # or otherwise the correct value
-    out_dim: &gnn_dim 96
-    hidden_dims: *gnn_dim
-    depth: 4
-    activation: gelu
-    last_activation: none
-    dropout: 0.1
-    normalization: "layer_norm"
-    last_normalization: *normalization
-    residual_type: simple
-    virtual_node: 'none'
     layer_type: 'pyg:gcn' #pyg:gine #'pyg:gps' # pyg:gated-gcn, pyg:gine,pyg:gps
-    layer_kwargs: null # Parameters for the model itself. You could define dropout_attn: 0.1
-
-
-  graph_output_nn:
-    graph:
-      pooling: [sum]
-      out_dim: *gnn_dim
-      hidden_dims: *gnn_dim
-      depth: 1
-      activation: relu
-      last_activation: none
-      dropout: *dropout
-      normalization: *normalization
-      last_normalization: "none"
-      residual_type: none
-
-  task_heads:
-    qm9:
-      task_level: graph
-      out_dim: 19
-      hidden_dims: 128
-      depth: 2
-      activation: relu
-      last_activation: none
-      dropout: *dropout
-      normalization: *normalization
-      last_normalization: "none"
-      residual_type: none
-    tox21:
-      task_level: graph
-      out_dim: 12
-      hidden_dims: 64
-      depth: 2
-      activation: relu
-      last_activation: sigmoid
-      dropout: *dropout
-      normalization: *normalization
-      last_normalization: "none"
-      residual_type: none
-    zinc:
-      task_level: graph
-      out_dim: 3
-      hidden_dims: 32
-      depth: 2
-      activation: relu
-      last_activation: none
-      dropout: *dropout
-      normalization: *normalization
-      last_normalization: "none"
-      residual_type: none
-
-#Task-specific
-predictor:
-  metrics_on_progress_bar:
-    qm9: ["mae"]
-    tox21: ["auroc"]
-    zinc: ["mae"]
-  loss_fun:
-    qm9: mae_ipu
-    tox21: bce_ipu
-    zinc: mae_ipu
-  random_seed: *seed
-  optim_kwargs:
-    lr: 4.e-5 # warmup can be scheduled using torch_scheduler_kwargs
-    # weight_decay: 1.e-7
-  torch_scheduler_kwargs:
-    module_type: WarmUpLinearLR
-    max_num_epochs: &max_epochs 100
-    warmup_epochs: 10
-    verbose: False
-  scheduler_kwargs:
-    # monitor: &monitor qm9/mae/train
-    # mode: min
-    # frequency: 1
-  target_nan_mask: null # null: no mask, 0: 0 mask, ignore-flatten, ignore-mean-per-label
-  multitask_handling: flatten # flatten, mean-per-label
-
-# Task-specific
-metrics:
-  qm9: &qm9_metrics
-    - name: mae
-      metric: mae_ipu
-      target_nan_mask: null
-      multitask_handling: flatten
-      threshold_kwargs: null
-    - name: pearsonr
-      metric: pearsonr_ipu
-      threshold_kwargs: null
-      target_nan_mask: null
-      multitask_handling: mean-per-label
-    - name: r2_score
-      metric: r2_score_ipu
-      target_nan_mask: null
-      multitask_handling: mean-per-label
-      threshold_kwargs: null
-  tox21:
-    - name: auroc
-      metric: auroc_ipu
-      task: binary
-      multitask_handling: mean-per-label
-      threshold_kwargs: null
-    - name: avpr
-      metric: average_precision_ipu
-      task: binary
-      multitask_handling: mean-per-label
-      threshold_kwargs: null
-    - name: f1 > 0.5
-      metric: f1
-      multitask_handling: mean-per-label
-      target_to_int: True
-      num_classes: 2
-      average: micro
-      threshold_kwargs: &threshold_05
-        operator: greater
-        threshold: 0.5
-        th_on_preds: True
-        th_on_target: True
-    - name: precision > 0.5
-      metric: precision
-      multitask_handling: mean-per-label
-      average: micro
-      threshold_kwargs: *threshold_05
-  zinc: *qm9_metrics
-
-trainer:
-  seed: *seed
-  logger:
-    save_dir: logs/neurips2023-small/
-    name: *name
-    project: *name
-  #early_stopping:
-  #  monitor: *monitor
-  #  min_delta: 0
-  #  patience: 10
-  #  mode: &mode min
-  model_checkpoint:
-    dirpath: models_checkpoints/neurips2023-small-gcn/
-    filename: *name
-    # monitor: *monitor
-    # mode: *mode
-    # save_top_k: 1
-    save_last: True
-  trainer:
-    max_epochs: *max_epochs
-    min_epochs: 1
-    check_val_every_n_epoch: 20
diff --git a/expts/neurips2023_configs/config_small_gcn_gpu.yaml b/expts/neurips2023_configs/config_small_gcn_gpu.yaml
index bf2a03a3b..8b5a46e26 100644
--- a/expts/neurips2023_configs/config_small_gcn_gpu.yaml
+++ b/expts/neurips2023_configs/config_small_gcn_gpu.yaml
@@ -1,7 +1,15 @@
 # Testing GCN on ToyMix with FP16/32 on GPU
+
+defaults:
+  - base_config: small
+  - _self_
+
 constants:
-  name: &name neurips2023_small_data_gcn_gpu
-  config_override: "expts/neurips2023_configs/config_small_gcn.yaml"
+  name: neurips2023_small_data_gcn_gpu
+
+architecture:
+  gnn: # Set as null to avoid a post-nn network
+    layer_type: 'pyg:gcn' #pyg:gine #'pyg:gps' # pyg:gated-gcn, pyg:gine,pyg:gps
 
 accelerator:
   type: gpu # cpu or ipu or gpu
diff --git a/expts/neurips2023_configs/config_small_gin.yaml b/expts/neurips2023_configs/config_small_gin.yaml
index a22a4e6a6..e018f722a 100644
--- a/expts/neurips2023_configs/config_small_gin.yaml
+++ b/expts/neurips2023_configs/config_small_gin.yaml
@@ -1,16 +1,12 @@
 # Testing the gin model with the PCQMv2 dataset on IPU.
+
+defaults:
+  - base_config: small
+  - _self_
+
 constants:
-  name: &name neurips2023_small_data_gin
-  config_override: "expts/neurips2023_configs/config_small_gcn.yaml"
+  name: neurips2023_small_data_gin
 
 architecture:
   gnn: # Set as null to avoid a post-nn network
-    layer_type: 'pyg:gin' #pyg:gine #'pyg:gps' # pyg:gated-gcn, pyg:gine,pyg:gps
-
-trainer:
-  logger:
-    name: *name
-    project: *name
-  model_checkpoint:
-    dirpath: models_checkpoints/neurips2023-small-gin/
-    filename: *name
+    layer_type: 'pyg:gin'
diff --git a/expts/neurips2023_configs/config_small_gine.yaml b/expts/neurips2023_configs/config_small_gine.yaml
index d25f89a72..111bebbc2 100644
--- a/expts/neurips2023_configs/config_small_gine.yaml
+++ b/expts/neurips2023_configs/config_small_gine.yaml
@@ -1,7 +1,11 @@
 # Testing the gine model with the PCQMv2 dataset on IPU.
+
+defaults:
+  - base_config: small
+  - _self_
+
 constants:
-  name: &name neurips2023_small_data_gine
-  config_override: "expts/neurips2023_configs/config_small_gcn.yaml"
+  name: neurips2023_small_data_gine
 
 architecture:
   pre_nn_edges: # Set as null to avoid a pre-nn network
@@ -10,18 +14,10 @@ architecture:
     depth: 2
     activation: relu
     last_activation: none
-    dropout: 0.1
-    normalization: &normalization layer_norm
-    last_normalization: *normalization
+    dropout: ${architecture.pre_nn.dropout}
+    normalization: ${architecture.pre_nn.normalization}
+    last_normalization: ${architecture.pre_nn.normalization}
    residual_type: none
 
   gnn: # Set as null to avoid a post-nn network
-    layer_type: 'pyg:gine' #pyg:gine #'pyg:gps' # pyg:gated-gcn, pyg:gine,pyg:gps
-
-trainer:
-  logger:
-    name: *name
-    project: *name
-  model_checkpoint:
-    dirpath: models_checkpoints/neurips2023-small-gine/
-    filename: *name
+    layer_type: 'pyg:gine' #pyg:gine #'pyg:gps' # pyg:gated-gcn, pyg:gine,pyg:gps
\ No newline at end of file
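In the mpnn override below, only a few `gnn` keys are redefined; depth, activation, residual_type and the rest now come from base_config/small.yaml, because OmegaConf merges mappings key by key rather than replacing them wholesale. A minimal sketch of that merge semantics (keys abbreviated, values illustrative):

from omegaconf import OmegaConf

base = OmegaConf.create(
    {"gnn": {"out_dim": 96, "depth": 4, "layer_type": "pyg:gcn", "layer_kwargs": None}}
)
override = OmegaConf.create(
    {"gnn": {"out_dim": 64, "layer_type": "pyg:gps",
             "layer_kwargs": {"mpnn_type": "pyg:mpnnplus", "out_dim_edges": 32}}}
)

merged = OmegaConf.merge(base, override)
print(merged.gnn.depth)       # 4, kept from the base
print(merged.gnn.layer_type)  # 'pyg:gps', replaced by the override
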
diff --git a/expts/neurips2023_configs/config_small_mpnn.yaml b/expts/neurips2023_configs/config_small_mpnn.yaml
index d7c862359..357a8f560 100644
--- a/expts/neurips2023_configs/config_small_mpnn.yaml
+++ b/expts/neurips2023_configs/config_small_mpnn.yaml
@@ -1,7 +1,10 @@
 # Testing the mpnn only model with the PCQMv2 dataset on IPU.
+
+defaults:
+  - base_config: small
+
 constants:
-  name: &name neurips2023_small_data_mpnn
-  config_override: "expts/neurips2023_configs/config_small_gcn.yaml"
+  name: neurips2023_small_data_mpnn
 
 architecture:
   pre_nn_edges: # Set as null to avoid a pre-nn network
@@ -10,33 +13,15 @@ architecture:
     depth: 2
     activation: relu
     last_activation: none
-    dropout: 0.18
-    normalization: layer_norm
-    last_normalization: layer_norm
+    dropout: ${architecture.pre_nn.dropout}
+    normalization: ${architecture.pre_nn.normalization}
+    last_normalization: ${architecture.pre_nn.normalization}
     residual_type: none
 
   gnn: # Set as null to avoid a post-nn network
     out_dim: &gnn_dim 64
     hidden_dims: *gnn_dim
-    depth: 4
-    activation: gelu
-    last_activation: none
-    dropout: 0.1
-    normalization: "layer_norm"
-    last_normalization: *normalization
-    residual_type: simple
-    virtual_node: 'none'
-    layer_type: 'pyg:mpnnplus' #pyg:gine #'pyg:gps' # pyg:gated-gcn, pyg:gine,pyg:gps
+    layer_type: 'pyg:gps' #pyg:gine #'pyg:gps' # pyg:gated-gcn, pyg:gine,pyg:gps
     layer_kwargs: # Parameters for the model itself. You could define dropout_attn: 0.1
-      in_dim: *gnn_dim
-      out_dim: *gnn_dim
-      in_dim_edges: 32
+      mpnn_type: 'pyg:mpnnplus'
       out_dim_edges: 32
-
-trainer:
-  logger:
-    name: *name
-    project: *name
-  model_checkpoint:
-    dirpath: models_checkpoints/neurips2023-small-mpnn/
-    filename: *name
diff --git a/expts/neurips2023_configs/debug/config_debug.yaml b/expts/neurips2023_configs/debug/config_debug.yaml
index 5e078d7c4..a323427e5 100644
--- a/expts/neurips2023_configs/debug/config_debug.yaml
+++ b/expts/neurips2023_configs/debug/config_debug.yaml
@@ -3,6 +3,7 @@ constants:
   name: &name neurips2023_small_data_mpnn
   seed: &seed 999
   raise_train_error: true # Whether the code should raise an error if it crashes during training
+  entity: multitask-gnn
 
 # accelerator:
 #   type: ipu # cpu or ipu or gpu
@@ -108,7 +109,6 @@ datamodule:
     num_workers: 0 # -1 to use all
     persistent_workers: False # if use persistent worker at the start of each epoch.
     # Using persistent_workers false might make the start of each epoch very long.
-    featurization_backend: "loky"
 
 
 architecture:
diff --git a/expts/neurips2023_configs/debug/config_small_gcn_debug.yaml b/expts/neurips2023_configs/debug/config_small_gcn_debug.yaml
index 1b4171a43..717ae0675 100644
--- a/expts/neurips2023_configs/debug/config_small_gcn_debug.yaml
+++ b/expts/neurips2023_configs/debug/config_small_gcn_debug.yaml
@@ -3,6 +3,7 @@ constants:
   name: &name neurips2023_small_data_gcn
   seed: &seed 42
   raise_train_error: true # Whether the code should raise an error if it crashes during training
+  entity: multitask-gnn
 
 accelerator:
   type: ipu # cpu or ipu or gpu
@@ -122,7 +123,6 @@ datamodule:
     num_workers: 30 # -1 to use all
     persistent_workers: False # if use persistent worker at the start of each epoch.
     # Using persistent_workers false might make the start of each epoch very long.
-    featurization_backend: "loky"
 
 
 architecture:
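Since the base configs reference `${constants.name}` in their logger and checkpoint paths, every override file must define `constants.name`; composing a config and dumping it with interpolations forced is a quick way to catch a missing key before launching a run. A sketch, again assuming the Hydra compose API and illustrative paths:

from hydra import initialize, compose
from omegaconf import OmegaConf

with initialize(version_base=None, config_path="expts/neurips2023_configs"):
    cfg = compose(config_name="config_small_gcn_gpu")

# resolve=True forces every ${...} interpolation; a missing constants.name
# would raise an error here instead of mid-run.
print(OmegaConf.to_yaml(cfg, resolve=True))
assert cfg.trainer.logger.name == cfg.constants.name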