Skip to content

Commit

Permalink
Merge branch 'graphium_3.0' into torchmetrics
Browse files Browse the repository at this point in the history
  • Loading branch information
DomInvivo authored Jul 13, 2024
2 parents 10a1017 + 7f933b7 commit 8aa0f2b
Show file tree
Hide file tree
Showing 107 changed files with 7,079 additions and 4,926 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.8", "3.9", "3.10"]
python-version: ["3.9", "3.10", "3.11"]
pytorch-version: ["2.0"]

runs-on: "ubuntu-latest"
Expand Down Expand Up @@ -52,6 +52,9 @@ jobs:
- name: Install test dependencies
run: micromamba install -c conda-forge pytdc # Required to run the `test_finetuning.py`

- name: Install C++ library
run: cd graphium/graphium_cpp && git clone https://github.com/pybind/pybind11.git && export PYTHONPATH=$PYTHONPATH:./pybind11 && python -m pip install . && cd ../..

- name: Run tests
run: pytest -m 'not ipu'

Expand Down
1 change: 1 addition & 0 deletions LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@
Copyright 2023 Valence Labs
Copyright 2023 Recursion Pharmaceuticals
Copyright 2023 Graphcore Limited
Copyright 2024 NVIDIA CORPORATION & AFFILIATES

Various Academic groups have also contributed to this software under
the given license. These include, but are not limited to, the following
Expand Down
29 changes: 0 additions & 29 deletions docs/api/graphium.features.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,37 +5,8 @@ Feature extraction and manipulation
=== "Contents"

* [Featurizer](#featurizer)
* [Positional Encoding](#positional-encoding)
* [Properties](#properties)
* [Spectral PE](#spectral-pe)
* [Random Walk PE](#random-walk-pe)
* [NMP](#nmp)

## Featurizer
------------
::: graphium.features.featurizer


## Positional Encoding
------------
::: graphium.features.positional_encoding


## Properties
------------
::: graphium.features.properties


## Spectral PE
------------
::: graphium.features.spectral


## Random Walk PE
------------
::: graphium.features.rw


## NMP
------------
::: graphium.features.nmp
3 changes: 2 additions & 1 deletion env.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ dependencies:
- gcsfs >=2021.6

# ML packages
- cuda-version # works also with CPU-only system.
- cuda-version == 11.2 # works also with CPU-only system.
- pytorch >=1.12
- lightning >=2.0
- torchmetrics
Expand All @@ -43,6 +43,7 @@ dependencies:
# chemistry
- rdkit
- datamol >=0.10
- boost # needed by rdkit

# Optional deps
- sympy
Expand Down
6 changes: 0 additions & 6 deletions expts/configs/config_gps_10M_pcqm4m.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ accelerator:

datamodule:
module_type: "MultitaskFromSmilesDataModule"
# module_type: "FakeDataModule" # Option to use generated data
args: # Matches that in the test_multitask_datamodule.py case.
task_specific_args: # To be replaced by a new class "DatasetParams"
homolumo:
Expand All @@ -76,10 +75,6 @@ datamodule:
split_test: 0.1

# Featurization
prepare_dict_or_graph: pyg:graph
featurization_n_jobs: 30
featurization_progress: True
featurization_backend: "loky"
featurization:
# OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence),
# 'possible_number_radical_e', 'possible_is_aromatic', 'possible_is_in_ring',
Expand Down Expand Up @@ -115,7 +110,6 @@ datamodule:
num_workers: 0 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
featurization_backend: "loky"


architecture:
Expand Down
6 changes: 0 additions & 6 deletions expts/configs/config_gps_10M_pcqm4m_mod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ constants:

datamodule:
module_type: "MultitaskFromSmilesDataModule"
# module_type: "FakeDataModule" # Option to use generated data
args: # Matches that in the test_multitask_datamodule.py case.
task_specific_args: # To be replaced by a new class "DatasetParams"
homolumo:
Expand All @@ -25,10 +24,6 @@ datamodule:
split_test: 0.1

# Featurization
prepare_dict_or_graph: pyg:graph
featurization_n_jobs: 30
featurization_progress: True
featurization_backend: "loky"
featurization:
# OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence),
# 'possible_number_radical_e', 'possible_is_aromatic', 'possible_is_in_ring',
Expand Down Expand Up @@ -84,7 +79,6 @@ datamodule:
num_workers: 0 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
featurization_backend: "loky"

# ipu_dataloader_training_opts:
# mode: async
Expand Down
7 changes: 0 additions & 7 deletions expts/configs/config_mpnn_10M_b3lyp.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ accelerator:

datamodule:
module_type: "MultitaskFromSmilesDataModule"
# module_type: "FakeDataModule" # Option to use generated data
args: # Matches that in the test_multitask_datamodule.py case.
task_specific_args: # To be replaced by a new class "DatasetParams"
betagap:
Expand Down Expand Up @@ -88,12 +87,7 @@ datamodule:
split_test: 0.1

# Featurization
prepare_dict_or_graph: pyg:graph
featurization_n_jobs: 30
featurization_progress: True
featurization_backend: "loky"
processed_graph_data_path: "../datacache/b3lyp/"
dataloading_from: ram
featurization:
# OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence),
# 'possible_number_radical_e', 'possible_is_aromatic', 'possible_is_in_ring',
Expand Down Expand Up @@ -127,7 +121,6 @@ datamodule:
num_workers: 0 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
featurization_backend: "loky"


architecture:
Expand Down
7 changes: 0 additions & 7 deletions expts/configs/config_mpnn_pcqm4m.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ constants:

datamodule:
module_type: "MultitaskFromSmilesDataModule"
# module_type: "FakeDataModule" # Option to use generated data
args: # Matches that in the test_multitask_datamodule.py case.
task_specific_args: # To be replaced by a new class "DatasetParams"
homolumo:
Expand All @@ -26,12 +25,7 @@ datamodule:
split_names: ["train", "valid", "test-dev"]

# Featurization
prepare_dict_or_graph: pyg:graph
featurization_n_jobs: 20
featurization_progress: True
featurization_backend: "loky"
processed_graph_data_path: "graphium/data/PCQM4Mv2/"
dataloading_from: ram
featurization:
# OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence),
# 'possible_number_radical_e', 'possible_is_aromatic', 'possible_is_in_ring',
Expand Down Expand Up @@ -61,7 +55,6 @@ datamodule:
num_workers: 40 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
featurization_backend: "loky"

# ipu_dataloader_training_opts:
# mode: async
Expand Down
5 changes: 0 additions & 5 deletions expts/hydra-configs/architecture/largemix.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,7 @@ architecture:
datamodule:
module_type: "MultitaskFromSmilesDataModule"
args:
prepare_dict_or_graph: pyg:graph
featurization_n_jobs: 20
featurization_progress: True
featurization_backend: "loky"
processed_graph_data_path: ${constants.datacache_path}
dataloading_from: "disk"
num_workers: 20 # -1 to use all
persistent_workers: True
featurization:
Expand Down
5 changes: 0 additions & 5 deletions expts/hydra-configs/architecture/pcqm4m.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,8 @@ architecture:

datamodule:
module_type: "MultitaskFromSmilesDataModule"
# module_type: "FakeDataModule" # Option to use generated data
args: # Matches that in the test_multitask_datamodule.py case.
# Featurization
prepare_dict_or_graph: pyg:graph
featurization_n_jobs: 30
featurization_progress: True
featurization_backend: "loky"
processed_graph_data_path: ${constants.datacache_path}
num_workers: 40 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
Expand Down
5 changes: 0 additions & 5 deletions expts/hydra-configs/architecture/toymix.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,7 @@ architecture:
datamodule:
module_type: "MultitaskFromSmilesDataModule"
args:
prepare_dict_or_graph: pyg:graph
featurization_n_jobs: 30
featurization_progress: True
featurization_backend: "loky"
processed_graph_data_path: ${constants.datacache_path}
dataloading_from: ram
num_workers: 30 # -1 to use all
persistent_workers: False
featurization:
Expand Down
1 change: 0 additions & 1 deletion expts/hydra-configs/finetuning/admet_baseline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ constants:
datamodule:
args:
batch_size_training: 32
dataloading_from: ram
persistent_workers: true
num_workers: 4

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ metrics:

datamodule:
module_type: "MultitaskFromSmilesDataModule"
# module_type: "FakeDataModule" # Option to use generated data
args: # Matches that in the test_multitask_datamodule.py case.
task_specific_args: # To be replaced by a new class "DatasetParams"
homolumo:
Expand Down
1 change: 0 additions & 1 deletion expts/hydra-configs/training/accelerator/largemix_cpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ datamodule:
args:
batch_size_training: 200
batch_size_inference: 200
featurization_n_jobs: 20
num_workers: 20

predictor:
Expand Down
1 change: 0 additions & 1 deletion expts/hydra-configs/training/accelerator/largemix_gpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ datamodule:
args:
batch_size_training: 2048
batch_size_inference: 2048
featurization_n_jobs: 6
num_workers: 6

predictor:
Expand Down
1 change: 0 additions & 1 deletion expts/hydra-configs/training/accelerator/toymix_cpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ datamodule:
args:
batch_size_training: 200
batch_size_inference: 200
featurization_n_jobs: 4
num_workers: 4

predictor:
Expand Down
1 change: 0 additions & 1 deletion expts/hydra-configs/training/accelerator/toymix_gpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ datamodule:
args:
batch_size_training: 200
batch_size_inference: 200
featurization_n_jobs: 4
num_workers: 4

predictor:
Expand Down
6 changes: 0 additions & 6 deletions expts/neurips2023_configs/base_config/large.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ accelerator:

datamodule:
module_type: "MultitaskFromSmilesDataModule"
# module_type: "FakeDataModule" # Option to use generated data
args: # Matches that in the test_multitask_datamodule.py case.
task_specific_args: # To be replaced by a new class "DatasetParams"
l1000_vcap:
Expand Down Expand Up @@ -133,11 +132,6 @@ datamodule:
epoch_sampling_fraction: 1.0

# Featurization
prepare_dict_or_graph: pyg:graph
featurization_n_jobs: 30
featurization_progress: True
featurization_backend: "loky"
dataloading_from: disk
processed_graph_data_path: ${constants.datacache_path}
featurization:
# OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence),
Expand Down
6 changes: 0 additions & 6 deletions expts/neurips2023_configs/base_config/large_pcba.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ accelerator:

datamodule:
module_type: "MultitaskFromSmilesDataModule"
# module_type: "FakeDataModule" # Option to use generated data
args: # Matches that in the test_multitask_datamodule.py case.
task_specific_args: # To be replaced by a new class "DatasetParams"

Expand Down Expand Up @@ -132,11 +131,6 @@ datamodule:
#epoch_sampling_fraction: 1.0

# Featurization
prepare_dict_or_graph: pyg:graph
featurization_n_jobs: 30
featurization_progress: True
featurization_backend: "loky"
dataloading_from: disk
processed_graph_data_path: ${constants.datacache_path}
featurization:
# OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence),
Expand Down
6 changes: 0 additions & 6 deletions expts/neurips2023_configs/base_config/large_pcqm_g25.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ accelerator:

datamodule:
module_type: "MultitaskFromSmilesDataModule"
# module_type: "FakeDataModule" # Option to use generated data
args: # Matches that in the test_multitask_datamodule.py case.
task_specific_args: # To be replaced by a new class "DatasetParams"

Expand Down Expand Up @@ -132,11 +131,6 @@ datamodule:
# epoch_sampling_fraction: 1.0

# Featurization
prepare_dict_or_graph: pyg:graph
featurization_n_jobs: 30
featurization_progress: True
featurization_backend: "loky"
dataloading_from: disk
processed_graph_data_path: ${constants.datacache_path}
featurization:
# OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence),
Expand Down
6 changes: 0 additions & 6 deletions expts/neurips2023_configs/base_config/large_pcqm_n4.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ accelerator:

datamodule:
module_type: "MultitaskFromSmilesDataModule"
# module_type: "FakeDataModule" # Option to use generated data
args: # Matches that in the test_multitask_datamodule.py case.
task_specific_args: # To be replaced by a new class "DatasetParams"

Expand Down Expand Up @@ -132,11 +131,6 @@ datamodule:
epoch_sampling_fraction: 1.0

# Featurization
prepare_dict_or_graph: pyg:graph
featurization_n_jobs: 30
featurization_progress: True
featurization_backend: "loky"
dataloading_from: disk
processed_graph_data_path: ${constants.datacache_path}
featurization:
# OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence),
Expand Down
5 changes: 0 additions & 5 deletions expts/neurips2023_configs/base_config/small.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ accelerator:

datamodule:
module_type: "MultitaskFromSmilesDataModule"
# module_type: "FakeDataModule" # Option to use generated data
args: # Matches that in the test_multitask_datamodule.py case.
task_specific_args: # To be replaced by a new class "DatasetParams"
qm9:
Expand Down Expand Up @@ -97,10 +96,6 @@ datamodule:
method: "normal"

# Featurization
prepare_dict_or_graph: pyg:graph
featurization_n_jobs: 30
featurization_progress: True
featurization_backend: "loky"
processed_graph_data_path: "../datacache/neurips2023-small/"
featurization:
# OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence),
Expand Down
Loading

0 comments on commit 8aa0f2b

Please sign in to comment.