Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
danielegrattarola committed Apr 9, 2022
2 parents c1d8878 + 613f7a5 commit 43ce413
Show file tree
Hide file tree
Showing 34 changed files with 1,018 additions and 480 deletions.
2 changes: 2 additions & 0 deletions docs/autogen.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@
"functions": [],
"methods": [],
"classes": [
layers.SRCPool,
layers.DiffPool,
layers.LaPool,
layers.MinCutPool,
layers.SAGPool,
layers.TopKPool,
Expand Down
14 changes: 8 additions & 6 deletions docs/templates/creating-layer.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,28 +53,30 @@ def call(self, inputs):
```

Then, we implement the `message` function.
The `get_i` and `get_j` built-in methods can be used to automatically access either side of the edges \(i \leftarrow j\). For instance, we can use `get_j` to access the node features `x[j]` of all neighbors `j`.
The `get_sources` and `get_targets` built-in methods can be used to automatically retrieve the node attributes of nodes that are sending (sources) or receiving (targets) a message.
For instance, we can use `get_sources` to access the node features `x[j]` of all neighbors `j`.

If you need direct access to the edge indices, you can use the `index_i` and `index_j` attributes.
If you need direct access to the edge indices, you can use the `index_sources` and `index_targets` attributes.

In this case, we only need to get the neighbors' features and return them:
In this case, we only need to get the neighbors' features and return them:

```py
def message(self, x):
# Get the node features of all neighbors
return self.get_j(x)
return self.get_sources(x)
```

Then, we define an aggregation function for the messages. We can use a simple average of the nodes:

```py
from spektral.layers.ops import scatter_mean


def aggregate(self, messages):
return scatter_mean(messages, self.index_i, self.n_nodes)
return scatter_mean(messages, self.index_targets, self.n_nodes)
```

**Note**: `n_nodes` is computed dynamically at the start of propagation, exactly like `index_i`.
**Note**: `n_nodes` is computed dynamically at the start of propagation, exactly like `index_targets`.

Since there are a few common aggregation functions that are often used in the literature, you can also skip the implementation of this method and simply pass a special keyword to the `__init__()` method of the superclass:

Expand Down
2 changes: 1 addition & 1 deletion examples/node_prediction/citation_gcn.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def mask_to_weights(mask):
for mask in (dataset.mask_tr, dataset.mask_va, dataset.mask_te)
)

model = GCN(n_labels=dataset.n_labels, n_input_channels=dataset.n_node_features)
model = GCN(n_labels=dataset.n_labels)
model.compile(
optimizer=Adam(learning_rate),
loss=CategoricalCrossentropy(reduction="sum"),
Expand Down
2 changes: 1 addition & 1 deletion examples/node_prediction/citation_gcn_custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
x, a, y = graph.x, graph.a, graph.y
mask_tr, mask_va, mask_te = dataset.mask_tr, dataset.mask_va, dataset.mask_te

model = GCN(n_labels=dataset.n_labels, n_input_channels=dataset.n_node_features)
model = GCN(n_labels=dataset.n_labels)
optimizer = Adam(lr=1e-2)
loss_fn = CategoricalCrossentropy()

Expand Down
2 changes: 1 addition & 1 deletion examples/other/explain_node_predictions.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def mask_to_weights(mask):
for mask in (dataset.mask_tr, dataset.mask_va, dataset.mask_te)
)

model = GCN(n_labels=dataset.n_labels, n_input_channels=dataset.n_node_features)
model = GCN(n_labels=dataset.n_labels)
model.compile(
optimizer=Adam(learning_rate),
loss=CategoricalCrossentropy(reduction="sum"),
Expand Down
2 changes: 1 addition & 1 deletion examples/other/node_clustering_mincut.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def train_step(inputs):
a_in = Input(shape=(None,), name="A_in", sparse=True)

x_1 = GCSConv(16, activation="elu")([x_in, a_in])
x_1, a_1, s_1 = MinCutPool(n_clusters, return_mask=True)([x_1, a_in])
x_1, a_1, s_1 = MinCutPool(n_clusters, return_selection=True)([x_1, a_in])

model = Model([x_in, a_in], [x_1, s_1])

Expand Down
8 changes: 4 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,18 @@

setup(
name="spektral",
version="1.0.8",
version="1.0.9",
packages=find_packages(),
install_requires=[
"joblib",
"lxml",
"networkx",
"numpy<1.20",
"numpy",
"pandas",
"requests",
"scikit-learn",
"scipy",
"tensorflow>=2.1.0",
"tensorflow>=2.2.0",
"tqdm",
],
url="https://github.com/danielegrattarola/spektral",
Expand All @@ -27,8 +27,8 @@
long_description=long_description,
long_description_content_type="text/markdown",
classifiers=[
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
],
)
143 changes: 79 additions & 64 deletions spektral/data/loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from spektral.data.utils import (
batch_generator,
collate_labels_batch,
collate_labels_disjoint,
get_spec,
prepend_none,
Expand Down Expand Up @@ -78,10 +79,10 @@ def train_step(inputs, target):
**Arguments**
- `dataset`: a `spektral.data.Dataset` object;
- `batch_size`: size of the mini-batches;
- `epochs`: number of epochs to iterate over the dataset. By default (`None`)
- `batch_size`: int, size of the mini-batches;
- `epochs`: int, number of epochs to iterate over the dataset. By default (`None`)
iterates indefinitely;
- `shuffle`: whether to shuffle the dataset at the start of each epoch.
- `shuffle`: bool, whether to shuffle the dataset at the start of each epoch.
"""

def __init__(self, dataset, batch_size=1, epochs=None, shuffle=True):
Expand Down Expand Up @@ -178,11 +179,10 @@ class SingleLoader(Loader):
**Arguments**
- `dataset`: a `spektral.data.Dataset` object with only one graph;
- `epochs`: number of epochs to iterate over the dataset. By default (`None`)
- `epochs`: int, number of epochs to iterate over the dataset. By default (`None`)
iterates indefinitely;
- `shuffle`: whether to shuffle the data at the start of each epoch;
- `sample_weights`: if given, these will be appended to the output
automatically.
- `shuffle`: bool, whether to shuffle the data at the start of each epoch;
- `sample_weights`: Numpy array, will be appended to the output automatically.
**Output**
Expand All @@ -197,9 +197,8 @@ class SingleLoader(Loader):
- `e`: same as `dataset[0].e`;
`labels` is the same as `dataset[0].y`.
`sample_weights` is the same object passed to the constructor.
`sample_weights` is the same array passed when creating the loader.
"""

def __init__(self, dataset, epochs=None, sample_weights=None):
Expand Down Expand Up @@ -262,6 +261,8 @@ class DisjointLoader(Loader):
**Arguments**
- `dataset`: a graph Dataset;
- `node_level`: bool, if `True` stack the labels vertically for node-level
prediction;
- `batch_size`: size of the mini-batches;
- `epochs`: number of epochs to iterate over the dataset. By default (`None`)
iterates indefinitely;
Expand Down Expand Up @@ -321,7 +322,7 @@ def tf_signature(self):
Adjacency matrix has shape [n_nodes, n_nodes]
Node features have shape [n_nodes, n_node_features]
Edge features have shape [n_edges, n_edge_features]
Targets have shape [..., n_labels]
Targets have shape [*, n_labels]
"""
signature = self.dataset.signature
if "y" in signature:
Expand All @@ -347,33 +348,40 @@ class BatchLoader(Loader):
If `n_max` is the number of nodes of the biggest graph in the batch, then
the padding consist of adding zeros to the node features, adjacency matrix,
and edge attributes of each graph so that they have shapes
`(n_max, n_node_features)`, `(n_max, n_max)`, and
`(n_max, n_max, n_edge_features)` respectively.
`[n_max, n_node_features]`, `[n_max, n_max]`, and
`[n_max, n_max, n_edge_features]` respectively.
The zero-padding is done batch-wise, which saves memory at the cost of
more computation. If latency is an issue but memory isn't, or if the
dataset has graphs with a similar number of nodes, you can use
the `PackedBatchLoader` that first zero-pads all the dataset and then
the `PackedBatchLoader` that zero-pads the whole dataset once and then
iterates over it.
Note that the adjacency matrix and edge attributes are returned as dense
arrays (mostly due to the lack of support for sparse tensor operations for
rank >2).
arrays.
Only graph-level labels are supported with this loader (i.e., labels are not
zero-padded because they are assumed to have no "node" dimensions).
If `mask=True`, node attributes will be extended with a binary mask that indicates
valid nodes (the last feature of each node will be 1 if the node was originally in
the graph and 0 if it is a fake node added by zero-padding).
Use this flag in conjunction with layers.base.GraphMasking to start the propagation
of masks in a model (necessary for node-level prediction and models that use a
dense pooling layer like DiffPool or MinCutPool).
If `node_level=False`, the labels are interpreted as graph-level labels and
are returned as an array of shape `[batch, n_labels]`.
If `node_level=True`, then the labels are padded along the node dimension and are
returned as an array of shape `[batch, n_max, n_labels]`.
**Arguments**
- `dataset`: a graph Dataset;
- `mask`: if True, node attributes will be extended with a binary mask that
indicates valid nodes (the last feature of each node will be 1 if the node is valid
and 0 otherwise). Use this flag in conjunction with layers.base.GraphMasking to
start the propagation of masks in a model.
- `batch_size`: size of the mini-batches;
- `epochs`: number of epochs to iterate over the dataset. By default (`None`)
- `mask`: bool, whether to add a mask to the node features;
- `batch_size`: int, size of the mini-batches;
- `epochs`: int, number of epochs to iterate over the dataset. By default (`None`)
iterates indefinitely;
- `shuffle`: whether to shuffle the data at the start of each epoch.
- `shuffle`: bool, whether to shuffle the data at the start of each epoch;
- `node_level`: bool, if `True` pad the labels along the node dimension.
**Output**
Expand All @@ -385,19 +393,30 @@ class BatchLoader(Loader):
- `a`: adjacency matrices of shape `[batch, n_max, n_max]`;
- `e`: edge attributes of shape `[batch, n_max, n_max, n_edge_features]`.
`labels` have shape `[batch, n_labels]`.
`labels` have shape `[batch, n_labels]` if `node_level=False` or
`[batch, n_max, n_labels]` otherwise.
"""

def __init__(self, dataset, mask=False, batch_size=1, epochs=None, shuffle=True):
def __init__(
self,
dataset,
mask=False,
batch_size=1,
epochs=None,
shuffle=True,
node_level=False,
):
self.mask = mask
self.node_level = node_level
self.signature = dataset.signature
super().__init__(dataset, batch_size=batch_size, epochs=epochs, shuffle=shuffle)

def collate(self, batch):
packed = self.pack(batch)

y = packed.pop("y_list", None)
if y is not None:
y = np.array(y)
y = collate_labels_batch(y, node_level=self.node_level)

output = to_batch(**packed, mask=self.mask)
output = sp_matrices_to_sp_tensors(output)
Expand All @@ -415,12 +434,13 @@ def tf_signature(self):
Adjacency matrix has shape [batch, n_nodes, n_nodes]
Node features have shape [batch, n_nodes, n_node_features]
Edge features have shape [batch, n_nodes, n_nodes, n_edge_features]
Targets have shape [batch, ..., n_labels]
Labels have shape [batch, n_labels], or [batch, n_nodes, n_labels] if node_level=True
"""
signature = self.dataset.signature
signature = self.signature
for k in signature:
signature[k]["shape"] = prepend_none(signature[k]["shape"])
if "x" in signature:
if "x" in signature and self.mask:
# In case we have a mask, the mask is concatenated to the features
signature["x"]["shape"] = signature["x"]["shape"][:-1] + (
signature["x"]["shape"][-1] + 1,
)
Expand All @@ -430,6 +450,9 @@ def tf_signature(self):
if "e" in signature:
# Edge attributes have an extra None dimension in batch mode
signature["e"]["shape"] = prepend_none(signature["e"]["shape"])
if "y" in signature and self.node_level:
# Node labels have an extra None dimension
signature["y"]["shape"] = prepend_none(signature["y"]["shape"])

return to_tf_signature(signature)

Expand All @@ -454,10 +477,12 @@ class PackedBatchLoader(BatchLoader):
**Arguments**
- `dataset`: a graph Dataset;
- `batch_size`: size of the mini-batches;
- `epochs`: number of epochs to iterate over the dataset. By default (`None`)
- `mask`: bool, whether to add a mask to the node features;
- `batch_size`: int, size of the mini-batches;
- `epochs`: int, number of epochs to iterate over the dataset. By default (`None`)
iterates indefinitely;
- `shuffle`: whether to shuffle the data at the start of each epoch.
- `shuffle`: bool, whether to shuffle the data at the start of each epoch;
- `node_level`: bool, if `True` pad the labels along the node dimension.
**Output**
Expand All @@ -469,22 +494,35 @@ class PackedBatchLoader(BatchLoader):
- `a`: adjacency matrices of shape `[batch, n_max, n_max]`;
- `e`: edge attributes of shape `[batch, n_max, n_max, n_edge_features]`.
`labels` have shape `[batch, ..., n_labels]`.
`labels` have shape `[batch, n_labels]` if `node_level=False` or
`[batch, n_max, n_labels]` otherwise.
"""

def __init__(self, dataset, mask=False, batch_size=1, epochs=None, shuffle=True):
def __init__(
self,
dataset,
mask=False,
batch_size=1,
epochs=None,
shuffle=True,
node_level=False,
):
super().__init__(
dataset, mask=mask, batch_size=batch_size, epochs=epochs, shuffle=shuffle
dataset,
mask=mask,
batch_size=batch_size,
epochs=epochs,
shuffle=shuffle,
node_level=node_level,
)

# Drop the Dataset container and work on packed tensors directly
packed = self.pack(self.dataset)

y = packed.pop("y_list", None)
if y is not None:
y = np.array(y)
y = collate_labels_batch(y, node_level=self.node_level)

self.signature = dataset.signature
self.dataset = to_batch(**packed, mask=mask)
if y is not None:
self.dataset += (y,)
Expand All @@ -501,29 +539,6 @@ def collate(self, batch):
else:
return batch[:-1], batch[-1]

def tf_signature(self):
"""
Adjacency matrix has shape [batch, n_nodes, n_nodes]
Node features have shape [batch, n_nodes, n_node_features]
Edge features have shape [batch, n_nodes, n_nodes, n_edge_features]
Targets have shape [batch, ..., n_labels]
"""
signature = self.signature
for k in signature:
signature[k]["shape"] = prepend_none(signature[k]["shape"])
if "x" in signature:
signature["x"]["shape"] = signature["x"]["shape"][:-1] + (
signature["x"]["shape"][-1] + 1,
)
if "a" in signature:
# Adjacency matrix in batch mode is dense
signature["a"]["spec"] = tf.TensorSpec
if "e" in signature:
# Edge attributes have an extra None dimension in batch mode
signature["e"]["shape"] = prepend_none(signature["e"]["shape"])

return to_tf_signature(signature)

@property
def steps_per_epoch(self):
if len(self.dataset) > 0:
Expand All @@ -544,10 +559,10 @@ class MixedLoader(Loader):
**Arguments**
- `dataset`: a graph Dataset;
- `batch_size`: size of the mini-batches;
- `epochs`: number of epochs to iterate over the dataset. By default (`None`)
- `batch_size`: int, size of the mini-batches;
- `epochs`: int, number of epochs to iterate over the dataset. By default (`None`)
iterates indefinitely;
- `shuffle`: whether to shuffle the data at the start of each epoch.
- `shuffle`: bool, whether to shuffle the data at the start of each epoch.
**Output**
Expand Down
Loading

0 comments on commit 43ce413

Please sign in to comment.