Creating A Graph

import torch
from torch_geometric.data import Data
 
x = torch.tensor([[-1], [0], [1]], dtype=torch.float) 
edge_index = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]], dtype=torch.long) 
 
# x.shape: [num_nodes x num_node_features] (node feature matrix)
# edge_index.shape: [2 x num_edges] (graph connectivity)
# edge_attr.shape: [num_edges x num_edge_features] (edge feature matrix)
# y.shape: [num_nodes, ...] or [1, ...] (node-level or graph-level target)
# pos.shape: [num_nodes, num_dimensions] (node position matrix)
data = Data(x=x, edge_index=edge_index, edge_attr=None, y=None, pos=None)
>>> Data(x=[3, 1], edge_index=[2, 4]) # show the shape of the attributes

The elements of edge_index must lie in the set $\{0, \ldots, n - 1\}$, where $n$ is the number of nodes.

Examining Elements

print(data.keys())
>>> ['x', 'edge_index']
 
print(data['x'])
>>> tensor([[-1.0], [0.0], [1.0]])
 
[print(f'{key} found in data') for key, item in data]
>>> x found in data
>>> edge_index found in data
 
'edge_attr' in data
>>> False
 
data.num_nodes
data.num_edges
data.num_node_features
 
data.has_isolated_nodes()
data.has_self_loops()
data.is_undirected()
data.is_directed()

Datasets

Graph-level task dataset

from torch_geometric.datasets import TUDataset
 
dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES')
len(dataset)
>>> 600
 
dataset.num_classes
>>> 6
 
dataset.num_node_features
>>> 3
 
data = dataset[0]
>>> Data(edge_index=[2, 168], x=[37, 3], y=[1])

Node-level task dataset

from torch_geometric.datasets import Planetoid
 
dataset = Planetoid(root='/tmp/Cora', name='Cora')
len(dataset)
>>> 1
 
dataset.num_classes
>>> 7
 
dataset.num_node_features
>>> 1433
 
data = dataset[0]
>>> Data(edge_index=[2, 10556], test_mask=[2708], train_mask=[2708], val_mask=[2708], x=[2708, 1433], y=[2708])

Mini-batches

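PyG builds a mini-batch by stacking the adjacency matrices of the individual graphs into one sparse block-diagonal matrix and concatenating the feature and target matrices along the node dimension. This allows the examples in one batch to have differing numbers of nodes and edges.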

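$A = \begin{bmatrix} A_{1} & & \\ & \ddots & \\ & & A_{n} \end{bmatrix}, \quad X = \begin{bmatrix} X_{1} \\ \vdots \\ X_{n} \end{bmatrix}, \quad Y = \begin{bmatrix} Y_{1} \\ \vdots \\ Y_{n} \end{bmatrix}$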

from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader
 
dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES', use_node_attr=True)
loader = DataLoader(dataset, batch_size=32, shuffle=True)
 
for batch in loader:
    batch
    >>> DataBatch(batch=[1082], edge_index=[2, 4066], x=[1082, 21], y=[32]) 
 
    batch.num_graphs
    >>> 32

DataBatch.batch is a column vector which maps each node to its respective graph in the batch

Data Transforms

Convert the point cloud dataset into a graph dataset by generating nearest neighbor graphs

import torch_geometric.transforms as T
from torch_geometric.datasets import ShapeNet
 
dataset = ShapeNet(root='/tmp/ShapeNet', categories=['Airplane'])
 
dataset[0]
>>> Data(pos=[2518, 3], y=[2518])
 
dataset = ShapeNet(root='/tmp/ShapeNet', categories=['Airplane'], pre_transform=T.KNNGraph(k=6))
 
dataset[0]
>>> Data(edge_index=[2, 15108], pos=[2518, 3], y=[2518])

My Knowledge Base

Explorer

Pytorch-Geometric Cheat-Sheet