Creating A Graph

import torch
from torch_geometric.data import Data
 
x = torch.tensor([[-1], [0], [1]], dtype=torch.float) 
edge_index = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]], dtype=torch.long) 
 
# x.shape: [num_nodes x num_node_features] (node feature matrix)
# edge_index.shape: [2 x num_edges] (graph connectivity)
# edge_attr.shape: [num_edges x num_edge_features] (edge feature matrix)
# y.shape: [num_nodes, ...] or [1, ...] (node-level or graph-level target)
# pos.shape: [num_nodes, num_dimensions] (node position matrix)
data = Data(x=x, edge_index=edge_index, edge_attr=None, y=None, pos=None)
>>> Data(x=[3, 1], edge_index=[2, 4]) # show the shape of the attributes

The elements in edge_index should be in the range $\{0, \ldots, N-1\}$, where $N$ is the number of nodes.

Examining Elements

print(data.keys())
>>> ['x', 'edge_index']
 
print(data['x'])
>>> tensor([[-1.0], [0.0], [1.0]])
 
# Iterating a Data object yields (key, value) pairs for the attributes that are set.
# A plain loop is used because the print is a side effect; a list comprehension
# would build (and, in a REPL, echo) a throwaway list of None values.
for key, _ in data:
    print(f'{key} found in data')
>>> x found in data
>>> edge_index found in data
 
'edge_attr' in data
>>> False
 
data.num_nodes
data.num_edges
data.num_node_features
 
data.has_isolated_nodes()
data.has_self_loops()
data.is_undirected()
data.is_directed()

Datasets

Graph-level task dataset

from torch_geometric.datasets import TUDataset
 
dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES')
len(dataset)
>>> 600
 
dataset.num_classes
>>> 6
 
dataset.num_node_features
>>> 3
 
data = dataset[0]
>>> Data(edge_index=[2, 168], x=[37, 3], y=[1])

Node-level task dataset

from torch_geometric.datasets import Planetoid
 
dataset = Planetoid(root='/tmp/Cora', name='Cora')
len(dataset)
>>> 1
 
dataset.num_classes
>>> 7
 
dataset.num_node_features
>>> 1433
 
data = dataset[0]
>>> Data(edge_index=[2, 10556], test_mask=[2708], train_mask=[2708], val_mask=[2708], x=[2708, 1433], y=[2708])

Mini-batches

PyG builds mini-batches by creating sparse block-diagonal adjacency matrices and concatenating feature and target matrices in the node dimension. This allows a differing number of nodes and edges across the examples in one batch.

from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader
 
dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES', use_node_attr=True)
loader = DataLoader(dataset, batch_size=32, shuffle=True)
 
for batch in loader:
    batch
    >>> DataBatch(batch=[1082], edge_index=[2, 4066], x=[1082, 21], y=[32]) 
 
    batch.num_graphs
    >>> 32

DataBatch.batch is a column vector which maps each node to its respective graph in the batch

Data Transforms

Convert the point cloud dataset into a graph dataset by generating nearest neighbor graphs

import torch_geometric.transforms as T
from torch_geometric.datasets import ShapeNet
 
dataset = ShapeNet(root='/tmp/ShapeNet', categories=['Airplane'])
 
dataset[0]
>>> Data(pos=[2518, 3], y=[2518])
 
dataset = ShapeNet(root='/tmp/ShapeNet', categories=['Airplane'], pre_transform=T.KNNGraph(k=6))
 
dataset[0]
>>> Data(edge_index=[2, 15108], pos=[2518, 3], y=[2518])