Creating A Graph
import torch
from torch_geometric.data import Data
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)
edge_index = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]], dtype=torch.long)
# x.shape: [num_nodes x num_node_features] (node feature matrix)
# edge_index.shape: [2 x num_edges] (graph connectivity)
# edge_attr.shape: [num_edges x num_edge_features] (edge feature matrix)
# y.shape: [num_nodes, ...] or [1, ...] (node-level or graph-level target)
# pos.shape: [num_nodes, num_dimensions] (node position matrix)
data = Data(x=x, edge_index=edge_index, edge_attr=None, y=None, pos=None)
>>> Data(x=[3, 1], edge_index=[2, 4]) # show the shape of the attributes
The elements in edge_index should be in the range $\{0, \ldots, N-1\}$, where $N$ is the number of nodes.
Examining Elements
print(data.keys())
>>> ['x', 'edge_index']
print(data['x'])
>>> tensor([[-1.0], [0.0], [1.0]])
[print(f'{key} found in data') for key, item in data]
>>> x found in data
>>> edge_index found in data
'edge_attr' in data
>>> False
data.num_nodes
data.num_edges
data.num_node_features
data.has_isolated_nodes()
data.has_self_loops()
data.is_undirected()
data.is_directed()
Datasets
Graph-level task dataset
from torch_geometric.datasets import TUDataset
dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES')
len(dataset)
>>> 600
dataset.num_classes
>>> 6
dataset.num_node_features
>>> 3
data = dataset[0]
>>> Data(edge_index=[2, 168], x=[37, 3], y=[1])
Node-level task dataset
from torch_geometric.datasets import Planetoid
dataset = Planetoid(root='/tmp/Cora', name='Cora')
len(dataset)
>>> 1
dataset.num_classes
>>> 7
dataset.num_node_features
>>> 1433
data = dataset[0]
>>> Data(edge_index=[2, 10556], test_mask=[2708], train_mask=[2708], val_mask=[2708], x=[2708, 1433], y=[2708])
Mini-batches
PyG builds mini-batches by creating sparse block-diagonal adjacency matrices and concatenating the feature and target matrices along the node dimension. This allows the examples in one batch to have differing numbers of nodes and edges.
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader
dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES', use_node_attr=True)
loader = DataLoader(dataset, batch_size=32, shuffle=True)
for batch in loader:
batch
>>> DataBatch(batch=[1082], edge_index=[2, 4066], x=[1082, 21], y=[32])
batch.num_graphs
>>> 32
DataBatch.batch is a column vector which maps each node to its respective graph in the batch.
Data Transforms
Convert the point cloud dataset into a graph dataset by generating nearest neighbor graphs
import torch_geometric.transforms as T
from torch_geometric.datasets import ShapeNet
dataset = ShapeNet(root='/tmp/ShapeNet', categories=['Airplane'])
dataset[0]
>>> Data(pos=[2518, 3], y=[2518])
dataset = ShapeNet(root='/tmp/ShapeNet', categories=['Airplane'], pre_transform=T.KNNGraph(k=6))
dataset[0]
>>> Data(edge_index=[2, 15108], pos=[2518, 3], y=[2518])