Sid sidneyarcidiacono

## PROTEINS_embedding.py
# Set our optimizer (adam)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# Define our loss function
criterion = torch.nn.CrossEntropyLoss()

# Initialize our train function
def train():
    model.train()

    for data in train_loader:  # Iterate in batches over the training dataset.

## PROTEINS_embedding.py
# Import everything we need to build our network:
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool

# Define our GCN class as a pytorch Module
class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GCN, self).__init__()

## PROTEINS_embedding.py
# Import DataLoader for batching. PyG >= 2.0 exposes it from
# torch_geometric.loader and deprecates the torch_geometric.data alias, so
# prefer the new location and fall back to the old one on older installs.
try:
    from torch_geometric.loader import DataLoader
except ImportError:
    from torch_geometric.data import DataLoader

# Our DataLoader stacks the adjacency matrices of a batch block-diagonally and
# concatenates feature and target matrices in the node dimension. This allows
# differing numbers of nodes and edges over examples in one batch.
# (from pytorch geometric docs)
# Training batches are reshuffled every epoch; test batches keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

## PROTEINS_embedding.py
# Now, we need to perform our train/test split.
# We create a seed, and then shuffle our data. Seeding torch's RNG before
# shuffling is meant to make the resulting split reproducible between runs.
torch.manual_seed(12345)
dataset = dataset.shuffle()

# Once it's shuffled, we slice the data to split:
#   - test:  the first 150 graphs
#   - train: everything after those, except the last 150 graphs
# NOTE(review): the last 150 graphs end up in neither split. Presumably they
# are held back on purpose (e.g. for validation) - confirm; if not, the
# training slice should be dataset[150:].
train_dataset = dataset[150:-150]
test_dataset = dataset[0:150]

# Take a look at the training versus test graphs

## PROTEINS_embedding.py
# Let's take a look at our data. We'll look at dataset (all data) and data (our first graph):

data = dataset[0]  # Get the first graph object.

print()
print(f'Dataset: {dataset}:')
print('====================')
# How many graphs?
print(f'Number of graphs: {len(dataset)}')
# How many features?

## PROTEINS_embedding.py
import torch
from torch_geometric.datasets import TUDataset

# Like Spektral, pytorch geometric provides us with benchmark TUDatasets
dataset = TUDataset(root='data/TUDataset', name='PROTEINS')

## PROTEINS_embedding.py
# Install required packages.
# NOTE: the leading `!` is IPython/Jupyter shell-escape syntax, so these lines
# only run inside a notebook cell, not as part of a plain Python script.
# The -f wheel index below is specific to torch 1.8.0 built for CUDA 10.1
# (cu101); it presumably has to match the torch build installed in the
# runtime - verify before reusing this cell elsewhere.
!pip install -q torch-scatter -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
!pip install -q torch-sparse -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
!pip install -q torch-geometric

## PROTEINS_embedding.py
# And feed it to our model by calling .load()
# Evaluate on the held-out test loader, not on `loader`: `loader` is what
# model.fit() consumed, so evaluating on it would report the loss on training
# batches. `steps` must come from the same loader so that exactly one pass
# over the test data is made.

loss = model.evaluate(test_loader.load(), steps=test_loader.steps_per_epoch)

print('Test loss: {}'.format(loss))

## PROTEINS_embedding.py
# To evaluate, let's instantiate another loader to test

test_loader = BatchLoader(data_test, batch_size=32)

## PROTEINS_embedding.py
# Now we can train! We don't need to specify a batch size, since our loader is basically a generator
# But we do need to specify the steps_per_epoch parameter

model.fit(loader.load(), steps_per_epoch=loader.steps_per_epoch, epochs=10)
	# Set our optimizer (adam)
	optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
	# Define our loss function
	criterion = torch.nn.CrossEntropyLoss()

	# Initialize our train function
	def train():
	model.train()

	for data in train_loader: # Iterate in batches over the training dataset.
	# Import everything we need to build our network:
	from torch.nn import Linear
	import torch.nn.functional as F
	from torch_geometric.nn import GCNConv
	from torch_geometric.nn import global_mean_pool

	# Define our GCN class as a pytorch Module
	class GCN(torch.nn.Module):
	def __init__(self, hidden_channels):
	super(GCN, self).__init__()
	# Import DataLoader for batching. PyG >= 2.0 exposes it from
	# torch_geometric.loader and deprecates the torch_geometric.data alias, so
	# prefer the new location and fall back to the old one on older installs.
	try:
		from torch_geometric.loader import DataLoader
	except ImportError:
		from torch_geometric.data import DataLoader

	# Our DataLoader stacks the adjacency matrices of a batch block-diagonally and
	# concatenates feature and target matrices in the node dimension. This allows
	# differing numbers of nodes and edges over examples in one batch.
	# (from pytorch geometric docs)
	# Training batches are reshuffled every epoch; test batches keep a fixed order.
	train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
	test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
	# Now, we need to perform our train/test split.
	# We create a seed, and then shuffle our data
	torch.manual_seed(12345)
	dataset = dataset.shuffle()

	# Once it's shuffled, we slice the data to split
	train_dataset = dataset[150:-150]
	test_dataset = dataset[0:150]

	# Take a look at the training versus test graphs
	# Let's take a look at our data. We'll look at dataset (all data) and data (our first graph):

	data = dataset[0] # Get the first graph object.

	print()
	print(f'Dataset: {dataset}:')
	print('====================')
	# How many graphs?
	print(f'Number of graphs: {len(dataset)}')
	# How many features?
	import torch
	from torch_geometric.datasets import TUDataset

	# Like Spektral, pytorch geometric provides us with benchmark TUDatasets
	dataset = TUDataset(root='data/TUDataset', name='PROTEINS')
	# Install required packages.
	!pip install -q torch-scatter -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
	!pip install -q torch-sparse -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
	!pip install -q torch-geometric
	# And feed it to our model by calling .load()
	# Evaluate on the held-out test loader, not on `loader`: `loader` is what
	# model.fit() consumed, so evaluating on it would report the loss on training
	# batches. `steps` must come from the same loader so that exactly one pass
	# over the test data is made.

	loss = model.evaluate(test_loader.load(), steps=test_loader.steps_per_epoch)

	print('Test loss: {}'.format(loss))
	# To evaluate, let's instantiate another loader to test

	test_loader = BatchLoader(data_test, batch_size=32)
	# Now we can train! We don't need to specify a batch size, since our loader is basically a generator
	# But we do need to specify the steps_per_epoch parameter

	model.fit(loader.load(), steps_per_epoch=loader.steps_per_epoch, epochs=10)