Sid sidneyarcidiacono

@sidneyarcidiacono
sidneyarcidiacono / proteins_embedding.ipynb
Created May 17, 2021 20:25
PROTEINS_Embedding.ipynb
@sidneyarcidiacono
sidneyarcidiacono / PROTEINS_Embedding.py
Last active May 17, 2021 20:49
reading in the PROTEINS dataset
# Reading in the PROTEINS dataset
from spektral.datasets import TUDataset
# Spektral provides the TUDataset class, which contains benchmark datasets for graph classification
data = TUDataset('PROTEINS')
data
# Since we'll use Spektral's GCN layer, we follow the original GCN paper and apply its preprocessing:
from spektral.transforms import GCNFilter
data.apply(GCNFilter())
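Under the hood, GCNFilter applies the renormalization from the GCN paper (Kipf & Welling): add self-loops, then symmetrically normalize the adjacency matrix. A minimal NumPy sketch of that transform (gcn_filter here is an illustrative stand-alone function, not Spektral's own implementation):

```python
import numpy as np

def gcn_filter(a):
    # Kipf & Welling renormalization: D^{-1/2} (A + I) D^{-1/2}
    a_hat = a + np.eye(a.shape[0])          # add self-loops
    d_inv_sqrt = 1.0 / np.sqrt(a_hat.sum(axis=1))
    return a_hat * d_inv_sqrt[:, None] * d_inv_sqrt[None, :]

a = np.array([[0.0, 1.0],
              [1.0, 0.0]])
print(gcn_filter(a))  # every entry is 0.5 for this 2-node graph
```

Spektral's GCNFilter does the equivalent on each graph's adjacency matrix in the dataset.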
@sidneyarcidiacono
sidneyarcidiacono / PROTEINS_embedding.py
Last active May 17, 2021 20:48
Train/test split for Spektral tutorial
# Split the data into train and test sets. A plain slice would take the first 80% / last 20% in dataset order, which isn't ideal, so we shuffle first.
import numpy as np
np.random.shuffle(data)
split = int(0.8 * len(data))
data_train, data_test = data[:split], data[split:]
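The shuffle-then-slice logic is plain Python/NumPy and can be sanity-checked on a toy list standing in for the graph dataset:

```python
import numpy as np

items = list(range(10))   # toy stand-in for the graph dataset
np.random.seed(0)         # seeded here only so the sketch is reproducible
np.random.shuffle(items)

split = int(0.8 * len(items))
train, test = items[:split], items[split:]
print(len(train), len(test))  # 8 2
```

The same slicing applies to the Spektral dataset, since it supports len() and slicing.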
@sidneyarcidiacono
sidneyarcidiacono / PROTEINS_embedding.py
Last active May 17, 2021 20:48
Import layers for Spektral GCN model
# Spektral is built on top of Keras, so we can use the Keras functional API to build a model that first embeds,
# then sums the nodes together (global pooling), then classifies the result with a dense softmax layer
# First, let's import the necessary layers:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout
from spektral.layers import GCNConv, GlobalSumPool
@sidneyarcidiacono
sidneyarcidiacono / PROTEINS_embedding.py
Last active May 17, 2021 20:48
Building our GCN model with subclassing using Spektral/Keras
# Now, we can use model subclassing to define our model:
class ProteinsGNN(Model):
    def __init__(self, n_hidden, n_labels):
        super().__init__()
        # Define our GCN layer with n_hidden channels
        self.graph_conv = GCNConv(n_hidden)
        # Define our global pooling layer
        self.pool = GlobalSumPool()
        # Dropout (0.5 is a typical rate), then a dense softmax classifier
        self.dropout = Dropout(0.5)
        self.dense = Dense(n_labels, activation='softmax')

    def call(self, inputs):
        out = self.dropout(self.graph_conv(inputs))
        out = self.pool(out)
        return self.dense(out)
@sidneyarcidiacono
sidneyarcidiacono / PROTEINS_embedding.py
Last active May 17, 2021 20:47
Instantiate model for training Spektral
# Instantiate our model for training
model = ProteinsGNN(32, data.n_labels)
@sidneyarcidiacono
sidneyarcidiacono / PROTEINS_embedding.py
Last active May 17, 2021 20:46
Compile Spektral/Keras GCN model
# Compile model with our optimizer (adam) and loss function
model.compile('adam', 'categorical_crossentropy')
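For reference, the categorical cross-entropy loss selected in compile() is, per sample, -Σₖ yₖ·log(ŷₖ). A hand-rolled NumPy version (illustrative only; Keras' implementation additionally handles clipping and logits):

```python
import numpy as np

def categorical_crossentropy(y_true, y_pred):
    # Mean over samples of -sum_k y_true[k] * log(y_pred[k])
    return float(np.mean(-np.sum(y_true * np.log(y_pred), axis=-1)))

y_true = np.array([[1.0, 0.0]])  # one-hot label
y_pred = np.array([[0.8, 0.2]])  # softmax output
print(round(categorical_crossentropy(y_true, y_pred), 4))  # -> 0.2231
```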
@sidneyarcidiacono
sidneyarcidiacono / PROTEINS_embedding.py
Last active May 17, 2021 20:45
Batch loading with Spektral
# Here's the trick: we can't just call Keras' fit() method directly on this model.
# Instead, we use Spektral's Loaders, which create mini-batches by iterating over the graphs in the dataset.
# For this first trial we'll use the loader recommended in Spektral's getting-started tutorial.
# TODO: read up on data modes and try other loaders later
from spektral.data import BatchLoader
loader = BatchLoader(data_train, batch_size=32)
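BatchLoader exposes a steps_per_epoch property, which tells Keras how many batches make up one pass over the data. The arithmetic behind it is just a ceiling division (the function and the count 890 below are illustrative, not Spektral's code):

```python
import math

def steps_per_epoch(n_graphs, batch_size):
    # One step per mini-batch; the final batch may be smaller
    return math.ceil(n_graphs / batch_size)

print(steps_per_epoch(890, 32))  # -> 28
```

Training then follows Spektral's documented pattern of feeding the loader's generator to fit(): `model.fit(loader.load(), steps_per_epoch=loader.steps_per_epoch, epochs=10)` (the epoch count is an arbitrary choice).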
# Install Spektral first if it isn't already available:
!pip install spektral