Mark Saroufim msaroufim

## convert_to_sparse.py
# Convert `dense` to a tf.SparseTensor, but only when the ratio
# np.sum(A) / (number of elements in `dense`) is below 0.1.
# NOTE(review): the test reads `A` while the conversion reads `dense`;
# presumably both name the same matrix -- confirm. np.sum(A) equals the
# non-zero count only if A holds 0/1 entries -- TODO confirm.
if(np.sum(A) / np.prod(dense.shape) < 0.1):
  # Scalar zero to compare against; assumes `dense` is float32 -- TODO confirm.
  zero = tf.constant(0, dtype=tf.float32)
  # Boolean mask that is True wherever `dense` differs from zero.
  where = tf.not_equal(dense, zero)
  # Coordinates of the True entries of the mask.
  indices = tf.where(where)
  # Values of `dense` at those coordinates.
  values = tf.gather_nd(dense, indices)
  # Sparse tensor with the same shape as `dense`.
  sparse = tf.SparseTensor(indices, values, dense.shape)

## arg_parser_example.py
import argparse
def main(argv=None):
    """Parse the options declared by the 'Spektral Argument Parser'.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``, which is what
            the function did before this parameter existed. Passing an
            explicit list lets callers and tests use the parser without
            touching the process command line.

    Returns:
        argparse.Namespace with the attributes ``epochs`` (int, default
        100), ``batch_size`` (int, default 256), ``amount`` (int, default
        133000) and ``learning_rate`` (float, default 1e-3).
    """
    parser = argparse.ArgumentParser(description='Spektral Argument Parser')
    parser.add_argument('--epochs', type=int, default=100,
                        help='number of epochs')
    parser.add_argument('--batch_size', type=int, default=256,
                        help='batch size')
    parser.add_argument('--amount', type=int, default=133000,
                        help='number of molecules in dataset')
    parser.add_argument('--learning_rate', type=float, default=1e-3,
                        help='learning rate')

    return parser.parse_args(argv)

## ogbn-papers100M.py
"""
This example implements the same GCN example for node classification provided
with the [Open Graph Benchmark](https://ogb.stanford.edu).
See https://github.com/snap-stanford/ogb/blob/master/examples/nodeproppred/arxiv/gnn.py
for the reference implementation.
"""
import numpy as np
from ogb.nodeproppred import NodePropPredDataset, Evaluator
from tensorflow.keras.layers import Input, Dropout, BatchNormalization
from tensorflow.keras.losses import SparseCategoricalCrossentropy

## qm9_disjoint.py
"""
This example shows how to perform regression of molecular properties with the
QM9 database, using a simple GNN in disjoint mode.
"""

import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.losses import MeanSquaredError

## multithreaded-sampling.jl
using Turing

# Turing model over the data `x`: `s` and `m` are the model's random
# variables, and every element of `x` shares the distribution Normal(m, sqrt(s)).
@model function gdemo(x)
    s ~ InverseGamma(2, 3)
    # sqrt(s) is the scale used both for the prior on `m` and for each x[k].
    sigma = sqrt(s)
    m ~ Normal(0, sigma)

    # One tilde statement per element of `x`.
    for k in eachindex(x)
        x[k] ~ Normal(m, sigma)
    end
end

## gameobject.cs
using System.Collections;
using System.Collections.Generic;
using UnityEngine;

public class Unit : MonoBehaviour {
    public Transform transform;

    void Update()
    {
        transform.position = new Vector2(transform.position.x + 1, transform.position.y + 1);

## splitting.py
# Original model - OOM :(
model = tf.keras.Sequential([
     Flatten(input_shape=(L,A)),
     Dense(units=L*A)
     Reshape((L,A)),
     Activation("softmax")

# Keep splitting until max layer size is OK
model_in = Input(shape=(L, A))
flatten = Flatten(input_shape=(L,A))(model_in)

## biotokenizer.py
class ProteinTokenizer:
  def __init__(self):
    self.vocab = ["U", "C", "F", "W", "G", "A", "M", "X", "L", "V", "D",
     "I", "E", "P", "T", "S", "R", "K", "Q", "Y", "H", "N", "*", "[MASK]", "[CLS]"]

  def tokenize(self, line):
    """Return a list holding each element of ``line`` (each character of a string) in order."""
    return list(line)

  def convert_tokens_to_ids(self, protein):

## parse_protein.py
from sys import platform
import string
import os

def parse_fasta(filename, a3m=False):
  if a3m:
    # for a3m files the lowercase letters are removed
    # as these do not align to the query sequence
    rm_lc = str.maketrans(dict.fromkeys(string.ascii_lowercase))


## download_fasta.py
# Description of FASTA format
# https://zhanglab.ccmb.med.umich.edu/FASTA/#:~:text=FASTA%20format%20is%20a%20text,by%20lines%20of%20sequence%20data

import os
import wget
import gzip


def download_fasta(url, data_directory='fasta_data'):
    if not os.path.exists(data_directory):
	if(np.sum(A) / np.prod(dense.shape) < 0.1):
	zero = tf.constant(0, dtype=tf.float32)
	where = tf.not_equal(dense, zero)
	indices = tf.where(where)
	values = tf.gather_nd(dense, indices)
	sparse = tf.SparseTensor(indices, values, dense.shape)
	import argparse
	def main():
	parser = argparse.ArgumentParser(description='Spektral Argument Parser')
	parser.add_argument('--epochs', type=int, default=100, help= 'number of epochs')
	parser.add_argument('--batch_size', type=int, default=256, help= 'batch size')
	parser.add_argument('--amount', type=int, default=133000, help= 'number of molecules in dataset')
	parser.add_argument('--learning_rate', type=float, default=1e-3, help='learning rate')

	args = parser.parse_args()
	return args
	"""
	This example implements the same GCN example for node classification provided
	with the [Open Graph Benchmark](https://ogb.stanford.edu).
	See https://github.com/snap-stanford/ogb/blob/master/examples/nodeproppred/arxiv/gnn.py
	for the reference implementation.
	"""
	import numpy as np
	from ogb.nodeproppred import NodePropPredDataset, Evaluator
	from tensorflow.keras.layers import Input, Dropout, BatchNormalization
	from tensorflow.keras.losses import SparseCategoricalCrossentropy
	"""
	This example shows how to perform regression of molecular properties with the
	QM9 database, using a simple GNN in disjoint mode.
	"""

	import numpy as np
	import tensorflow as tf
	from sklearn.model_selection import train_test_split
	from tensorflow.keras.layers import Input, Dense
	from tensorflow.keras.losses import MeanSquaredError
	using Turing

	@model function gdemo(x)
	s ~ InverseGamma(2, 3)
	m ~ Normal(0, sqrt(s))

	for i in eachindex(x)
	x[i] ~ Normal(m, sqrt(s))
	end
	end
	using System.Collections;
	using System.Collections.Generic;
	using UnityEngine;

	public class Unit : MonoBehaviour {
	public Transform transform;

	void Update()
	{
	transform.position = new Vector2(transform.position.x + 1, transform.position.y + 1);
	# Original model - OOM :(
	model = tf.keras.Sequential([
	Flatten(input_shape=(L,A)),
	Dense(units=L*A)
	Reshape((L,A)),
	Activation("softmax")

	# Keep splitting until max layer size is OK
	model_in = Input(shape=(L, A))
	flatten = Flatten(input_shape=(L,A))(model_in)
	class ProteinTokenizer:
	def __init__(self):
	self.vocab = ["U", "C", "F", "W", "G", "A", "M", "X", "L", "V", "D",
	"I", "E", "P", "T", "S", "R", "K", "Q", "Y", "H", "N", "*", "[MASK]", "[CLS]"]

	def tokenize(self, line):
	protein = line
	return [amino_acid for amino_acid in protein]

	def convert_tokens_to_ids(self, protein):
	from sys import platform
	import string
	import os

	def parse_fasta(filename, a3m=False):
	if a3m:
	# for a3m files the lowercase letters are removed
	# as these do not align to the query sequence
	rm_lc = str.maketrans(dict.fromkeys(string.ascii_lowercase))
	# Description of FASTA format
	# https://zhanglab.ccmb.med.umich.edu/FASTA/#:~:text=FASTA%20format%20is%20a%20text,by%20lines%20of%20sequence%20data

	import os
	import wget
	import gzip


	def download_fasta(url, data_directory='fasta_data'):
	if not os.path.exists(data_directory):