Skip to content

Instantly share code, notes, and snippets.

@bbrighttaer
Created July 7, 2021 13:11
Show Gist options
  • Save bbrighttaer/3e511d107e495fbb5f711b075f173ef8 to your computer and use it in GitHub Desktop.
Save bbrighttaer/3e511d107e495fbb5f711b075f173ef8 to your computer and use it in GitHub Desktop.
def default_hparams_bopt(flags):
    """Build the default hyperparameter dictionary from ``flags``.

    ``flags`` must support both attribute access (``flags.explain``,
    ``flags.tasks``) and subscript access (``flags["prot_model_types"]``,
    ``flags["prot_vocab_size"]``, ``flags["gnn_fingerprint"]``), because
    this function reads it both ways.

    Protein model types:

    ------------|---------------------------------------------------------
    short name  | full name
    ------------|---------------------------------------------------------
    psc         | Protein Sequence Composition
    ------------|---------------------------------------------------------
    p2v         | Protein to Vector / Embeddings using n-gram amino acid
                | 'words'.
    ------------|---------------------------------------------------------
    rnn         | Uses embeddings and an RNN variant (e.g. LSTM) to learn
                | protein features.
    ------------|---------------------------------------------------------
    pcnn        | Protein CNN:
                | https://academic.oup.com/bioinformatics/article/35/2/309/5050020
                | The final output is a 1D vector for each protein in a
                | batch.
    ------------|---------------------------------------------------------
    pcnn2d      | A variant of PCNN that returns a 2D tensor for each
                | protein in a batch.
    ------------|---------------------------------------------------------

    NOTE: All protein models, except 'psc' and 'p2v', use embeddings from
    the :class:`Prot2Vec` module.

    Args:
        flags: Run configuration object (see access requirements above).

    Returns:
        dict: A nested dictionary of hyperparameters. Top-level scalar
        settings sit next to per-component sub-dictionaries under the
        keys ``"prot"``, ``"weave"``, ``"gconv"``, ``"ecfp8"`` and
        ``"gnn"``.
    """
    return {
        "explain_mode": flags.explain,
        "attn_heads": 2,
        "attn_layers": 1,
        "lin_dims": [1033, 1481, 1800],
        # One output per task listed in the flags.
        "output_dim": len(flags.tasks),
        "latent_dim": 512,
        # weight initialization
        "kaiming_constant": 5,
        # dropout
        "dprob": 0.2,
        # batch sizes
        "tr_batch_size": 256,
        "val_batch_size": 128,
        "test_batch_size": 128,
        # optimizer params
        "optimizer": "adamax",
        "optimizer__global__weight_decay": 0.0007,
        "optimizer__global__lr": 0.0004,
        "prot": {
            "model_types": flags["prot_model_types"],
            "vocab_size": flags["prot_vocab_size"],
            "window": 11,
            "pcnn_num_layers": 2,
            "embedding_dim": 33,
            "psc_dim": 8421,
            "rnn_hidden_state_dim": 27,
        },
        "weave": {
            "dim": 50,
            "update_pairs": False,
        },
        "gconv": {
            "dim": 512,
        },
        "ecfp8": {
            "dim": 1024,
        },
        "gnn": {
            # A falsy fingerprint value (None or empty) yields size 0.
            "fingerprint_size": (
                len(flags["gnn_fingerprint"]) if flags["gnn_fingerprint"] else 0
            ),
            "num_layers": 3,
            "dim": 100,
        },
    }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment