Created
September 11, 2022 23:27
-
-
Save partarstu/cb0052bcda3401ae381d421afc8f3e5f to your computer and use it in GitHub Desktop.
Generative model SD summary
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- Summary --- | |
Variables: 197 (51 with arrays) | |
Functions: 141 | |
SameDiff Function Defs: 0 | |
Loss function variables: [loss] | |
--- Variables --- | |
- Name - - Array Shape - - Variable Type - - Data Type- - Output Of Function - - Inputs To Functions - | |
FLAT_TARGET_BATCH_POSITIONS [-1] PLACEHOLDER INT <none> [gather_1] | |
FLAT_TARGET_TOKEN_VOCAB_INDICES [-1] PLACEHOLDER INT <none> [onehot] | |
attentionHeadEmbeddingSize [] CONSTANT INT <none> [decoder/self_attention_default/multiply_1, decoder/self_attention_default/stack_1, decoder/self_attention_default/stack_2] | |
attentionHeadsAmount [] CONSTANT INT <none> [decoder/self_attention_default/multiply_1, decoder/self_attention_default/stack_1, decoder/self_attention_default/stack_2] | |
batchDecoderInputTokenEmbeddings - ARRAY FLOAT gather(gather) [decoder/shape_of, decoder/shape_of_1, decoder/reshape] | |
decoder/0/keyAndQueryInputNormalized - ARRAY FLOAT decoder/0/layer_norm_1(layer_norm) [decoder/0/self_attention_default/matmul, decoder/0/self_attention_default/matmul_1] | |
decoder/0/keyAndQueryInputNormalized_embedNormGain [768] CONSTANT FLOAT <none> [decoder/0/layer_norm_1] | |
decoder/0/keyAndQueryInput_WithPositionalEmbed - ARRAY FLOAT decoder/0/add(add) [decoder/0/layer_norm_1] | |
decoder/0/normalizedAttentionInput - ARRAY FLOAT decoder/0/layer_norm(layer_norm) [decoder/0/self_attention_default/matmul_2] | |
decoder/0/normalizedAttentionInput_embedNormGain [768] CONSTANT FLOAT <none> [decoder/0/layer_norm] | |
decoder/0/selfAttentionResidualProduct - ARRAY FLOAT decoder/0/add_1(add) [decoder/hidden_ff_layer_0/layer_norm, decoder/hidden_ff_layer_0/add] | |
decoder/0/self_attention_default/AttentionKeyWeights_0 [768, 768] VARIABLE FLOAT <none> [decoder/0/self_attention_default/matmul] | |
decoder/0/self_attention_default/AttentionOutWeights_0 [768, 768] VARIABLE FLOAT <none> [decoder/0/self_attention_default/matmul_5] | |
decoder/0/self_attention_default/AttentionQueryWeights_0 [768, 768] VARIABLE FLOAT <none> [decoder/0/self_attention_default/matmul_1] | |
decoder/0/self_attention_default/AttentionValueWeights_0 [768, 768] VARIABLE FLOAT <none> [decoder/0/self_attention_default/matmul_2] | |
decoder/0/self_attention_default/attentionDotProductOutput_0 - ARRAY FLOAT decoder/0/self_attention_default/reshape_3(reshape) [decoder/0/self_attention_default/matmul_5] | |
decoder/0/self_attention_default/attentionMaskDisqualifier_0 - ARRAY FLOAT decoder/0/self_attention_default/mul_scalar(mul_scalar) [decoder/0/self_attention_default/add] | |
decoder/0/self_attention_default/attentionOutput_0 - ARRAY FLOAT decoder/0/self_attention_default/matmul_5(matmul) [decoder/0/add_1] | |
decoder/0/self_attention_default/attentionScoresBeforeMasking_0 - ARRAY FLOAT decoder/0/self_attention_default/mul_scalar_1(mul_scalar) [decoder/0/self_attention_default/add] | |
decoder/0/self_attention_default/attentionSoftmaxScores_0 - ARRAY FLOAT decoder/0/self_attention_default/softmax(softmax) [decoder/0/self_attention_default/matmul_4] | |
decoder/0/self_attention_default/attentionWeightsMasked_0 - ARRAY FLOAT decoder/0/self_attention_default/add(add) [decoder/0/self_attention_default/softmax] | |
decoder/0/self_attention_default/keyProjections_0 - ARRAY FLOAT decoder/0/self_attention_default/matmul(matmul) [decoder/0/self_attention_default/reshape] | |
decoder/0/self_attention_default/matmul_3 - ARRAY FLOAT decoder/0/self_attention_default/matmul_3(matmul) [decoder/0/self_attention_default/mul_scalar_1] | |
decoder/0/self_attention_default/permute - ARRAY FLOAT decoder/0/self_attention_default/permute(permute) [decoder/0/self_attention_default/matmul_3] | |
decoder/0/self_attention_default/permute_1 - ARRAY FLOAT decoder/0/self_attention_default/permute_1(permute) [decoder/0/self_attention_default/matmul_3] | |
decoder/0/self_attention_default/permute_2 - ARRAY FLOAT decoder/0/self_attention_default/permute_2(permute) [decoder/0/self_attention_default/matmul_4] | |
decoder/0/self_attention_default/permute_3 - ARRAY FLOAT decoder/0/self_attention_default/permute_3(permute) [decoder/0/self_attention_default/reshape_3] | |
decoder/0/self_attention_default/queryProjections_0 - ARRAY FLOAT decoder/0/self_attention_default/matmul_1(matmul) [decoder/0/self_attention_default/reshape_1] | |
decoder/0/self_attention_default/reshape - ARRAY FLOAT decoder/0/self_attention_default/reshape(reshape) [decoder/0/self_attention_default/permute] | |
decoder/0/self_attention_default/reshape_1 - ARRAY FLOAT decoder/0/self_attention_default/reshape_1(reshape) [decoder/0/self_attention_default/permute_1] | |
decoder/0/self_attention_default/reshape_2 - ARRAY FLOAT decoder/0/self_attention_default/reshape_2(reshape) [decoder/0/self_attention_default/permute_2] | |
decoder/0/self_attention_default/sub_scalar - ARRAY FLOAT decoder/0/self_attention_default/sub_scalar(sub_scalar) [decoder/0/self_attention_default/mul_scalar] | |
decoder/0/self_attention_default/valueProjections_0 - ARRAY FLOAT decoder/0/self_attention_default/matmul_2(matmul) [decoder/0/self_attention_default/reshape_2] | |
decoder/0/self_attention_default/valuesBasedOnAttentionScores_0 - ARRAY FLOAT decoder/0/self_attention_default/matmul_4(matmul) [decoder/0/self_attention_default/permute_3] | |
decoder/1/keyAndQueryInputNormalized - ARRAY FLOAT decoder/1/layer_norm_1(layer_norm) [decoder/1/self_attention_default/matmul, decoder/1/self_attention_default/matmul_1] | |
decoder/1/keyAndQueryInputNormalized_embedNormGain [768] CONSTANT FLOAT <none> [decoder/1/layer_norm_1] | |
decoder/1/keyAndQueryInput_WithPositionalEmbed - ARRAY FLOAT decoder/1/add(add) [decoder/1/layer_norm_1] | |
decoder/1/normalizedAttentionInput - ARRAY FLOAT decoder/1/layer_norm(layer_norm) [decoder/1/self_attention_default/matmul_2] | |
decoder/1/normalizedAttentionInput_embedNormGain [768] CONSTANT FLOAT <none> [decoder/1/layer_norm] | |
decoder/1/selfAttentionResidualProduct - ARRAY FLOAT decoder/1/add_1(add) [decoder/hidden_ff_layer_1/layer_norm, decoder/hidden_ff_layer_1/add] | |
decoder/1/self_attention_default/AttentionKeyWeights_1 [768, 768] VARIABLE FLOAT <none> [decoder/1/self_attention_default/matmul] | |
decoder/1/self_attention_default/AttentionOutWeights_1 [768, 768] VARIABLE FLOAT <none> [decoder/1/self_attention_default/matmul_5] | |
decoder/1/self_attention_default/AttentionQueryWeights_1 [768, 768] VARIABLE FLOAT <none> [decoder/1/self_attention_default/matmul_1] | |
decoder/1/self_attention_default/AttentionValueWeights_1 [768, 768] VARIABLE FLOAT <none> [decoder/1/self_attention_default/matmul_2] | |
decoder/1/self_attention_default/attentionDotProductOutput_1 - ARRAY FLOAT decoder/1/self_attention_default/reshape_3(reshape) [decoder/1/self_attention_default/matmul_5] | |
decoder/1/self_attention_default/attentionMaskDisqualifier_1 - ARRAY FLOAT decoder/1/self_attention_default/mul_scalar(mul_scalar) [decoder/1/self_attention_default/add] | |
decoder/1/self_attention_default/attentionOutput_1 - ARRAY FLOAT decoder/1/self_attention_default/matmul_5(matmul) [decoder/1/add_1] | |
decoder/1/self_attention_default/attentionScoresBeforeMasking_1 - ARRAY FLOAT decoder/1/self_attention_default/mul_scalar_1(mul_scalar) [decoder/1/self_attention_default/add] | |
decoder/1/self_attention_default/attentionSoftmaxScores_1 - ARRAY FLOAT decoder/1/self_attention_default/softmax(softmax) [decoder/1/self_attention_default/matmul_4] | |
decoder/1/self_attention_default/attentionWeightsMasked_1 - ARRAY FLOAT decoder/1/self_attention_default/add(add) [decoder/1/self_attention_default/softmax] | |
decoder/1/self_attention_default/keyProjections_1 - ARRAY FLOAT decoder/1/self_attention_default/matmul(matmul) [decoder/1/self_attention_default/reshape] | |
decoder/1/self_attention_default/matmul_3 - ARRAY FLOAT decoder/1/self_attention_default/matmul_3(matmul) [decoder/1/self_attention_default/mul_scalar_1] | |
decoder/1/self_attention_default/permute - ARRAY FLOAT decoder/1/self_attention_default/permute(permute) [decoder/1/self_attention_default/matmul_3] | |
decoder/1/self_attention_default/permute_1 - ARRAY FLOAT decoder/1/self_attention_default/permute_1(permute) [decoder/1/self_attention_default/matmul_3] | |
decoder/1/self_attention_default/permute_2 - ARRAY FLOAT decoder/1/self_attention_default/permute_2(permute) [decoder/1/self_attention_default/matmul_4] | |
decoder/1/self_attention_default/permute_3 - ARRAY FLOAT decoder/1/self_attention_default/permute_3(permute) [decoder/1/self_attention_default/reshape_3] | |
decoder/1/self_attention_default/queryProjections_1 - ARRAY FLOAT decoder/1/self_attention_default/matmul_1(matmul) [decoder/1/self_attention_default/reshape_1] | |
decoder/1/self_attention_default/reshape - ARRAY FLOAT decoder/1/self_attention_default/reshape(reshape) [decoder/1/self_attention_default/permute] | |
decoder/1/self_attention_default/reshape_1 - ARRAY FLOAT decoder/1/self_attention_default/reshape_1(reshape) [decoder/1/self_attention_default/permute_1] | |
decoder/1/self_attention_default/reshape_2 - ARRAY FLOAT decoder/1/self_attention_default/reshape_2(reshape) [decoder/1/self_attention_default/permute_2] | |
decoder/1/self_attention_default/sub_scalar - ARRAY FLOAT decoder/1/self_attention_default/sub_scalar(sub_scalar) [decoder/1/self_attention_default/mul_scalar] | |
decoder/1/self_attention_default/valueProjections_1 - ARRAY FLOAT decoder/1/self_attention_default/matmul_2(matmul) [decoder/1/self_attention_default/reshape_2] | |
decoder/1/self_attention_default/valuesBasedOnAttentionScores_1 - ARRAY FLOAT decoder/1/self_attention_default/matmul_4(matmul) [decoder/1/self_attention_default/permute_3] | |
decoder/2/keyAndQueryInputNormalized - ARRAY FLOAT decoder/2/layer_norm_1(layer_norm) [decoder/2/self_attention_default/matmul, decoder/2/self_attention_default/matmul_1] | |
decoder/2/keyAndQueryInputNormalized_embedNormGain [768] CONSTANT FLOAT <none> [decoder/2/layer_norm_1] | |
decoder/2/keyAndQueryInput_WithPositionalEmbed - ARRAY FLOAT decoder/2/add(add) [decoder/2/layer_norm_1] | |
decoder/2/normalizedAttentionInput - ARRAY FLOAT decoder/2/layer_norm(layer_norm) [decoder/2/self_attention_default/matmul_2] | |
decoder/2/normalizedAttentionInput_embedNormGain [768] CONSTANT FLOAT <none> [decoder/2/layer_norm] | |
decoder/2/selfAttentionResidualProduct - ARRAY FLOAT decoder/2/add_1(add) [decoder/hidden_ff_layer_2/layer_norm, decoder/hidden_ff_layer_2/add] | |
decoder/2/self_attention_default/AttentionKeyWeights_2 [768, 768] VARIABLE FLOAT <none> [decoder/2/self_attention_default/matmul] | |
decoder/2/self_attention_default/AttentionOutWeights_2 [768, 768] VARIABLE FLOAT <none> [decoder/2/self_attention_default/matmul_5] | |
decoder/2/self_attention_default/AttentionQueryWeights_2 [768, 768] VARIABLE FLOAT <none> [decoder/2/self_attention_default/matmul_1] | |
decoder/2/self_attention_default/AttentionValueWeights_2 [768, 768] VARIABLE FLOAT <none> [decoder/2/self_attention_default/matmul_2] | |
decoder/2/self_attention_default/attentionDotProductOutput_2 - ARRAY FLOAT decoder/2/self_attention_default/reshape_3(reshape) [decoder/2/self_attention_default/matmul_5] | |
decoder/2/self_attention_default/attentionMaskDisqualifier_2 - ARRAY FLOAT decoder/2/self_attention_default/mul_scalar(mul_scalar) [decoder/2/self_attention_default/add] | |
decoder/2/self_attention_default/attentionOutput_2 - ARRAY FLOAT decoder/2/self_attention_default/matmul_5(matmul) [decoder/2/add_1] | |
decoder/2/self_attention_default/attentionScoresBeforeMasking_2 - ARRAY FLOAT decoder/2/self_attention_default/mul_scalar_1(mul_scalar) [decoder/2/self_attention_default/add] | |
decoder/2/self_attention_default/attentionSoftmaxScores_2 - ARRAY FLOAT decoder/2/self_attention_default/softmax(softmax) [decoder/2/self_attention_default/matmul_4] | |
decoder/2/self_attention_default/attentionWeightsMasked_2 - ARRAY FLOAT decoder/2/self_attention_default/add(add) [decoder/2/self_attention_default/softmax] | |
decoder/2/self_attention_default/keyProjections_2 - ARRAY FLOAT decoder/2/self_attention_default/matmul(matmul) [decoder/2/self_attention_default/reshape] | |
decoder/2/self_attention_default/matmul_3 - ARRAY FLOAT decoder/2/self_attention_default/matmul_3(matmul) [decoder/2/self_attention_default/mul_scalar_1] | |
decoder/2/self_attention_default/permute - ARRAY FLOAT decoder/2/self_attention_default/permute(permute) [decoder/2/self_attention_default/matmul_3] | |
decoder/2/self_attention_default/permute_1 - ARRAY FLOAT decoder/2/self_attention_default/permute_1(permute) [decoder/2/self_attention_default/matmul_3] | |
decoder/2/self_attention_default/permute_2 - ARRAY FLOAT decoder/2/self_attention_default/permute_2(permute) [decoder/2/self_attention_default/matmul_4] | |
decoder/2/self_attention_default/permute_3 - ARRAY FLOAT decoder/2/self_attention_default/permute_3(permute) [decoder/2/self_attention_default/reshape_3] | |
decoder/2/self_attention_default/queryProjections_2 - ARRAY FLOAT decoder/2/self_attention_default/matmul_1(matmul) [decoder/2/self_attention_default/reshape_1] | |
decoder/2/self_attention_default/reshape - ARRAY FLOAT decoder/2/self_attention_default/reshape(reshape) [decoder/2/self_attention_default/permute] | |
decoder/2/self_attention_default/reshape_1 - ARRAY FLOAT decoder/2/self_attention_default/reshape_1(reshape) [decoder/2/self_attention_default/permute_1] | |
decoder/2/self_attention_default/reshape_2 - ARRAY FLOAT decoder/2/self_attention_default/reshape_2(reshape) [decoder/2/self_attention_default/permute_2] | |
decoder/2/self_attention_default/sub_scalar - ARRAY FLOAT decoder/2/self_attention_default/sub_scalar(sub_scalar) [decoder/2/self_attention_default/mul_scalar] | |
decoder/2/self_attention_default/valueProjections_2 - ARRAY FLOAT decoder/2/self_attention_default/matmul_2(matmul) [decoder/2/self_attention_default/reshape_2] | |
decoder/2/self_attention_default/valuesBasedOnAttentionScores_2 - ARRAY FLOAT decoder/2/self_attention_default/matmul_4(matmul) [decoder/2/self_attention_default/permute_3] | |
decoder/3/keyAndQueryInputNormalized - ARRAY FLOAT decoder/3/layer_norm_1(layer_norm) [decoder/3/self_attention_default/matmul, decoder/3/self_attention_default/matmul_1] | |
decoder/3/keyAndQueryInputNormalized_embedNormGain [768] CONSTANT FLOAT <none> [decoder/3/layer_norm_1] | |
decoder/3/keyAndQueryInput_WithPositionalEmbed - ARRAY FLOAT decoder/3/add(add) [decoder/3/layer_norm_1] | |
decoder/3/normalizedAttentionInput - ARRAY FLOAT decoder/3/layer_norm(layer_norm) [decoder/3/self_attention_default/matmul_2] | |
decoder/3/normalizedAttentionInput_embedNormGain [768] CONSTANT FLOAT <none> [decoder/3/layer_norm] | |
decoder/3/selfAttentionResidualProduct - ARRAY FLOAT decoder/3/add_1(add) [decoder/hidden_ff_layer_3/layer_norm, decoder/hidden_ff_layer_3/add] | |
decoder/3/self_attention_default/AttentionKeyWeights_3 [768, 768] VARIABLE FLOAT <none> [decoder/3/self_attention_default/matmul] | |
decoder/3/self_attention_default/AttentionOutWeights_3 [768, 768] VARIABLE FLOAT <none> [decoder/3/self_attention_default/matmul_5] | |
decoder/3/self_attention_default/AttentionQueryWeights_3 [768, 768] VARIABLE FLOAT <none> [decoder/3/self_attention_default/matmul_1] | |
decoder/3/self_attention_default/AttentionValueWeights_3 [768, 768] VARIABLE FLOAT <none> [decoder/3/self_attention_default/matmul_2] | |
decoder/3/self_attention_default/attentionDotProductOutput_3 - ARRAY FLOAT decoder/3/self_attention_default/reshape_3(reshape) [decoder/3/self_attention_default/matmul_5] | |
decoder/3/self_attention_default/attentionMaskDisqualifier_3 - ARRAY FLOAT decoder/3/self_attention_default/mul_scalar(mul_scalar) [decoder/3/self_attention_default/add] | |
decoder/3/self_attention_default/attentionOutput_3 - ARRAY FLOAT decoder/3/self_attention_default/matmul_5(matmul) [decoder/3/add_1] | |
decoder/3/self_attention_default/attentionScoresBeforeMasking_3 - ARRAY FLOAT decoder/3/self_attention_default/mul_scalar_1(mul_scalar) [decoder/3/self_attention_default/add] | |
decoder/3/self_attention_default/attentionSoftmaxScores_3 - ARRAY FLOAT decoder/3/self_attention_default/softmax(softmax) [decoder/3/self_attention_default/matmul_4] | |
decoder/3/self_attention_default/attentionWeightsMasked_3 - ARRAY FLOAT decoder/3/self_attention_default/add(add) [decoder/3/self_attention_default/softmax] | |
decoder/3/self_attention_default/keyProjections_3 - ARRAY FLOAT decoder/3/self_attention_default/matmul(matmul) [decoder/3/self_attention_default/reshape] | |
decoder/3/self_attention_default/matmul_3 - ARRAY FLOAT decoder/3/self_attention_default/matmul_3(matmul) [decoder/3/self_attention_default/mul_scalar_1] | |
decoder/3/self_attention_default/permute - ARRAY FLOAT decoder/3/self_attention_default/permute(permute) [decoder/3/self_attention_default/matmul_3] | |
decoder/3/self_attention_default/permute_1 - ARRAY FLOAT decoder/3/self_attention_default/permute_1(permute) [decoder/3/self_attention_default/matmul_3] | |
decoder/3/self_attention_default/permute_2 - ARRAY FLOAT decoder/3/self_attention_default/permute_2(permute) [decoder/3/self_attention_default/matmul_4] | |
decoder/3/self_attention_default/permute_3 - ARRAY FLOAT decoder/3/self_attention_default/permute_3(permute) [decoder/3/self_attention_default/reshape_3] | |
decoder/3/self_attention_default/queryProjections_3 - ARRAY FLOAT decoder/3/self_attention_default/matmul_1(matmul) [decoder/3/self_attention_default/reshape_1] | |
decoder/3/self_attention_default/reshape - ARRAY FLOAT decoder/3/self_attention_default/reshape(reshape) [decoder/3/self_attention_default/permute] | |
decoder/3/self_attention_default/reshape_1 - ARRAY FLOAT decoder/3/self_attention_default/reshape_1(reshape) [decoder/3/self_attention_default/permute_1] | |
decoder/3/self_attention_default/reshape_2 - ARRAY FLOAT decoder/3/self_attention_default/reshape_2(reshape) [decoder/3/self_attention_default/permute_2] | |
decoder/3/self_attention_default/sub_scalar - ARRAY FLOAT decoder/3/self_attention_default/sub_scalar(sub_scalar) [decoder/3/self_attention_default/mul_scalar] | |
decoder/3/self_attention_default/valueProjections_3 - ARRAY FLOAT decoder/3/self_attention_default/matmul_2(matmul) [decoder/3/self_attention_default/reshape_2] | |
decoder/3/self_attention_default/valuesBasedOnAttentionScores_3 - ARRAY FLOAT decoder/3/self_attention_default/matmul_4(matmul) [decoder/3/self_attention_default/permute_3] | |
decoder/attentionInputShape - ARRAY INT decoder/stack(stack) [decoder/reshape] | |
decoder/batchSize - ARRAY INT decoder/cast(cast) [decoder/multiply, decoder/stack_1, decoder/self_attention_default/multiply, decoder/self_attention_default/stack_1, decoder/self_attention_default/stack_2] | |
decoder/decoderOutputNormalized - ARRAY FLOAT decoder/layer_norm(layer_norm) [gather_1] | |
decoder/decoderOutputNormalized_embedNormGain [768] CONSTANT FLOAT <none> [decoder/layer_norm] | |
decoder/decoderSequenceLength - ARRAY INT decoder/cast_1(cast) [decoder/multiply, decoder/self_attention_default/multiply, decoder/self_attention_default/stack_1, decoder/self_attention_default/stack_2] | |
decoder/flatBatchSize - ARRAY INT decoder/multiply(multiply) [decoder/stack] | |
decoder/hidden_ff_layer_0/hiddenFinalLayerOutput_0 - ARRAY FLOAT decoder/hidden_ff_layer_0/xw_plus_b_1(xw_plus_b) [decoder/hidden_ff_layer_0/add] | |
decoder/hidden_ff_layer_0/hiddenInnerLayerActivations_0 - ARRAY FLOAT decoder/hidden_ff_layer_0/xw_plus_b(xw_plus_b) [decoder/hidden_ff_layer_0/gelu] | |
decoder/hidden_ff_layer_0/hiddenInnerLayerBias_0 [1024] VARIABLE FLOAT <none> [decoder/hidden_ff_layer_0/xw_plus_b] | |
decoder/hidden_ff_layer_0/hiddenInnerLayerOutput_0 - ARRAY FLOAT decoder/hidden_ff_layer_0/gelu(gelu) [decoder/hidden_ff_layer_0/xw_plus_b_1] | |
decoder/hidden_ff_layer_0/hiddenInnerLayerWeights_0 [768, 1024] VARIABLE FLOAT <none> [decoder/hidden_ff_layer_0/xw_plus_b] | |
decoder/hidden_ff_layer_0/hiddenLayerFinalOutputNormalized_0 - ARRAY FLOAT decoder/hidden_ff_layer_0/add(add) [decoder/1/layer_norm, decoder/1/add, decoder/1/add_1] | |
decoder/hidden_ff_layer_0/hiddenLayerInputNormalized_0 - ARRAY FLOAT decoder/hidden_ff_layer_0/layer_norm(layer_norm) [decoder/hidden_ff_layer_0/xw_plus_b] | |
decoder/hidden_ff_layer_0/hiddenLayerInputNormalized_0_embedNormGain [768] CONSTANT FLOAT <none> [decoder/hidden_ff_layer_0/layer_norm] | |
decoder/hidden_ff_layer_0/hiddenOutLayerBias_0 [768] VARIABLE FLOAT <none> [decoder/hidden_ff_layer_0/xw_plus_b_1] | |
decoder/hidden_ff_layer_0/hiddenOutLayerWeights_0 [1024, 768] VARIABLE FLOAT <none> [decoder/hidden_ff_layer_0/xw_plus_b_1] | |
decoder/hidden_ff_layer_1/hiddenFinalLayerOutput_1 - ARRAY FLOAT decoder/hidden_ff_layer_1/xw_plus_b_1(xw_plus_b) [decoder/hidden_ff_layer_1/add] | |
decoder/hidden_ff_layer_1/hiddenInnerLayerActivations_1 - ARRAY FLOAT decoder/hidden_ff_layer_1/xw_plus_b(xw_plus_b) [decoder/hidden_ff_layer_1/gelu] | |
decoder/hidden_ff_layer_1/hiddenInnerLayerBias_1 [1024] VARIABLE FLOAT <none> [decoder/hidden_ff_layer_1/xw_plus_b] | |
decoder/hidden_ff_layer_1/hiddenInnerLayerOutput_1 - ARRAY FLOAT decoder/hidden_ff_layer_1/gelu(gelu) [decoder/hidden_ff_layer_1/xw_plus_b_1] | |
decoder/hidden_ff_layer_1/hiddenInnerLayerWeights_1 [768, 1024] VARIABLE FLOAT <none> [decoder/hidden_ff_layer_1/xw_plus_b] | |
decoder/hidden_ff_layer_1/hiddenLayerFinalOutputNormalized_1 - ARRAY FLOAT decoder/hidden_ff_layer_1/add(add) [decoder/2/layer_norm, decoder/2/add, decoder/2/add_1] | |
decoder/hidden_ff_layer_1/hiddenLayerInputNormalized_1 - ARRAY FLOAT decoder/hidden_ff_layer_1/layer_norm(layer_norm) [decoder/hidden_ff_layer_1/xw_plus_b] | |
decoder/hidden_ff_layer_1/hiddenLayerInputNormalized_1_embedNormGain [768] CONSTANT FLOAT <none> [decoder/hidden_ff_layer_1/layer_norm] | |
decoder/hidden_ff_layer_1/hiddenOutLayerBias_1 [768] VARIABLE FLOAT <none> [decoder/hidden_ff_layer_1/xw_plus_b_1] | |
decoder/hidden_ff_layer_1/hiddenOutLayerWeights_1 [1024, 768] VARIABLE FLOAT <none> [decoder/hidden_ff_layer_1/xw_plus_b_1] | |
decoder/hidden_ff_layer_2/hiddenFinalLayerOutput_2 - ARRAY FLOAT decoder/hidden_ff_layer_2/xw_plus_b_1(xw_plus_b) [decoder/hidden_ff_layer_2/add] | |
decoder/hidden_ff_layer_2/hiddenInnerLayerActivations_2 - ARRAY FLOAT decoder/hidden_ff_layer_2/xw_plus_b(xw_plus_b) [decoder/hidden_ff_layer_2/gelu] | |
decoder/hidden_ff_layer_2/hiddenInnerLayerBias_2 [1024] VARIABLE FLOAT <none> [decoder/hidden_ff_layer_2/xw_plus_b] | |
decoder/hidden_ff_layer_2/hiddenInnerLayerOutput_2 - ARRAY FLOAT decoder/hidden_ff_layer_2/gelu(gelu) [decoder/hidden_ff_layer_2/xw_plus_b_1] | |
decoder/hidden_ff_layer_2/hiddenInnerLayerWeights_2 [768, 1024] VARIABLE FLOAT <none> [decoder/hidden_ff_layer_2/xw_plus_b] | |
decoder/hidden_ff_layer_2/hiddenLayerFinalOutputNormalized_2 - ARRAY FLOAT decoder/hidden_ff_layer_2/add(add) [decoder/3/layer_norm, decoder/3/add, decoder/3/add_1] | |
decoder/hidden_ff_layer_2/hiddenLayerInputNormalized_2 - ARRAY FLOAT decoder/hidden_ff_layer_2/layer_norm(layer_norm) [decoder/hidden_ff_layer_2/xw_plus_b] | |
decoder/hidden_ff_layer_2/hiddenLayerInputNormalized_2_embedNormGain [768] CONSTANT FLOAT <none> [decoder/hidden_ff_layer_2/layer_norm] | |
decoder/hidden_ff_layer_2/hiddenOutLayerBias_2 [768] VARIABLE FLOAT <none> [decoder/hidden_ff_layer_2/xw_plus_b_1] | |
decoder/hidden_ff_layer_2/hiddenOutLayerWeights_2 [1024, 768] VARIABLE FLOAT <none> [decoder/hidden_ff_layer_2/xw_plus_b_1] | |
decoder/hidden_ff_layer_3/hiddenFinalLayerOutput_3 - ARRAY FLOAT decoder/hidden_ff_layer_3/xw_plus_b_1(xw_plus_b) [decoder/hidden_ff_layer_3/add] | |
decoder/hidden_ff_layer_3/hiddenInnerLayerActivations_3 - ARRAY FLOAT decoder/hidden_ff_layer_3/xw_plus_b(xw_plus_b) [decoder/hidden_ff_layer_3/gelu] | |
decoder/hidden_ff_layer_3/hiddenInnerLayerBias_3 [1024] VARIABLE FLOAT <none> [decoder/hidden_ff_layer_3/xw_plus_b] | |
decoder/hidden_ff_layer_3/hiddenInnerLayerOutput_3 - ARRAY FLOAT decoder/hidden_ff_layer_3/gelu(gelu) [decoder/hidden_ff_layer_3/xw_plus_b_1] | |
decoder/hidden_ff_layer_3/hiddenInnerLayerWeights_3 [768, 1024] VARIABLE FLOAT <none> [decoder/hidden_ff_layer_3/xw_plus_b] | |
decoder/hidden_ff_layer_3/hiddenLayerFinalOutputNormalized_3 - ARRAY FLOAT decoder/hidden_ff_layer_3/add(add) [decoder/layer_norm] | |
decoder/hidden_ff_layer_3/hiddenLayerInputNormalized_3 - ARRAY FLOAT decoder/hidden_ff_layer_3/layer_norm(layer_norm) [decoder/hidden_ff_layer_3/xw_plus_b] | |
decoder/hidden_ff_layer_3/hiddenLayerInputNormalized_3_embedNormGain [768] CONSTANT FLOAT <none> [decoder/hidden_ff_layer_3/layer_norm] | |
decoder/hidden_ff_layer_3/hiddenOutLayerBias_3 [768] VARIABLE FLOAT <none> [decoder/hidden_ff_layer_3/xw_plus_b_1] | |
decoder/hidden_ff_layer_3/hiddenOutLayerWeights_3 [1024, 768] VARIABLE FLOAT <none> [decoder/hidden_ff_layer_3/xw_plus_b_1] | |
decoder/positionalEmbeddingsForAttention - ARRAY DOUBLE decoder/tile(tile) [decoder/0/add, decoder/1/add, decoder/2/add, decoder/3/add] | |
decoder/reshape - ARRAY FLOAT decoder/reshape(reshape) [decoder/0/layer_norm, decoder/0/add, decoder/0/add_1] | |
decoder/self_attention_default/attentionDotProductShape - ARRAY INT decoder/self_attention_default/stack(stack) [decoder/0/self_attention_default/reshape_3, decoder/1/self_attention_default/reshape_3, decoder/2/self_attention_default/reshape_3, decoder/3/self_attention_default/reshape_3] | |
decoder/self_attention_default/keysPerHeadAttentionShape - ARRAY INT decoder/self_attention_default/stack_1(stack) [decoder/0/self_attention_default/reshape, decoder/0/self_attention_default/reshape_2, decoder/1/self_attention_default/reshape, decoder/1/self_attention_default/reshape_2, decoder/2/self_attention_default/reshape, decoder/2/self_attention_default/reshape_2, decoder/3/self_attention_default/reshape, decoder/3/self_attention_default/reshape_2] | |
decoder/self_attention_default/multiply - ARRAY INT decoder/self_attention_default/multiply(multiply) [decoder/self_attention_default/stack] | |
decoder/self_attention_default/multiply_1 - ARRAY INT decoder/self_attention_default/multiply_1(multiply) [decoder/self_attention_default/stack] | |
decoder/self_attention_default/queriesPerHeadAttentionShape - ARRAY INT decoder/self_attention_default/stack_2(stack) [decoder/0/self_attention_default/reshape_1, decoder/1/self_attention_default/reshape_1, decoder/2/self_attention_default/reshape_1, decoder/3/self_attention_default/reshape_1] | |
decoder/shape_of - ARRAY LONG decoder/shape_of(shape_of) [decoder/strided_slice] | |
decoder/shape_of_1 - ARRAY LONG decoder/shape_of_1(shape_of) [decoder/strided_slice_1] | |
decoder/stack_1 - ARRAY INT decoder/stack_1(stack) [decoder/tile] | |
decoder/strided_slice - ARRAY LONG decoder/strided_slice(strided_slice) [decoder/cast] | |
decoder/strided_slice_1 - ARRAY LONG decoder/strided_slice_1(strided_slice) [decoder/cast_1] | |
decoderInputTokenVocabIndices [-1, 256] PLACEHOLDER INT <none> [gather] | |
decoderPositionalEmbeddingsMatrix [256, 768] CONSTANT DOUBLE <none> [decoder/tile] | |
finalLayerResult - ARRAY FLOAT matmul(matmul) [softmax] | |
hiddenSize [] CONSTANT INT <none> [decoder/stack] | |
log - ARRAY FLOAT log(log) [multiply] | |
loss - ARRAY FLOAT neg(neg) | |
modelPredictionLogits - ARRAY FLOAT softmax(softmax) [log] | |
mul_scalar - ARRAY FLOAT mul_scalar(mul_scalar) [add_scalar] | |
multiply - ARRAY FLOAT multiply(multiply) [reduce_sum] | |
one [] CONSTANT INT <none> [decoder/stack_1] | |
oneHotLabels - ARRAY FLOAT onehot(onehot) [mul_scalar, multiply] | |
oneHotSmoothLabels - ARRAY FLOAT add_scalar(add_scalar) | |
positionalAttentionMasks [-1, 256, 256] PLACEHOLDER BYTE <none> | |
predictionTokenEmbeddings - ARRAY FLOAT gather_1(gather) [matmul] | |
reduce_mean - ARRAY FLOAT reduce_mean(reduce_mean) [neg] | |
reduce_sum - ARRAY FLOAT reduce_sum(reduce_sum) [reduce_mean] | |
selfAttentionCausalMasks [-1, 1, 256, 256] PLACEHOLDER FLOAT <none> [decoder/0/self_attention_default/sub_scalar, decoder/1/self_attention_default/sub_scalar, decoder/2/self_attention_default/sub_scalar, decoder/3/self_attention_default/sub_scalar] | |
tokenEmbeddingsMatrix [30522, 768] VARIABLE FLOAT <none> [gather, matmul] | |
--- Functions --- | |
- Function Name - - Op - - Inputs - - Outputs - | |
0 gather EnhancedGatherOp [tokenEmbeddingsMatrix, decoderInputTokenVocabIndices] [batchDecoderInputTokenEmbeddings] | |
1 decoder/shape_of Shape [batchDecoderInputTokenEmbeddings] [decoder/shape_of] | |
2 decoder/strided_slice StridedSlice [decoder/shape_of] [decoder/strided_slice] | |
3 decoder/cast Cast [decoder/strided_slice] [decoder/batchSize] | |
4 decoder/shape_of_1 Shape [batchDecoderInputTokenEmbeddings] [decoder/shape_of_1] | |
5 decoder/strided_slice_1 StridedSlice [decoder/shape_of_1] [decoder/strided_slice_1] | |
6 decoder/cast_1 Cast [decoder/strided_slice_1] [decoder/decoderSequenceLength] | |
7 decoder/multiply MulOp [decoder/batchSize, decoder/decoderSequenceLength] [decoder/flatBatchSize] | |
8 decoder/stack Stack [decoder/flatBatchSize, hiddenSize] [decoder/attentionInputShape] | |
9 decoder/stack_1 Stack [decoder/batchSize, one] [decoder/stack_1] | |
10 decoder/tile Tile [decoderPositionalEmbeddingsMatrix, decoder/stack_1] [decoder/positionalEmbeddingsForAttention] | |
11 decoder/self_attention_default/multiply MulOp [decoder/batchSize, decoder/decoderSequenceLength] [decoder/self_attention_default/multiply] | |
12 decoder/self_attention_default/multiply_1 MulOp [attentionHeadsAmount, attentionHeadEmbeddingSize] [decoder/self_attention_default/multiply_1] | |
13 decoder/self_attention_default/stack Stack [decoder/self_attention_default/multiply, decoder/self_attention_default/multiply_1] [decoder/self_attention_default/attentionDotProductShape] | |
14 decoder/self_attention_default/stack_1 Stack [decoder/batchSize, decoder/decoderSequenceLength, attentionHeadsAmount, attentionHeadEmbeddingSize] [decoder/self_attention_default/keysPerHeadAttentionShape] | |
15 decoder/self_attention_default/stack_2 Stack [decoder/batchSize, decoder/decoderSequenceLength, attentionHeadsAmount, attentionHeadEmbeddingSize] [decoder/self_attention_default/queriesPerHeadAttentionShape] | |
16 decoder/reshape Reshape [batchDecoderInputTokenEmbeddings, decoder/attentionInputShape] [decoder/reshape] | |
17 decoder/0/layer_norm LayerNorm [decoder/reshape, decoder/0/normalizedAttentionInput_embedNormGain] [decoder/0/normalizedAttentionInput] | |
18 decoder/0/add AddOp [decoder/reshape, decoder/positionalEmbeddingsForAttention] [decoder/0/keyAndQueryInput_WithPositionalEmbed] | |
19 decoder/0/layer_norm_1 LayerNorm [decoder/0/keyAndQueryInput_WithPositionalEmbed, decoder/0/keyAndQueryInputNormalized_embedNormGain] [decoder/0/keyAndQueryInputNormalized] | |
20 decoder/0/self_attention_default/sub_scalar ScalarSubtraction [selfAttentionCausalMasks] [decoder/0/self_attention_default/sub_scalar] | |
21 decoder/0/self_attention_default/mul_scalar ScalarMultiplication [decoder/0/self_attention_default/sub_scalar] [decoder/0/self_attention_default/attentionMaskDisqualifier_0] | |
22 decoder/0/self_attention_default/matmul Mmul [decoder/0/keyAndQueryInputNormalized, decoder/0/self_attention_default/AttentionKeyWeights_0] [decoder/0/self_attention_default/keyProjections_0] | |
23 decoder/0/self_attention_default/matmul_1 Mmul [decoder/0/keyAndQueryInputNormalized, decoder/0/self_attention_default/AttentionQueryWeights_0] [decoder/0/self_attention_default/queryProjections_0] | |
24 decoder/0/self_attention_default/matmul_2 Mmul [decoder/0/normalizedAttentionInput, decoder/0/self_attention_default/AttentionValueWeights_0] [decoder/0/self_attention_default/valueProjections_0] | |
25 decoder/0/self_attention_default/reshape Reshape [decoder/0/self_attention_default/keyProjections_0, decoder/self_attention_default/keysPerHeadAttentionShape] [decoder/0/self_attention_default/reshape] | |
26 decoder/0/self_attention_default/permute Permute [decoder/0/self_attention_default/reshape] [decoder/0/self_attention_default/permute] | |
27 decoder/0/self_attention_default/reshape_1 Reshape [decoder/0/self_attention_default/queryProjections_0, decoder/self_attention_default/queriesPerHeadAttentionShape] [decoder/0/self_attention_default/reshape_1] | |
28 decoder/0/self_attention_default/permute_1 Permute [decoder/0/self_attention_default/reshape_1] [decoder/0/self_attention_default/permute_1] | |
29 decoder/0/self_attention_default/reshape_2 Reshape [decoder/0/self_attention_default/valueProjections_0, decoder/self_attention_default/keysPerHeadAttentionShape] [decoder/0/self_attention_default/reshape_2] | |
30 decoder/0/self_attention_default/permute_2 Permute [decoder/0/self_attention_default/reshape_2] [decoder/0/self_attention_default/permute_2] | |
31 decoder/0/self_attention_default/matmul_3 Mmul [decoder/0/self_attention_default/permute_1, decoder/0/self_attention_default/permute] [decoder/0/self_attention_default/matmul_3] | |
32 decoder/0/self_attention_default/mul_scalar_1 ScalarMultiplication [decoder/0/self_attention_default/matmul_3] [decoder/0/self_attention_default/attentionScoresBeforeMasking_0] | |
33 decoder/0/self_attention_default/add AddOp [decoder/0/self_attention_default/attentionScoresBeforeMasking_0, decoder/0/self_attention_default/attentionMaskDisqualifier_0] [decoder/0/self_attention_default/attentionWeightsMasked_0] | |
34 decoder/0/self_attention_default/softmax SoftMax [decoder/0/self_attention_default/attentionWeightsMasked_0] [decoder/0/self_attention_default/attentionSoftmaxScores_0] | |
35 decoder/0/self_attention_default/matmul_4 Mmul [decoder/0/self_attention_default/attentionSoftmaxScores_0, decoder/0/self_attention_default/permute_2] [decoder/0/self_attention_default/valuesBasedOnAttentionScores_0] | |
36 decoder/0/self_attention_default/permute_3 Permute [decoder/0/self_attention_default/valuesBasedOnAttentionScores_0] [decoder/0/self_attention_default/permute_3] | |
37 decoder/0/self_attention_default/reshape_3 Reshape [decoder/0/self_attention_default/permute_3, decoder/self_attention_default/attentionDotProductShape] [decoder/0/self_attention_default/attentionDotProductOutput_0] | |
38 decoder/0/self_attention_default/matmul_5 Mmul [decoder/0/self_attention_default/attentionDotProductOutput_0, decoder/0/self_attention_default/AttentionOutWeights_0] [decoder/0/self_attention_default/attentionOutput_0] | |
39 decoder/0/add_1 AddOp [decoder/reshape, decoder/0/self_attention_default/attentionOutput_0] [decoder/0/selfAttentionResidualProduct] | |
40 decoder/hidden_ff_layer_0/layer_norm LayerNorm [decoder/0/selfAttentionResidualProduct, decoder/hidden_ff_layer_0/hiddenLayerInputNormalized_0_embedNormGain] [decoder/hidden_ff_layer_0/hiddenLayerInputNormalized_0] | |
41 decoder/hidden_ff_layer_0/xw_plus_b XwPlusB [decoder/hidden_ff_layer_0/hiddenLayerInputNormalized_0, decoder/hidden_ff_layer_0/hiddenInnerLayerWeights_0, decoder/hidden_ff_layer_0/hiddenInnerLayerBias_0] [decoder/hidden_ff_layer_0/hiddenInnerLayerActivations_0] | |
42 decoder/hidden_ff_layer_0/gelu GELU [decoder/hidden_ff_layer_0/hiddenInnerLayerActivations_0] [decoder/hidden_ff_layer_0/hiddenInnerLayerOutput_0] | |
43 decoder/hidden_ff_layer_0/xw_plus_b_1 XwPlusB [decoder/hidden_ff_layer_0/hiddenInnerLayerOutput_0, decoder/hidden_ff_layer_0/hiddenOutLayerWeights_0, decoder/hidden_ff_layer_0/hiddenOutLayerBias_0] [decoder/hidden_ff_layer_0/hiddenFinalLayerOutput_0] | |
44 decoder/hidden_ff_layer_0/add AddOp [decoder/0/selfAttentionResidualProduct, decoder/hidden_ff_layer_0/hiddenFinalLayerOutput_0] [decoder/hidden_ff_layer_0/hiddenLayerFinalOutputNormalized_0] | |
45 decoder/1/layer_norm LayerNorm [decoder/hidden_ff_layer_0/hiddenLayerFinalOutputNormalized_0, decoder/1/normalizedAttentionInput_embedNormGain] [decoder/1/normalizedAttentionInput] | |
46 decoder/1/add AddOp [decoder/hidden_ff_layer_0/hiddenLayerFinalOutputNormalized_0, decoder/positionalEmbeddingsForAttention] [decoder/1/keyAndQueryInput_WithPositionalEmbed] | |
47 decoder/1/layer_norm_1 LayerNorm [decoder/1/keyAndQueryInput_WithPositionalEmbed, decoder/1/keyAndQueryInputNormalized_embedNormGain] [decoder/1/keyAndQueryInputNormalized] | |
48 decoder/1/self_attention_default/sub_scalar ScalarSubtraction [selfAttentionCausalMasks] [decoder/1/self_attention_default/sub_scalar] | |
49 decoder/1/self_attention_default/mul_scalar ScalarMultiplication [decoder/1/self_attention_default/sub_scalar] [decoder/1/self_attention_default/attentionMaskDisqualifier_1] | |
50 decoder/1/self_attention_default/matmul Mmul [decoder/1/keyAndQueryInputNormalized, decoder/1/self_attention_default/AttentionKeyWeights_1] [decoder/1/self_attention_default/keyProjections_1] | |
51 decoder/1/self_attention_default/matmul_1 Mmul [decoder/1/keyAndQueryInputNormalized, decoder/1/self_attention_default/AttentionQueryWeights_1] [decoder/1/self_attention_default/queryProjections_1] | |
52 decoder/1/self_attention_default/matmul_2 Mmul [decoder/1/normalizedAttentionInput, decoder/1/self_attention_default/AttentionValueWeights_1] [decoder/1/self_attention_default/valueProjections_1] | |
53 decoder/1/self_attention_default/reshape Reshape [decoder/1/self_attention_default/keyProjections_1, decoder/self_attention_default/keysPerHeadAttentionShape] [decoder/1/self_attention_default/reshape] | |
54 decoder/1/self_attention_default/permute Permute [decoder/1/self_attention_default/reshape] [decoder/1/self_attention_default/permute] | |
55 decoder/1/self_attention_default/reshape_1 Reshape [decoder/1/self_attention_default/queryProjections_1, decoder/self_attention_default/queriesPerHeadAttentionShape] [decoder/1/self_attention_default/reshape_1] | |
56 decoder/1/self_attention_default/permute_1 Permute [decoder/1/self_attention_default/reshape_1] [decoder/1/self_attention_default/permute_1] | |
57 decoder/1/self_attention_default/reshape_2 Reshape [decoder/1/self_attention_default/valueProjections_1, decoder/self_attention_default/keysPerHeadAttentionShape] [decoder/1/self_attention_default/reshape_2] | |
58 decoder/1/self_attention_default/permute_2 Permute [decoder/1/self_attention_default/reshape_2] [decoder/1/self_attention_default/permute_2] | |
59 decoder/1/self_attention_default/matmul_3 Mmul [decoder/1/self_attention_default/permute_1, decoder/1/self_attention_default/permute] [decoder/1/self_attention_default/matmul_3] | |
60 decoder/1/self_attention_default/mul_scalar_1 ScalarMultiplication [decoder/1/self_attention_default/matmul_3] [decoder/1/self_attention_default/attentionScoresBeforeMasking_1] | |
61 decoder/1/self_attention_default/add AddOp [decoder/1/self_attention_default/attentionScoresBeforeMasking_1, decoder/1/self_attention_default/attentionMaskDisqualifier_1] [decoder/1/self_attention_default/attentionWeightsMasked_1] | |
62 decoder/1/self_attention_default/softmax SoftMax [decoder/1/self_attention_default/attentionWeightsMasked_1] [decoder/1/self_attention_default/attentionSoftmaxScores_1] | |
63 decoder/1/self_attention_default/matmul_4 Mmul [decoder/1/self_attention_default/attentionSoftmaxScores_1, decoder/1/self_attention_default/permute_2] [decoder/1/self_attention_default/valuesBasedOnAttentionScores_1] | |
64 decoder/1/self_attention_default/permute_3 Permute [decoder/1/self_attention_default/valuesBasedOnAttentionScores_1] [decoder/1/self_attention_default/permute_3] | |
65 decoder/1/self_attention_default/reshape_3 Reshape [decoder/1/self_attention_default/permute_3, decoder/self_attention_default/attentionDotProductShape] [decoder/1/self_attention_default/attentionDotProductOutput_1] | |
66 decoder/1/self_attention_default/matmul_5 Mmul [decoder/1/self_attention_default/attentionDotProductOutput_1, decoder/1/self_attention_default/AttentionOutWeights_1] [decoder/1/self_attention_default/attentionOutput_1] | |
67 decoder/1/add_1 AddOp [decoder/hidden_ff_layer_0/hiddenLayerFinalOutputNormalized_0, decoder/1/self_attention_default/attentionOutput_1] [decoder/1/selfAttentionResidualProduct] | |
68 decoder/hidden_ff_layer_1/layer_norm LayerNorm [decoder/1/selfAttentionResidualProduct, decoder/hidden_ff_layer_1/hiddenLayerInputNormalized_1_embedNormGain] [decoder/hidden_ff_layer_1/hiddenLayerInputNormalized_1] | |
69 decoder/hidden_ff_layer_1/xw_plus_b XwPlusB [decoder/hidden_ff_layer_1/hiddenLayerInputNormalized_1, decoder/hidden_ff_layer_1/hiddenInnerLayerWeights_1, decoder/hidden_ff_layer_1/hiddenInnerLayerBias_1] [decoder/hidden_ff_layer_1/hiddenInnerLayerActivations_1] | |
70 decoder/hidden_ff_layer_1/gelu GELU [decoder/hidden_ff_layer_1/hiddenInnerLayerActivations_1] [decoder/hidden_ff_layer_1/hiddenInnerLayerOutput_1] | |
71 decoder/hidden_ff_layer_1/xw_plus_b_1 XwPlusB [decoder/hidden_ff_layer_1/hiddenInnerLayerOutput_1, decoder/hidden_ff_layer_1/hiddenOutLayerWeights_1, decoder/hidden_ff_layer_1/hiddenOutLayerBias_1] [decoder/hidden_ff_layer_1/hiddenFinalLayerOutput_1] | |
72 decoder/hidden_ff_layer_1/add AddOp [decoder/1/selfAttentionResidualProduct, decoder/hidden_ff_layer_1/hiddenFinalLayerOutput_1] [decoder/hidden_ff_layer_1/hiddenLayerFinalOutputNormalized_1] | |
73 decoder/2/layer_norm LayerNorm [decoder/hidden_ff_layer_1/hiddenLayerFinalOutputNormalized_1, decoder/2/normalizedAttentionInput_embedNormGain] [decoder/2/normalizedAttentionInput] | |
74 decoder/2/add AddOp [decoder/hidden_ff_layer_1/hiddenLayerFinalOutputNormalized_1, decoder/positionalEmbeddingsForAttention] [decoder/2/keyAndQueryInput_WithPositionalEmbed] | |
75 decoder/2/layer_norm_1 LayerNorm [decoder/2/keyAndQueryInput_WithPositionalEmbed, decoder/2/keyAndQueryInputNormalized_embedNormGain] [decoder/2/keyAndQueryInputNormalized] | |
76 decoder/2/self_attention_default/sub_scalar ScalarSubtraction [selfAttentionCausalMasks] [decoder/2/self_attention_default/sub_scalar] | |
77 decoder/2/self_attention_default/mul_scalar ScalarMultiplication [decoder/2/self_attention_default/sub_scalar] [decoder/2/self_attention_default/attentionMaskDisqualifier_2] | |
78 decoder/2/self_attention_default/matmul Mmul [decoder/2/keyAndQueryInputNormalized, decoder/2/self_attention_default/AttentionKeyWeights_2] [decoder/2/self_attention_default/keyProjections_2] | |
79 decoder/2/self_attention_default/matmul_1 Mmul [decoder/2/keyAndQueryInputNormalized, decoder/2/self_attention_default/AttentionQueryWeights_2] [decoder/2/self_attention_default/queryProjections_2] | |
80 decoder/2/self_attention_default/matmul_2 Mmul [decoder/2/normalizedAttentionInput, decoder/2/self_attention_default/AttentionValueWeights_2] [decoder/2/self_attention_default/valueProjections_2] | |
81 decoder/2/self_attention_default/reshape Reshape [decoder/2/self_attention_default/keyProjections_2, decoder/self_attention_default/keysPerHeadAttentionShape] [decoder/2/self_attention_default/reshape] | |
82 decoder/2/self_attention_default/permute Permute [decoder/2/self_attention_default/reshape] [decoder/2/self_attention_default/permute] | |
83 decoder/2/self_attention_default/reshape_1 Reshape [decoder/2/self_attention_default/queryProjections_2, decoder/self_attention_default/queriesPerHeadAttentionShape] [decoder/2/self_attention_default/reshape_1] | |
84 decoder/2/self_attention_default/permute_1 Permute [decoder/2/self_attention_default/reshape_1] [decoder/2/self_attention_default/permute_1] | |
85 decoder/2/self_attention_default/reshape_2 Reshape [decoder/2/self_attention_default/valueProjections_2, decoder/self_attention_default/keysPerHeadAttentionShape] [decoder/2/self_attention_default/reshape_2] | |
86 decoder/2/self_attention_default/permute_2 Permute [decoder/2/self_attention_default/reshape_2] [decoder/2/self_attention_default/permute_2] | |
87 decoder/2/self_attention_default/matmul_3 Mmul [decoder/2/self_attention_default/permute_1, decoder/2/self_attention_default/permute] [decoder/2/self_attention_default/matmul_3] | |
88 decoder/2/self_attention_default/mul_scalar_1 ScalarMultiplication [decoder/2/self_attention_default/matmul_3] [decoder/2/self_attention_default/attentionScoresBeforeMasking_2] | |
89 decoder/2/self_attention_default/add AddOp [decoder/2/self_attention_default/attentionScoresBeforeMasking_2, decoder/2/self_attention_default/attentionMaskDisqualifier_2] [decoder/2/self_attention_default/attentionWeightsMasked_2] | |
90 decoder/2/self_attention_default/softmax SoftMax [decoder/2/self_attention_default/attentionWeightsMasked_2] [decoder/2/self_attention_default/attentionSoftmaxScores_2] | |
91 decoder/2/self_attention_default/matmul_4 Mmul [decoder/2/self_attention_default/attentionSoftmaxScores_2, decoder/2/self_attention_default/permute_2] [decoder/2/self_attention_default/valuesBasedOnAttentionScores_2] | |
92 decoder/2/self_attention_default/permute_3 Permute [decoder/2/self_attention_default/valuesBasedOnAttentionScores_2] [decoder/2/self_attention_default/permute_3] | |
93 decoder/2/self_attention_default/reshape_3 Reshape [decoder/2/self_attention_default/permute_3, decoder/self_attention_default/attentionDotProductShape] [decoder/2/self_attention_default/attentionDotProductOutput_2] | |
94 decoder/2/self_attention_default/matmul_5 Mmul [decoder/2/self_attention_default/attentionDotProductOutput_2, decoder/2/self_attention_default/AttentionOutWeights_2] [decoder/2/self_attention_default/attentionOutput_2] | |
95 decoder/2/add_1 AddOp [decoder/hidden_ff_layer_1/hiddenLayerFinalOutputNormalized_1, decoder/2/self_attention_default/attentionOutput_2] [decoder/2/selfAttentionResidualProduct] | |
96 decoder/hidden_ff_layer_2/layer_norm LayerNorm [decoder/2/selfAttentionResidualProduct, decoder/hidden_ff_layer_2/hiddenLayerInputNormalized_2_embedNormGain] [decoder/hidden_ff_layer_2/hiddenLayerInputNormalized_2] | |
97 decoder/hidden_ff_layer_2/xw_plus_b XwPlusB [decoder/hidden_ff_layer_2/hiddenLayerInputNormalized_2, decoder/hidden_ff_layer_2/hiddenInnerLayerWeights_2, decoder/hidden_ff_layer_2/hiddenInnerLayerBias_2] [decoder/hidden_ff_layer_2/hiddenInnerLayerActivations_2] | |
98 decoder/hidden_ff_layer_2/gelu GELU [decoder/hidden_ff_layer_2/hiddenInnerLayerActivations_2] [decoder/hidden_ff_layer_2/hiddenInnerLayerOutput_2] | |
99 decoder/hidden_ff_layer_2/xw_plus_b_1 XwPlusB [decoder/hidden_ff_layer_2/hiddenInnerLayerOutput_2, decoder/hidden_ff_layer_2/hiddenOutLayerWeights_2, decoder/hidden_ff_layer_2/hiddenOutLayerBias_2] [decoder/hidden_ff_layer_2/hiddenFinalLayerOutput_2] | |
100 decoder/hidden_ff_layer_2/add AddOp [decoder/2/selfAttentionResidualProduct, decoder/hidden_ff_layer_2/hiddenFinalLayerOutput_2] [decoder/hidden_ff_layer_2/hiddenLayerFinalOutputNormalized_2] | |
101 decoder/3/layer_norm LayerNorm [decoder/hidden_ff_layer_2/hiddenLayerFinalOutputNormalized_2, decoder/3/normalizedAttentionInput_embedNormGain] [decoder/3/normalizedAttentionInput] | |
102 decoder/3/add AddOp [decoder/hidden_ff_layer_2/hiddenLayerFinalOutputNormalized_2, decoder/positionalEmbeddingsForAttention] [decoder/3/keyAndQueryInput_WithPositionalEmbed] | |
103 decoder/3/layer_norm_1 LayerNorm [decoder/3/keyAndQueryInput_WithPositionalEmbed, decoder/3/keyAndQueryInputNormalized_embedNormGain] [decoder/3/keyAndQueryInputNormalized] | |
104 decoder/3/self_attention_default/sub_scalar ScalarSubtraction [selfAttentionCausalMasks] [decoder/3/self_attention_default/sub_scalar] | |
105 decoder/3/self_attention_default/mul_scalar ScalarMultiplication [decoder/3/self_attention_default/sub_scalar] [decoder/3/self_attention_default/attentionMaskDisqualifier_3] | |
106 decoder/3/self_attention_default/matmul Mmul [decoder/3/keyAndQueryInputNormalized, decoder/3/self_attention_default/AttentionKeyWeights_3] [decoder/3/self_attention_default/keyProjections_3] | |
107 decoder/3/self_attention_default/matmul_1 Mmul [decoder/3/keyAndQueryInputNormalized, decoder/3/self_attention_default/AttentionQueryWeights_3] [decoder/3/self_attention_default/queryProjections_3] | |
108 decoder/3/self_attention_default/matmul_2 Mmul [decoder/3/normalizedAttentionInput, decoder/3/self_attention_default/AttentionValueWeights_3] [decoder/3/self_attention_default/valueProjections_3] | |
109 decoder/3/self_attention_default/reshape Reshape [decoder/3/self_attention_default/keyProjections_3, decoder/self_attention_default/keysPerHeadAttentionShape] [decoder/3/self_attention_default/reshape] | |
110 decoder/3/self_attention_default/permute Permute [decoder/3/self_attention_default/reshape] [decoder/3/self_attention_default/permute] | |
111 decoder/3/self_attention_default/reshape_1 Reshape [decoder/3/self_attention_default/queryProjections_3, decoder/self_attention_default/queriesPerHeadAttentionShape] [decoder/3/self_attention_default/reshape_1] | |
112 decoder/3/self_attention_default/permute_1 Permute [decoder/3/self_attention_default/reshape_1] [decoder/3/self_attention_default/permute_1] | |
113 decoder/3/self_attention_default/reshape_2 Reshape [decoder/3/self_attention_default/valueProjections_3, decoder/self_attention_default/keysPerHeadAttentionShape] [decoder/3/self_attention_default/reshape_2] | |
114 decoder/3/self_attention_default/permute_2 Permute [decoder/3/self_attention_default/reshape_2] [decoder/3/self_attention_default/permute_2] | |
115 decoder/3/self_attention_default/matmul_3 Mmul [decoder/3/self_attention_default/permute_1, decoder/3/self_attention_default/permute] [decoder/3/self_attention_default/matmul_3] | |
116 decoder/3/self_attention_default/mul_scalar_1 ScalarMultiplication [decoder/3/self_attention_default/matmul_3] [decoder/3/self_attention_default/attentionScoresBeforeMasking_3] | |
117 decoder/3/self_attention_default/add AddOp [decoder/3/self_attention_default/attentionScoresBeforeMasking_3, decoder/3/self_attention_default/attentionMaskDisqualifier_3] [decoder/3/self_attention_default/attentionWeightsMasked_3] | |
118 decoder/3/self_attention_default/softmax SoftMax [decoder/3/self_attention_default/attentionWeightsMasked_3] [decoder/3/self_attention_default/attentionSoftmaxScores_3] | |
119 decoder/3/self_attention_default/matmul_4 Mmul [decoder/3/self_attention_default/attentionSoftmaxScores_3, decoder/3/self_attention_default/permute_2] [decoder/3/self_attention_default/valuesBasedOnAttentionScores_3] | |
120 decoder/3/self_attention_default/permute_3 Permute [decoder/3/self_attention_default/valuesBasedOnAttentionScores_3] [decoder/3/self_attention_default/permute_3] | |
121 decoder/3/self_attention_default/reshape_3 Reshape [decoder/3/self_attention_default/permute_3, decoder/self_attention_default/attentionDotProductShape] [decoder/3/self_attention_default/attentionDotProductOutput_3] | |
122 decoder/3/self_attention_default/matmul_5 Mmul [decoder/3/self_attention_default/attentionDotProductOutput_3, decoder/3/self_attention_default/AttentionOutWeights_3] [decoder/3/self_attention_default/attentionOutput_3] | |
123 decoder/3/add_1 AddOp [decoder/hidden_ff_layer_2/hiddenLayerFinalOutputNormalized_2, decoder/3/self_attention_default/attentionOutput_3] [decoder/3/selfAttentionResidualProduct] | |
124 decoder/hidden_ff_layer_3/layer_norm LayerNorm [decoder/3/selfAttentionResidualProduct, decoder/hidden_ff_layer_3/hiddenLayerInputNormalized_3_embedNormGain] [decoder/hidden_ff_layer_3/hiddenLayerInputNormalized_3] | |
125 decoder/hidden_ff_layer_3/xw_plus_b XwPlusB [decoder/hidden_ff_layer_3/hiddenLayerInputNormalized_3, decoder/hidden_ff_layer_3/hiddenInnerLayerWeights_3, decoder/hidden_ff_layer_3/hiddenInnerLayerBias_3] [decoder/hidden_ff_layer_3/hiddenInnerLayerActivations_3] | |
126 decoder/hidden_ff_layer_3/gelu GELU [decoder/hidden_ff_layer_3/hiddenInnerLayerActivations_3] [decoder/hidden_ff_layer_3/hiddenInnerLayerOutput_3] | |
127 decoder/hidden_ff_layer_3/xw_plus_b_1 XwPlusB [decoder/hidden_ff_layer_3/hiddenInnerLayerOutput_3, decoder/hidden_ff_layer_3/hiddenOutLayerWeights_3, decoder/hidden_ff_layer_3/hiddenOutLayerBias_3] [decoder/hidden_ff_layer_3/hiddenFinalLayerOutput_3] | |
128 decoder/hidden_ff_layer_3/add AddOp [decoder/3/selfAttentionResidualProduct, decoder/hidden_ff_layer_3/hiddenFinalLayerOutput_3] [decoder/hidden_ff_layer_3/hiddenLayerFinalOutputNormalized_3] | |
129 decoder/layer_norm LayerNorm [decoder/hidden_ff_layer_3/hiddenLayerFinalOutputNormalized_3, decoder/decoderOutputNormalized_embedNormGain] [decoder/decoderOutputNormalized] | |
130 gather_1 EnhancedGatherOp [decoder/decoderOutputNormalized, FLAT_TARGET_BATCH_POSITIONS] [predictionTokenEmbeddings] | |
131 matmul Mmul [predictionTokenEmbeddings, tokenEmbeddingsMatrix] [finalLayerResult] | |
132 onehot OneHot [FLAT_TARGET_TOKEN_VOCAB_INDICES] [oneHotLabels] | |
133 mul_scalar ScalarMultiplication [oneHotLabels] [mul_scalar] | |
134 add_scalar ScalarAdd [mul_scalar] [oneHotSmoothLabels] | |
135 softmax SimpleSoftMaxOp [finalLayerResult] [modelPredictionLogits] | |
136 log Log [modelPredictionLogits] [log] | |
137 multiply MulOp [log, oneHotLabels] [multiply] | |
138 reduce_sum Sum [multiply] [reduce_sum] | |
139 reduce_mean Mean [reduce_sum] [reduce_mean] | |
140 neg Negative [reduce_mean] [loss] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment