Skip to content

Instantly share code, notes, and snippets.

@pzread
Last active July 3, 2023 05:47
Show Gist options
  • Save pzread/cf3e089d0a4e0d5c452aebc1df6dbc44 to your computer and use it in GitHub Desktop.
Save pzread/cf3e089d0a4e0d5c452aebc1df6dbc44 to your computer and use it in GitHub Desktop.
{
"trigger": {
"timestamp": "1688161104"
},
"execution_environment": {
"python_environment": [
{
"name": "absl-py",
"version": "1.4.0"
},
{
"name": "certifi",
"version": "2023.5.7"
},
{
"name": "charset-normalizer",
"version": "3.1.0"
},
{
"name": "chex",
"version": "0.1.81"
},
{
"name": "dm-tree",
"version": "0.1.8"
},
{
"name": "etils",
"version": "1.3.0"
},
{
"name": "filelock",
"version": "3.12.2"
},
{
"name": "flax",
"version": "0.6.11"
},
{
"name": "fsspec",
"version": "2023.6.0"
},
{
"name": "huggingface-hub",
"version": "0.15.1"
},
{
"name": "idna",
"version": "3.4"
},
{
"name": "importlib-resources",
"version": "5.12.0"
},
{
"name": "jax",
"version": "0.4.13"
},
{
"name": "jaxlib",
"version": "0.4.13+cuda11.cudnn86"
},
{
"name": "markdown-it-py",
"version": "3.0.0"
},
{
"name": "mdurl",
"version": "0.1.2"
},
{
"name": "ml-dtypes",
"version": "0.2.0"
},
{
"name": "msgpack",
"version": "1.0.5"
},
{
"name": "nest-asyncio",
"version": "1.5.6"
},
{
"name": "numpy",
"version": "1.25.0"
},
{
"name": "opt-einsum",
"version": "3.3.0"
},
{
"name": "optax",
"version": "0.1.5"
},
{
"name": "orbax-checkpoint",
"version": "0.2.7"
},
{
"name": "packaging",
"version": "23.1"
},
{
"name": "Pillow",
"version": "9.5.0"
},
{
"name": "pip",
"version": "23.1.2"
},
{
"name": "protobuf",
"version": "4.23.3"
},
{
"name": "Pygments",
"version": "2.15.1"
},
{
"name": "PyYAML",
"version": "6.0"
},
{
"name": "regex",
"version": "2023.6.3"
},
{
"name": "requests",
"version": "2.31.0"
},
{
"name": "rich",
"version": "13.4.2"
},
{
"name": "safetensors",
"version": "0.3.1"
},
{
"name": "scipy",
"version": "1.11.1"
},
{
"name": "setuptools",
"version": "59.6.0"
},
{
"name": "tensorstore",
"version": "0.1.39"
},
{
"name": "tokenizers",
"version": "0.13.3"
},
{
"name": "toolz",
"version": "0.12.0"
},
{
"name": "tqdm",
"version": "4.65.0"
},
{
"name": "transformers",
"version": "4.30.2"
},
{
"name": "typing_extensions",
"version": "4.7.0"
},
{
"name": "urllib3",
"version": "2.0.3"
},
{
"name": "zipp",
"version": "3.15.0"
}
]
},
"benchmarks": [
{
"definition": {
"benchmark_id": "(TBD!)",
"benchmark_name": "models/T5_LARGE_FP32_JAX_512XI32_BATCH1/inputs/INPUT_DATA_T5_LARGE_JAX_SEQLEN512_I32_BATCH1/expected_outputs/OUTPUT_DATA_T5_LARGE_FP32_JAX_512X1024XF32_BATCH1/target_devices/a2-highgpu-1g",
"framework": "ModelFrameworkType.JAX",
"data_type": "fp32",
"batch_size": 1,
"inputs": [
"1x512xi32",
"1x512xi32"
],
"outputs": [
"1x512x1024xf32"
],
"compiler": "xla",
"device": "a2-highgpu-1g",
"tags": [
"fp32",
"transformer-encoder",
"transformer-decoder",
"t5",
"batch-1"
]
},
"metrics": {
"framework_level": {
"min_warmup_latency_ms": 25.52878699998473,
"max_warmup_latency_ms": 47837.934887999836,
"mean_warmup_latency_ms": 9588.069566200056,
"median_warmup_latency_ms": 25.607183000147415,
"stddev_warmup_latency_ms": 21382.324747528313,
"warmup_iterations": 5,
"min_latency_ms": 20.393096999896443,
"max_latency_ms": 23.528001000158838,
"mean_latency_ms": 20.517171560009047,
"median_latency_ms": 20.439312999997128,
"stddev_latency_ms": 0.439280720341796,
"benchmark_iterations": 50,
"compile_time_s": 47.837934887999836,
"input_data_transfer_ms": 5.637465999825508
}
}
},
{
"definition": {
"benchmark_id": "(TBD!)",
"benchmark_name": "models/T5_LARGE_FP32_JAX_512XI32_BATCH16/inputs/INPUT_DATA_T5_LARGE_JAX_SEQLEN512_I32_BATCH16/expected_outputs/OUTPUT_DATA_T5_LARGE_FP32_JAX_512X1024XF32_BATCH16/target_devices/a2-highgpu-1g",
"framework": "ModelFrameworkType.JAX",
"data_type": "fp32",
"batch_size": 16,
"inputs": [
"16x512xi32",
"16x512xi32"
],
"outputs": [
"16x512x1024xf32"
],
"compiler": "xla",
"device": "a2-highgpu-1g",
"tags": [
"fp32",
"transformer-encoder",
"transformer-decoder",
"t5",
"batch-16"
]
},
"metrics": {
"framework_level": {
"min_warmup_latency_ms": 244.83693199999834,
"max_warmup_latency_ms": 44236.528324000115,
"mean_warmup_latency_ms": 9043.2044692001,
"median_warmup_latency_ms": 244.88933600014207,
"stddev_warmup_latency_ms": 19673.666123397856,
"warmup_iterations": 5,
"min_latency_ms": 244.73059999991165,
"max_latency_ms": 244.97117400005664,
"mean_latency_ms": 244.8336874800043,
"median_latency_ms": 244.83465949992933,
"stddev_latency_ms": 0.053037209136561805,
"benchmark_iterations": 50,
"compile_time_s": 44.23652832400011,
"input_data_transfer_ms": 5.467187000022022
}
}
},
{
"definition": {
"benchmark_id": "(TBD!)",
"benchmark_name": "models/T5_LARGE_FP32_JAX_512XI32_BATCH24/inputs/INPUT_DATA_T5_LARGE_JAX_SEQLEN512_I32_BATCH24/expected_outputs/OUTPUT_DATA_T5_LARGE_FP32_JAX_512X1024XF32_BATCH24/target_devices/a2-highgpu-1g",
"framework": "ModelFrameworkType.JAX",
"data_type": "fp32",
"batch_size": 24,
"inputs": [
"24x512xi32",
"24x512xi32"
],
"outputs": [
"24x512x1024xf32"
],
"compiler": "xla",
"device": "a2-highgpu-1g",
"tags": [
"fp32",
"transformer-encoder",
"transformer-decoder",
"t5",
"batch-24"
]
},
"metrics": {
"framework_level": {
"min_warmup_latency_ms": 368.34331599993675,
"max_warmup_latency_ms": 44615.365311,
"mean_warmup_latency_ms": 9217.796759799967,
"median_warmup_latency_ms": 368.4162939998714,
"stddev_warmup_latency_ms": 19787.842379713344,
"warmup_iterations": 5,
"min_latency_ms": 368.2863549997819,
"max_latency_ms": 368.5404459997699,
"mean_latency_ms": 368.40360118003446,
"median_latency_ms": 368.4055305000129,
"stddev_latency_ms": 0.05533822376783956,
"benchmark_iterations": 50,
"compile_time_s": 44.615365311,
"input_data_transfer_ms": 5.538116000025184
}
}
},
{
"definition": {
"benchmark_id": "(TBD!)",
"benchmark_name": "models/T5_LARGE_FP32_JAX_512XI32_BATCH32/inputs/INPUT_DATA_T5_LARGE_JAX_SEQLEN512_I32_BATCH32/expected_outputs/OUTPUT_DATA_T5_LARGE_FP32_JAX_512X1024XF32_BATCH32/target_devices/a2-highgpu-1g",
"framework": "ModelFrameworkType.JAX",
"data_type": "fp32",
"batch_size": 32,
"inputs": [
"32x512xi32",
"32x512xi32"
],
"outputs": [
"32x512x1024xf32"
],
"compiler": "xla",
"device": "a2-highgpu-1g",
"tags": [
"fp32",
"transformer-encoder",
"transformer-decoder",
"t5",
"batch-32"
]
},
"metrics": {
"framework_level": {
"min_warmup_latency_ms": 479.11538900007145,
"max_warmup_latency_ms": 45043.387337999775,
"mean_warmup_latency_ms": 9392.066673999943,
"median_warmup_latency_ms": 479.2672799999309,
"stddev_warmup_latency_ms": 19929.694123263933,
"warmup_iterations": 5,
"min_latency_ms": 479.065078000076,
"max_latency_ms": 479.3498209996869,
"mean_latency_ms": 479.19833996001216,
"median_latency_ms": 479.20000099998106,
"stddev_latency_ms": 0.05522368829962701,
"benchmark_iterations": 50,
"compile_time_s": 45.043387337999775,
"input_data_transfer_ms": 5.668290000357956
}
}
},
{
"definition": {
"benchmark_id": "(TBD!)",
"benchmark_name": "models/T5_LARGE_FP32_JAX_512XI32_BATCH48/inputs/INPUT_DATA_T5_LARGE_JAX_SEQLEN512_I32_BATCH48/expected_outputs/OUTPUT_DATA_T5_LARGE_FP32_JAX_512X1024XF32_BATCH48/target_devices/a2-highgpu-1g",
"framework": "ModelFrameworkType.JAX",
"data_type": "fp32",
"batch_size": 48,
"inputs": [
"48x512xi32",
"48x512xi32"
],
"outputs": [
"48x512x1024xf32"
],
"compiler": "xla",
"device": "a2-highgpu-1g",
"tags": [
"fp32",
"transformer-encoder",
"transformer-decoder",
"t5",
"batch-48"
]
},
"metrics": {
"framework_level": {
"min_warmup_latency_ms": 720.0934930001495,
"max_warmup_latency_ms": 45663.77306300001,
"mean_warmup_latency_ms": 9708.928347399979,
"median_warmup_latency_ms": 720.2062629999091,
"stddev_warmup_latency_ms": 20099.369226450224,
"warmup_iterations": 5,
"min_latency_ms": 719.9351499998556,
"max_latency_ms": 720.3005269998357,
"mean_latency_ms": 720.1411012600238,
"median_latency_ms": 720.1415810000071,
"stddev_latency_ms": 0.08272307828598831,
"benchmark_iterations": 50,
"compile_time_s": 45.66377306300001,
"input_data_transfer_ms": 5.7290899999316025
}
}
},
{
"definition": {
"benchmark_id": "(TBD!)",
"benchmark_name": "models/T5_LARGE_FP32_JAX_512XI32_BATCH64/inputs/INPUT_DATA_T5_LARGE_JAX_SEQLEN512_I32_BATCH64/expected_outputs/OUTPUT_DATA_T5_LARGE_FP32_JAX_512X1024XF32_BATCH64/target_devices/a2-highgpu-1g",
"framework": "ModelFrameworkType.JAX",
"data_type": "fp32",
"batch_size": 64,
"inputs": [
"64x512xi32",
"64x512xi32"
],
"outputs": [
"64x512x1024xf32"
],
"compiler": "xla",
"device": "a2-highgpu-1g",
"tags": [
"fp32",
"transformer-encoder",
"transformer-decoder",
"t5",
"batch-64"
]
},
"metrics": {
"framework_level": {}
}
},
{
"definition": {
"benchmark_id": "(TBD!)",
"benchmark_name": "models/T5_LARGE_FP32_JAX_512XI32_BATCH512/inputs/INPUT_DATA_T5_LARGE_JAX_SEQLEN512_I32_BATCH512/expected_outputs/OUTPUT_DATA_T5_LARGE_FP32_JAX_512X1024XF32_BATCH512/target_devices/a2-highgpu-1g",
"framework": "ModelFrameworkType.JAX",
"data_type": "fp32",
"batch_size": 512,
"inputs": [
"512x512xi32",
"512x512xi32"
],
"outputs": [
"512x512x1024xf32"
],
"compiler": "xla",
"device": "a2-highgpu-1g",
"tags": [
"fp32",
"transformer-encoder",
"transformer-decoder",
"t5",
"batch-512"
]
},
"metrics": {
"framework_level": {}
}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment