Skip to content

Instantly share code, notes, and snippets.

@pzread
Created July 1, 2023 01:11
Show Gist options
  • Save pzread/2be3b2db7c0ffa14518085f08e33b815 to your computer and use it in GitHub Desktop.
Save pzread/2be3b2db7c0ffa14518085f08e33b815 to your computer and use it in GitHub Desktop.
{
"trigger": {
"timestamp": "1688166610"
},
"benchmarks": [
{
"definition": {
"benchmark_id": "173c7180-bad4-4b91-8423-4beeb13d2b0a-MODEL_T5_LARGE-fp32-JAX-512xi32-batch1",
"benchmark_name": "T5_LARGE_FP32_JAX_512XI32_BATCH1",
"framework": "ModelFrameworkType.JAX",
"data_type": "DataType.FP32",
"batch_size": 1,
"inputs": [
"1x512xi32",
"1x512xi32"
],
"outputs": [
"1x512x1024xf32"
],
"compiler": "xla",
"device": "gpu",
"tags": [
"fp32",
"transformer-encoder",
"transformer-decoder",
"t5",
"batch-1"
]
},
"metrics": {
"framework_level": {
"min_warmup_latency_ms": 20.655435044318438,
"max_warmup_latency_ms": 50176.288513001055,
"mean_warmup_latency_ms": 10054.563094791956,
"median_warmup_latency_ms": 25.850410922430456,
"stddev_warmup_latency_ms": 22428.7264534675,
"warmup_iterations": 5,
"min_latency_ms": 20.552234025672078,
"max_latency_ms": 20.809899899177253,
"mean_latency_ms": 20.659179559443146,
"median_latency_ms": 20.647069963160902,
"stddev_latency_ms": 0.05457004473826859,
"benchmark_iterations": 50,
"compile_time_s": 50.176288513001055,
"input_data_transfer_ms": 0.5347649566829205
}
}
},
{
"definition": {
"benchmark_id": "173c7180-bad4-4b91-8423-4beeb13d2b0a-MODEL_T5_LARGE-fp32-JAX-512xi32-batch16",
"benchmark_name": "T5_LARGE_FP32_JAX_512XI32_BATCH16",
"framework": "ModelFrameworkType.JAX",
"data_type": "DataType.FP32",
"batch_size": 16,
"inputs": [
"16x512xi32",
"16x512xi32"
],
"outputs": [
"16x512x1024xf32"
],
"compiler": "xla",
"device": "gpu",
"tags": [
"fp32",
"transformer-encoder",
"transformer-decoder",
"t5",
"batch-16"
]
},
"metrics": {
"framework_level": {
"min_warmup_latency_ms": 245.36785506643355,
"max_warmup_latency_ms": 44760.09083108511,
"mean_warmup_latency_ms": 9148.438019421883,
"median_warmup_latency_ms": 245.4743740381673,
"stddev_warmup_latency_ms": 19907.51912043897,
"warmup_iterations": 5,
"min_latency_ms": 245.34180504269898,
"max_latency_ms": 245.61262398492545,
"mean_latency_ms": 245.50913302926347,
"median_latency_ms": 245.5175855429843,
"stddev_latency_ms": 0.04777120683995344,
"benchmark_iterations": 50,
"compile_time_s": 44.76009083108511,
"input_data_transfer_ms": 0.4104010295122862
}
}
},
{
"definition": {
"benchmark_id": "173c7180-bad4-4b91-8423-4beeb13d2b0a-MODEL_T5_LARGE-fp32-JAX-512xi32-batch24",
"benchmark_name": "T5_LARGE_FP32_JAX_512XI32_BATCH24",
"framework": "ModelFrameworkType.JAX",
"data_type": "DataType.FP32",
"batch_size": 24,
"inputs": [
"24x512xi32",
"24x512xi32"
],
"outputs": [
"24x512x1024xf32"
],
"compiler": "xla",
"device": "gpu",
"tags": [
"fp32",
"transformer-encoder",
"transformer-decoder",
"t5",
"batch-24"
]
},
"metrics": {
"framework_level": {
"min_warmup_latency_ms": 368.98510495666414,
"max_warmup_latency_ms": 45109.30842696689,
"mean_warmup_latency_ms": 9317.142666201107,
"median_warmup_latency_ms": 369.1369229927659,
"stddev_warmup_latency_ms": 20008.42892586601,
"warmup_iterations": 5,
"min_latency_ms": 368.92264301422983,
"max_latency_ms": 369.16676396504045,
"mean_latency_ms": 369.0529297059402,
"median_latency_ms": 369.05131745152175,
"stddev_latency_ms": 0.04959245737189531,
"benchmark_iterations": 50,
"compile_time_s": 45.10930842696689,
"input_data_transfer_ms": 0.5262399790808558
}
}
},
{
"definition": {
"benchmark_id": "173c7180-bad4-4b91-8423-4beeb13d2b0a-MODEL_T5_LARGE-fp32-JAX-512xi32-batch32",
"benchmark_name": "T5_LARGE_FP32_JAX_512XI32_BATCH32",
"framework": "ModelFrameworkType.JAX",
"data_type": "DataType.FP32",
"batch_size": 32,
"inputs": [
"32x512xi32",
"32x512xi32"
],
"outputs": [
"32x512x1024xf32"
],
"compiler": "xla",
"device": "gpu",
"tags": [
"fp32",
"transformer-encoder",
"transformer-decoder",
"t5",
"batch-32"
]
},
"metrics": {
"framework_level": {
"min_warmup_latency_ms": 480.0509420456365,
"max_warmup_latency_ms": 45333.85372895282,
"mean_warmup_latency_ms": 9450.862502027303,
"median_warmup_latency_ms": 480.13286804780364,
"stddev_warmup_latency_ms": 20059.201904894326,
"warmup_iterations": 5,
"min_latency_ms": 479.95751502458006,
"max_latency_ms": 480.5134879425168,
"mean_latency_ms": 480.10662383865565,
"median_latency_ms": 480.09840404847637,
"stddev_latency_ms": 0.08080983067290791,
"benchmark_iterations": 50,
"compile_time_s": 45.33385372895282,
"input_data_transfer_ms": 0.5753289442509413
}
}
},
{
"definition": {
"benchmark_id": "173c7180-bad4-4b91-8423-4beeb13d2b0a-MODEL_T5_LARGE-fp32-JAX-512xi32-batch48",
"benchmark_name": "T5_LARGE_FP32_JAX_512XI32_BATCH48",
"framework": "ModelFrameworkType.JAX",
"data_type": "DataType.FP32",
"batch_size": 48,
"inputs": [
"48x512xi32",
"48x512xi32"
],
"outputs": [
"48x512x1024xf32"
],
"compiler": "xla",
"device": "gpu",
"tags": [
"fp32",
"transformer-encoder",
"transformer-decoder",
"t5",
"batch-48"
]
},
"metrics": {
"framework_level": {
"min_warmup_latency_ms": 721.3618629612029,
"max_warmup_latency_ms": 45887.21120392438,
"mean_warmup_latency_ms": 9754.539297986776,
"median_warmup_latency_ms": 721.3717340491712,
"stddev_warmup_latency_ms": 20198.777647594525,
"warmup_iterations": 5,
"min_latency_ms": 720.7678849808872,
"max_latency_ms": 721.505339955911,
"mean_latency_ms": 721.1524451547302,
"median_latency_ms": 721.028728003148,
"stddev_latency_ms": 0.26223423855135464,
"benchmark_iterations": 50,
"compile_time_s": 45.88721120392438,
"input_data_transfer_ms": 0.5476829828694463
}
}
},
{
"definition": {
"benchmark_id": "173c7180-bad4-4b91-8423-4beeb13d2b0a-MODEL_T5_LARGE-fp32-JAX-512xi32-batch64",
"benchmark_name": "T5_LARGE_FP32_JAX_512XI32_BATCH64",
"framework": "ModelFrameworkType.JAX",
"data_type": "DataType.FP32",
"batch_size": 64,
"inputs": [
"64x512xi32",
"64x512xi32"
],
"outputs": [
"64x512x1024xf32"
],
"compiler": "xla",
"device": "gpu",
"tags": [
"fp32",
"transformer-encoder",
"transformer-decoder",
"t5",
"batch-64"
]
},
"metrics": {
"framework_level": {}
}
},
{
"definition": {
"benchmark_id": "173c7180-bad4-4b91-8423-4beeb13d2b0a-MODEL_T5_LARGE-fp32-JAX-512xi32-batch512",
"benchmark_name": "T5_LARGE_FP32_JAX_512XI32_BATCH512",
"framework": "ModelFrameworkType.JAX",
"data_type": "DataType.FP32",
"batch_size": 512,
"inputs": [
"512x512xi32",
"512x512xi32"
],
"outputs": [
"512x512x1024xf32"
],
"compiler": "xla",
"device": "gpu",
"tags": [
"fp32",
"transformer-encoder",
"transformer-decoder",
"t5",
"batch-512"
]
},
"metrics": {
"framework_level": {}
}
}
],
"execution_environment": {
"python_environment": {
"absl-py": "1.4.0",
"certifi": "2023.5.7",
"charset-normalizer": "3.1.0",
"chex": "0.1.81",
"dm-tree": "0.1.8",
"etils": "1.3.0",
"filelock": "3.12.2",
"flax": "0.6.11",
"fsspec": "2023.6.0",
"huggingface-hub": "0.15.1",
"idna": "3.4",
"importlib-resources": "5.12.0",
"jax": "0.4.13",
"jaxlib": "0.4.13+cuda11.cudnn86",
"markdown-it-py": "3.0.0",
"mdurl": "0.1.2",
"ml-dtypes": "0.2.0",
"msgpack": "1.0.5",
"nest-asyncio": "1.5.6",
"numpy": "1.25.0",
"opt-einsum": "3.3.0",
"optax": "0.1.5",
"orbax-checkpoint": "0.2.7",
"packaging": "23.1",
"Pillow": "9.5.0",
"pip": "23.1.2",
"protobuf": "4.23.3",
"Pygments": "2.15.1",
"PyYAML": "6.0",
"regex": "2023.6.3",
"requests": "2.31.0",
"rich": "13.4.2",
"safetensors": "0.3.1",
"scipy": "1.11.1",
"setuptools": "59.6.0",
"tensorstore": "0.1.39",
"tokenizers": "0.13.3",
"toolz": "0.12.0",
"tqdm": "4.65.0",
"transformers": "4.30.2",
"typing_extensions": "4.7.0",
"urllib3": "2.0.3",
"zipp": "3.15.0"
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment