Qiao Zhang zhangqiaorjc

## sincos-remat-example.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                zhangqiaorjc
                / sincos-remat-example.ipynb
            
            
              Created
              November 10, 2023 05:27
            
              
                sincos remat example.ipynb
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## bfloat16-training.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                zhangqiaorjc
                / bfloat16-training.ipynb
            
            
              Created
              April 22, 2023 17:04
            
              
                bfloat16-training.ipynb
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## collective_matmul_allgather_lhs_non_contracting.py
import numpy as np
import os, re
import jax
from jax.experimental import maps
from jax.experimental import pjit
import jax.numpy as jnp

from jax.experimental import mesh_utils

from absl import flags

## gpu_scaling.py
"""Decoder-only LM scaling experiments on GPUs."""

from jax import numpy as jnp
from paxml import experiment_registry
from paxml.tasks.lm.params.lm_cloud import LmCloudSpmd
from paxml.tasks.lm.params.lm_cloud import LmCloudSpmdPipeline
from praxis import layers


# TODO(zhangqiaorjc): Might need to use pmap instead of pjit for smaller models.

## copy-of-jax-transformer-model-for-fp8-no-decode-cache-shared-with-nvidia.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                zhangqiaorjc
                / copy-of-jax-transformer-model-for-fp8-no-decode-cache-shared-with-nvidia.ipynb
            
            
              Created
              December 14, 2022 07:32
            
              
                Copy of JAX Transformer model for fp8 (no decode cache) shared with NVIDIA.ipynb
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## copy-of-mnist-fp8-for-sharing-with-nvidia.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                zhangqiaorjc
                / copy-of-mnist-fp8-for-sharing-with-nvidia.ipynb
            
            
              Last active
              December 14, 2022 07:25
            
              
                Copy of mnist fp8 for sharing with NVIDIA.ipynb
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## copy-of-mnist-fp8-for-sharing-with-nvidia.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                zhangqiaorjc
                / copy-of-mnist-fp8-for-sharing-with-nvidia.ipynb
            
            
              Created
              December 7, 2022 06:26
            
              
                Copy of mnist fp8 for sharing with NVIDIA.ipynb
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## mnist_keras_fp8.py
"""Runs a simple MNIST model with fake FP8, using FP8 scaling.

The HLO can be dumped by setting the environment variable:

  XLA_FLAGS='--xla_dump_disable_metadata=true --xla_dump_to=/tmp/hlo'


"""
import tensorflow as tf

## matmul_fp8_ex.py
from absl.testing import absltest
from absl import logging
import jax
import jax.numpy as jnp


def amax(x):
  """Returns the maximum absolute value over all elements of `x`."""
  magnitudes = jnp.abs(x)
  return jnp.max(magnitudes)


## make_hlo.py
def make_hlo(f, optimize=False, metadata=False, platform=None):
  """Utility function for printing JAX-emitted HLO and XLA-compiled HLO.

  Args:
    f: jax function to return hlo for.
    optimize: bool: whether to return platform-specific, XLA-optimized HLO
    metadata: bool: whether to include JAX metadata information
    platform: Optional[str]: None, 'cpu','gpu','tpu' - platform to compile for,
      None uses default.
	import numpy as np
	import os, re
	import jax
	from jax.experimental import maps
	from jax.experimental import pjit
	import jax.numpy as jnp

	from jax.experimental import mesh_utils

	from absl import flags
	"""Decoder-only LM scaling experiments on GPUs."""

	from jax import numpy as jnp
	from paxml import experiment_registry
	from paxml.tasks.lm.params.lm_cloud import LmCloudSpmd
	from paxml.tasks.lm.params.lm_cloud import LmCloudSpmdPipeline
	from praxis import layers


	# TODO(zhangqiaorjc): Might need to use pmap instead of pjit for smaller models.
	"""Runs a simple mnist model with fake FP8. FP8 scaling is used.

	The HLO can be dumped by setting the environment variable:

	XLA_FLAGS='--xla_dump_disable_metadata=true --xla_dump_to=/tmp/hlo'


	"""
	import tensorflow as tf
	from absl.testing import absltest
	from absl import logging
	import jax
	import jax.numpy as jnp


	def amax(x):
	return jnp.max(jnp.abs(x))
	def make_hlo(f, optimize=False, metadata=False, platform=None):
	"""Utility function for printing JAX-emitted HLO and XLA-compiled HLO.

	Args:
	f: jax function to return hlo for.
	optimize: bool: whether to return platform-specific, XLA-optimized HLO
	metadata: bool: whether to include JAX metadata information
	platform: Optional[str]: None, 'cpu','gpu','tpu' - platform to compile for,
	None uses default.