Junmin Hao hjm-aws

## xla_fully_sharded_data_parallel.py
# This file is largely inspired by and mostly follows the structure of
# ``fairscale.nn.FullyShardedDataParallel`` in
# https://github.com/facebookresearch/fairscale/blob/main/fairscale/nn/data_parallel/fully_sharded_data_parallel.py

from collections import OrderedDict
import contextlib
from enum import Enum, auto
import functools
import gc
from itertools import chain

## embedding-output-is-fp32-even-after-the-embedding-weight-is-casted-to-bf16.ipynb

      
      1 file · 0 forks · 0 comments · 0 stars
      hjm-aws / embedding-output-is-fp32-even-after-the-embedding-weight-is-casted-to-bf16.ipynb
      Last active August 16, 2022 19:40
            
              
                Getting Started with PyTorch on Cloud TPUs
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## reduce_scatter_coalesce_graph_change.py
import os

import torch
import torch_xla.core.xla_model as xm
import torch_xla.distributed.xla_multiprocessing as xmp
from torch_xla.distributed.fsdp import XlaFullyShardedDataParallel as FSDP
from fairscale.nn.wrap import enable_wrap, auto_wrap, default_auto_wrap_policy
from transformers import BertTokenizer, BertForMaskedLM, BertConfig
import functools

## test
Second version (i.e., my fixed version):
	# This file is largely inspired by and mostly follows the structure of
	# ``fairscale.nn.FullyShardedDataParallel`` in
	# https://github.com/facebookresearch/fairscale/blob/main/fairscale/nn/data_parallel/fully_sharded_data_parallel.py

	from collections import OrderedDict
	import contextlib
	from enum import Enum, auto
	import functools
	import gc
	from itertools import chain
	import os

	import torch
	import torch_xla.core.xla_model as xm
	import torch_xla.distributed.xla_multiprocessing as xmp
	from torch_xla.distributed.fsdp import XlaFullyShardedDataParallel as FSDP
	from fairscale.nn.wrap import enable_wrap, auto_wrap, default_auto_wrap_policy
	from transformers import BertTokenizer, BertForMaskedLM, BertConfig
	import functools