Junfeng Tian rgtjf

## quantile_monitor.py
"""
The quantile monitor monitors the inputs and outputs, as well as simple transforms of them.
It logs the required quantile values.

Links to the paper and reading notes:
    - Small-scale proxies for large-scale Transformer training instabilities
    - Mitchell Wortsman et al.
    - https://arxiv.org/abs/2309.14322
    - notion link: https://www.notion.so/nyonic/Small-scale-proxies-for-large-scale-Transformer-training-instabilities-95f7d37711f34d8ebae4f505bc160830  # noqa
"""

## ds2_to_ds2.py
"""This script converts a DeepSpeed checkpoint from one format to another.

It requires specifying an input_folder and a target_folder before starting the
conversion. To determine the target folder, first run the script without checkpointing
using the target cluster.

The conversion process involves the following steps:
1. Building a linked matrix on the input DeepSpeed checkpoint to establish mappings
   between tensor slices.
2. Merging the slice files based on the linked matrix.

## tensorflow 1.8, cuda 9.0, Install.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                rgtjf
                / tensorflow 1.8, cuda 9.0, Install.md
            
            
              Last active
              April 28, 2018 04:42
            
          
    Install Tensorflow>=1.5.0


Install Tensorflow on Ubuntu r1.8

Install CUDA


Note:

Install CUDA 9.0, not 9.1
The package has already been downloaded, in UBUNTU/home/junfeng
Remove Old Version


## tf_idf.py


def tf(sentence_list, min_cnt=1, max_cnt=None):
    doc_num = 0
    word_list = []
    for sequence in sentence_list:
        word_list += sequence
        doc_num += 1

    word_count = Counter()

## tf_utils.py
import random
import numpy as np
import re


def make_batches(size, batch_size):
    """

    :param size: the size of dataset
    :param batch_size: the size of batch
	"""
	The quantile monitor monitors the inputs and outputs, as well as simple transforms of them.
	It logs the required quantile values.

	Links to the paper and reading notes:
	- Small-scale proxies for large-scale Transformer training instabilities
	- Mitchell Wortsman et al.
	- https://arxiv.org/abs/2309.14322
	- notion link: https://www.notion.so/nyonic/Small-scale-proxies-for-large-scale-Transformer-training-instabilities-95f7d37711f34d8ebae4f505bc160830 # noqa
	"""
	"""This script converts a DeepSpeed checkpoint from one format to another.

	It requires specifying an input_folder and a target_folder before starting the
	conversion. To determine the target folder, first run the script without checkpointing
	using the target cluster.

	The conversion process involves the following steps:
	1. Building a linked matrix on the input DeepSpeed checkpoint to establish mappings
	between tensor slices.
	2. Merging the slice files based on the linked matrix.


	def tf(sentence_list, min_cnt=1, max_cnt=None):
	doc_num = 0
	word_list = []
	for sequence in sentence_list:
	word_list += sequence
	doc_num += 1

	word_count = Counter()
	import random
	import numpy as np
	import re


	def make_batches(size, batch_size):
	"""

	:param size: the size of dataset
	:param batch_size: the size of batch