Dzmitry Bahdanau rizar

## residual_mlp.py
import os
import time
import torch
from torch.nn import Linear, ReLU
from torch.profiler import profile, ProfilerActivity

from codeparrot.build_table import build_table

# Module-level handle for the CUDA device. Constructing a torch.device does
# not by itself verify that a GPU is present; a failure would only surface
# when something is actually placed on this device.
device = torch.device('cuda')

## megatron.csv

          
            system
            GPU count
            Training time
            Model size
            Tokens
            teraWFLOP/s

            
              MegaTron + DeepSpeed
              2240
              60.1
              5.3E+11
              3932160
              92.9

            
              HyperCLOVA
              1024
              1157760
              8.2E+10
              1.5E+11
              62.2

            
              MegaTron LM GPT-3 Example
              1024
              32
              1.75E+11
              3145728
              100.8

## throughput.csv

          
            model
             precision
             throughput in teraWFLOPs

            
              matmul 8192x1600x6400
               bfloat16
               237

            
              linear MLP B=8192; 1600 -> 6400 -> 1600 -> 1
               bfloat16
               230

            
              linear MLP B=8192; 1600 -> 6400 -> 1600 -> 1
               mixed
               207.6

            
              residual ReLU MLP B=8192; 1600 -> 6400 -> 1600 -> 1
               mixed
               185.7

            
              HuggingFace GPT2 B=32 L=256; d=1600
               mixed
               68

## frisbee.py
import numpy
import sys

players = [
    'Alex Lacoste',
    'Karl Janelle',
    'JF Marcil',
    'Pat Steeves',
    'Dzmitry Bahdanau',
    'Philippe Guay',

## error.txt
> static@1.0.0 dev-build /home/dzmitry/Dist/arl-omnishop/static
> webpack --progress -d --config webpack.config.js

clean-webpack-plugin: /home/dzmitry/Dist/arl-omnishop/static/dist has been removed.
 10% building modules 4/11 modules 7 active .../Dist/arl-omnishop/static/sass/site.cssℹ ｢atl｣: Using typescript@3.1.1 from typescript
ℹ ｢atl｣: Using tsconfig.json from /home/dzmitry/Dist/arl-omnishop/static/tsconfig.json
 94% after sealℹ ｢atl｣: Checking started in a separate process...
✖ ｢atl｣: Checking finished with 47 errors
Hash: 248bef2f47830ed686dd
Version: webpack 4.20.2

## dev_build.txt
╰─$ npm run dev-build                                                                                                                 2 ↵

> static@1.0.0 dev-build /home/dzmitry/Dist/arl-omnishop/static
> webpack --progress -d --config webpack.config.js

clean-webpack-plugin: /home/dzmitry/Dist/arl-omnishop/static/dist has been removed.
 10% building modules 4/11 modules 7 active .../Dist/arl-omnishop/static/sass/site.cssℹ ｢atl｣: Using typescript@3.1.1 from typescript
ℹ ｢atl｣: Using tsconfig.json from /home/dzmitry/Dist/arl-omnishop/static/tsconfig.json
 94% after sealℹ ｢atl｣: Checking started in a separate process...
✖ ｢atl｣: Checking finished with 26 errors

## dev_build.txt
╰─$ npm run dev-build                                                                                                                 2 ↵

> static@1.0.0 dev-build /home/dzmitry/Dist/arl-omnishop/static
> webpack --progress -d --config webpack.config.js

clean-webpack-plugin: /home/dzmitry/Dist/arl-omnishop/static/dist has been removed.
 10% building modules 4/11 modules 7 active .../Dist/arl-omnishop/static/sass/site.cssℹ ｢atl｣: Using typescript@3.1.1 from typescript
ℹ ｢atl｣: Using tsconfig.json from /home/dzmitry/Dist/arl-omnishop/static/tsconfig.json
 94% after sealℹ ｢atl｣: Checking started in a separate process...
✖ ｢atl｣: Checking finished with 26 errors

## beautiful_plots.py
def compare_rl_and_gail(metric, window, ylabel=None, xlim=None, fs=15, ncol=2):
    """Print the latest smoothed value of `metric` for six run groups.

    The groups are taken from the module-level frames: rows of `df_rl` whose
    `config.rp_weight` equals 1.0 and 0.0, followed by rows of `df_gail`
    whose `config.rmt.top_k_percent` equals 0.0, 0.25, 0.5 and 0.75. For each
    group, `average_value(group, metric)` is smoothed with a rolling mean
    over `window` points, multiplied by 100, and its last entry is printed.

    Args:
        metric: Forwarded unchanged to `average_value`.
        window: Size of the rolling-mean window.
        ylabel: Not used by the code in this block.
        xlim: Not used by the code in this block.
        fs: Not used by the code in this block.
        ncol: Not used by the code in this block.
    """
    groups = [df_rl[df_rl['config.rp_weight'] == weight]
              for weight in (1.0, 0.0)]
    groups += [df_gail[df_gail['config.rmt.top_k_percent'] == share]
               for share in (0., 0.25, 0.5, 0.75)]
    for df in groups:
        # The top-level pandas.rolling_mean() helper no longer exists in
        # pandas; .rolling(window).mean() is its replacement and keeps the
        # same default min_periods. This assumes average_value() returns a
        # pandas Series/DataFrame, which the .iloc access below already
        # requires.
        value = 100 * average_value(df, metric).rolling(window).mean()
        print(value.iloc[-1])

## beautiful_plots.py
def compare_rl_and_gail(metric, window, ylabel=None, xlim=None, fs=15, ncol=2):
    """Print the latest smoothed value of `metric` for six run groups.

    The groups are taken from the module-level frames: rows of `df_rl` whose
    `config.rp_weight` equals 1.0 and 0.0, followed by rows of `df_gail`
    whose `config.rmt.top_k_percent` equals 0.0, 0.25, 0.5 and 0.75. For each
    group, `average_value(group, metric)` is smoothed with a rolling mean
    over `window` points, multiplied by 100, and its last entry is printed.

    Args:
        metric: Forwarded unchanged to `average_value`.
        window: Size of the rolling-mean window.
        ylabel: Not used by the code in this block.
        xlim: Not used by the code in this block.
        fs: Not used by the code in this block.
        ncol: Not used by the code in this block.
    """
    groups = [df_rl[df_rl['config.rp_weight'] == weight]
              for weight in (1.0, 0.0)]
    groups += [df_gail[df_gail['config.rmt.top_k_percent'] == share]
               for share in (0., 0.25, 0.5, 0.75)]
    for df in groups:
        # The top-level pandas.rolling_mean() helper no longer exists in
        # pandas; .rolling(window).mean() is its replacement and keeps the
        # same default min_periods. This assumes average_value() returns a
        # pandas Series/DataFrame, which the .iloc access below already
        # requires.
        value = 100 * average_value(df, metric).rolling(window).mean()
        print(value.iloc[-1])

## alphas_again.py
def plot_key_alphas(data, softmax=True, fixed_axis=True):
  """Normalize three selected columns for the first three entries of `data`.

  A figure with a 1x3 grid of axes is created and sized to 15x2 inches.
  Then, for k = 0, 1, 2, `data[k]` is converted to an array, columns 4, 5
  and 7 are kept, and every row is rescaled so that it sums to one.

  Args:
    data: Indexable by 0..2; each entry must convert to an array that can
      be indexed on its second axis up to column 7.
    softmax: If true, rows are normalized with a softmax; otherwise each
      row is divided by its plain sum.
    fixed_axis: Not used by the code in this block.
  """
  f, axes = pyplot.subplots(1, 3)
  f.set_size_inches((15, 2))
  for k in range(3):
    arr = numpy.array(data[k])
    arr = arr[:, [4, 5, 7]]
    if softmax:
      # Subtract the per-row maximum before exponentiating: the result is
      # mathematically the same softmax, but large inputs can no longer
      # overflow to inf and turn the row into NaNs. The exponentials are
      # also computed once instead of twice.
      exps = numpy.exp(arr - arr.max(axis=1)[:, None])
      arr = exps / exps.sum(axis=1)[:, None]
    else:
      arr = arr / arr.sum(axis=1)[:, None]
	import os
	import time
	import torch
	from torch.nn import Linear, ReLU
	from torch.profiler import profile, ProfilerActivity

	from codeparrot.build_table import build_table

	device = torch.device('cuda')
system	GPU count	Training time	Model size	Tokens	teraWFLOP/s
MegaTron + DeepSpeed	2240	60.1	5.3E+11	3932160	92.9
HyperCLOVA	1024	1157760	8.2E+10	1.5E+11	62.2
MegaTron LM GPT-3 Example	1024	32	1.75E+11	3145728	100.8
model	precision	throughput in teraWFLOPs
matmul 8192x1600x6400	bfloat16	237
linear MLP B=8192; 1600 -> 6400 -> 1600 -> 1	bfloat16	230
linear MLP B=8192; 1600 -> 6400 -> 1600 -> 1	mixed	207.6
residual ReLU MLP B=8192; 1600 -> 6400 -> 1600 -> 1	mixed	185.7
HuggingFace GPT2 B=32 L=256; d=1600	mixed	68
	import numpy
	import sys

	players = [
	'Alex Lacoste',
	'Karl Janelle',
	'JF Marcil',
	'Pat Steeves',
	'Dzmitry Bahdanau',
	'Philippe Guay',
	> static@1.0.0 dev-build /home/dzmitry/Dist/arl-omnishop/static
	> webpack --progress -d --config webpack.config.js

	clean-webpack-plugin: /home/dzmitry/Dist/arl-omnishop/static/dist has been removed.
	10% building modules 4/11 modules 7 active .../Dist/arl-omnishop/static/sass/site.cssℹ ｢atl｣: Using typescript@3.1.1 from typescript
	ℹ ｢atl｣: Using tsconfig.json from /home/dzmitry/Dist/arl-omnishop/static/tsconfig.json
	94% after sealℹ ｢atl｣: Checking started in a separate process...
	✖ ｢atl｣: Checking finished with 47 errors
	Hash: 248bef2f47830ed686dd
	Version: webpack 4.20.2
	╰─$ npm run dev-build 2 ↵

	> static@1.0.0 dev-build /home/dzmitry/Dist/arl-omnishop/static
	> webpack --progress -d --config webpack.config.js

	clean-webpack-plugin: /home/dzmitry/Dist/arl-omnishop/static/dist has been removed.
	10% building modules 4/11 modules 7 active .../Dist/arl-omnishop/static/sass/site.cssℹ ｢atl｣: Using typescript@3.1.1 from typescript
	ℹ ｢atl｣: Using tsconfig.json from /home/dzmitry/Dist/arl-omnishop/static/tsconfig.json
	94% after sealℹ ｢atl｣: Checking started in a separate process...
	✖ ｢atl｣: Checking finished with 26 errors
	def compare_rl_and_gail(metric, window, ylabel=None, xlim=None, fs=15, ncol=2):
		"""Print the latest smoothed value of `metric` for six run groups.

		The groups are taken from the module-level frames: rows of `df_rl`
		whose `config.rp_weight` equals 1.0 and 0.0, followed by rows of
		`df_gail` whose `config.rmt.top_k_percent` equals 0.0, 0.25, 0.5 and
		0.75. For each group, `average_value(group, metric)` is smoothed with
		a rolling mean over `window` points, multiplied by 100, and its last
		entry is printed.

		Args:
			metric: Forwarded unchanged to `average_value`.
			window: Size of the rolling-mean window.
			ylabel: Not used by the code in this block.
			xlim: Not used by the code in this block.
			fs: Not used by the code in this block.
			ncol: Not used by the code in this block.
		"""
		groups = [df_rl[df_rl['config.rp_weight'] == weight] for weight in (1.0, 0.0)]
		groups += [df_gail[df_gail['config.rmt.top_k_percent'] == share] for share in (0., 0.25, 0.5, 0.75)]
		for df in groups:
			# The top-level pandas.rolling_mean() helper no longer exists in
			# pandas; .rolling(window).mean() is its replacement and keeps the
			# same default min_periods. This assumes average_value() returns a
			# pandas Series/DataFrame, which the .iloc access below already
			# requires.
			value = 100 * average_value(df, metric).rolling(window).mean()
			print(value.iloc[-1])
	def plot_key_alphas(data, softmax=True, fixed_axis=True):
	f, axes = pyplot.subplots(1, 3)
	f.set_size_inches((15, 2))
	for k in range(3):
	arr = numpy.array(data[k])
	arr = arr[:, [4, 5, 7]]
	if softmax:
	arr = numpy.exp(arr) / numpy.exp(arr).sum(axis=1)[:, None]
	else:
	arr = arr / arr.sum(axis=1)[:, None]