fatal: error thrown and no exception handler available.
InitError(mod=:PyCall, error=UndefVarError(var=:PYTHONHOME))
ijl_undefined_var_error at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-9/src/rtutils.c:132
ijl_get_binding_or_error at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-9/src/module.c:421
__init__ at /kuacc/users/dyuret/.julia/packages/PyCall/twYvK/src/pyinit.jl:155
jfptr___init___79335 at /userfiles/dyuret/sys.so (unknown line)
_jl_invoke at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-9/src/gf.c:2758 [inlined]
ijl_apply_generic at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-9/src/gf.c:2940
jl_apply at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-9/src/julia.h:1880 [inlined]
jl_module_run_initializer at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-9/src/toplevel.c:75
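A common remedy for PyCall initialization failures of this kind is to rebuild PyCall against a known Python; a hedged sketch, since whether it helps here depends on how the sys.so system image was built:

using Pkg
ENV["PYTHON"] = ""    # let PyCall install and use its own Conda Python
Pkg.build("PyCall")   # regenerate PyCall's deps so __init__ can locate PYTHONHOME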
[ Info: PackageCompiler: creating system image object file, this might take a while...
ERROR: Method overwriting is not permitted during Module precompilation. Use `__precompile__(false)` to opt-out of precompilation.
┌ Error: Failed to monkey-patch `julia`
│ exception = (Declaring __precompile__(false) is not allowed in files that are being precompiled., Union{Ptr{Nothing}, Base.InterpreterIP}[Ptr{Nothing} @0x00007f4f0e546078, Ptr{Nothing} @0x00007f4f0e5acd44, Ptr{Nothing} @0x00007f4f0e561970, Ptr{Nothing} @0x00007f4f0e5624b3, Ptr{Nothing} @0x00007f4f0e562e4d, Base.InterpreterIP in top-level CodeInfo for Base at statement 6, Ptr{Nothing} @0x00007f4f0e57e984, Ptr{Nothing} @0x00007f4f0e58054b, Ptr{Nothing} @0x00007f4ef6e61c6b, Ptr{Nothing} @0x00007f4ef6e61cb3, Ptr{Nothing} @0x00007f4f0e54399d, Ptr{Nothing} @0x00007f4f0e561824, Ptr{Nothing} @0x00007f4f0e561334, Ptr{Nothing} @0x00007f4f0e562073, Ptr{Nothing} @0x00007f4f0e56245f, Ptr{Nothing} @0x00007f4f0e562e4d, Base.InterpreterIP in top-level CodeInfo for
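The opt-out suggested by the first error only applies when the module itself is not being precompiled as a dependency, which is exactly what the second error complains about. A minimal sketch of the opt-out, with a hypothetical module name:

module MonkeyPatches        # hypothetical name for the patching module
__precompile__(false)       # this module overwrites methods, so opt out of precompilation
# ... method overwrites go here ...
end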
/truba/home/dyuret/.julia/conda/3/x86_64/envs/llm/bin/deepspeed:4: DeprecationWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html
__import__('pkg_resources').require('deepspeed==0.10.2+c69bd1f7')
[2023-08-30 00:25:45,359] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[2023-08-30 00:25:46,642] [WARNING] [runner.py:201:fetch_hostfile] Unable to find hostfile, will proceed with training with local resources only.
Detected CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7: setting --include=localhost:0,1,2,3,4,5,6,7
[2023-08-30 00:25:46,643] [INFO] [runner.py:567:main] cmd = /truba/home/dyuret/.julia/conda/3/x86_64/envs/llm/bin/python3.11 -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMCwgMSwgMiwgMywgNCwgNSwgNiwgN119 --master_addr=127.0.0.1 --master_port=29500 --enable_each_rank_log=None main.py --data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward
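The hostfile warning above is harmless for single-node runs. For multi-node runs, DeepSpeed reads a hostfile listing one host per line with its GPU slot count; a hedged sketch with illustrative hostnames:

# default location /job/hostfile, or pass --hostfile=PATH to deepspeed
worker-1 slots=8
worker-2 slots=8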
denizyuret / sample.py
Created December 3, 2021 07:38
@Kausta: TensorFlow's documentation also covers higher-order gradients with nested tapes at https://www.tensorflow.org/guide/advanced_autodiff#higher-order_gradients, and it is followed (at the same link) by an input gradient penalty example (the gradient of (the magnitude of the gradient with respect to the inputs) with respect to the mo…
# R1 regularization (Hypothetical)
import tensorflow as tf

with tf.GradientTape() as t2:
    with tf.GradientTape() as t1:
        t1.watch(x)  # needed if x is a plain tensor rather than a Variable
        # Discriminator outputs
        disc_out = forward(w, x)
        # Regular loss
        fl = loss(disc_out)
    # Gradients with respect to the inputs
    g = t1.gradient(tf.reduce_sum(disc_out), x)
    # Mean squared sum of the input gradients (the R1 penalty)
    penalty = tf.reduce_mean(tf.reduce_sum(tf.square(g), axis=-1))
# t2 differentiates the penalized loss with respect to the model parameters w
grads = t2.gradient(fl + penalty, w)
# Vaswani, Ashish, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Łukasz Kaiser, and Illia Polosukhin. "Attention is all you need." In Advances in neural information processing systems, pp. 5998-6008. 2017.
# [1] https://papers.nips.cc/paper/7181-attention-is-all-you-need/, https://arxiv.org/abs/1706.03762 (reference paper)
# [2] https://github.com/harvardnlp/annotated-transformer, http://nlp.seas.harvard.edu/2018/04/03/attention.html (reference implementation)
# [3] https://github.com/OpenNMT/OpenNMT-py/blob/master/onmt/modules/multi_headed_attn.py
# [4] https://github.com/tensorflow/tensor2tensor
# TODO: LabelSmoothing?
# include("debug.jl")
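On the LabelSmoothing TODO above: the reference papers regularize training by spreading a fraction ϵ of the target probability mass uniformly over the classes. A minimal Julia sketch; the function name and default ϵ are mine, not the gist's:

# y: one-hot targets with classes along dim 1; returns smoothed targets
smoothlabels(y::AbstractMatrix, ϵ=0.1f0) = (1 - ϵ) .* y .+ ϵ / size(y, 1)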
denizyuret / plots-err.jl
Created April 7, 2021 12:30
Plots.jl errors on Colab
┌ Info: Precompiling Plots [91a5bcdd-55d7-5caf-9e0b-520d859cae80]
└ @ Base loading.jl:1317
ERROR: LoadError: LoadError: InitError: could not load library "/root/.julia/artifacts/a84cc58d5161b950f268bb562e105bbbf4d6004a/lib/libGL.so"
/root/.julia/artifacts/a84cc58d5161b950f268bb562e105bbbf4d6004a/lib/libGL.so: undefined symbol: _glapi_tls_Current
Stacktrace:
[1] dlopen(s::String, flags::UInt32; throw_error::Bool)
@ Base.Libc.Libdl ./libdl.jl:114
[2] dlopen(s::String, flags::UInt32)
@ Base.Libc.Libdl ./libdl.jl:114
[3] macro expansion
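On headless machines such as Colab, a commonly suggested workaround is to force GR into an off-screen workstation type before loading Plots; a hedged sketch, since it may not address this particular libGL load failure:

ENV["GKSwstype"] = "100"   # off-screen GR workstation, no display required
using Plots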
// Warp-level tail of a parallel sum reduction: the last 32 threads of a
// block execute in lockstep, so no __syncthreads() is needed between steps.
// volatile is required; otherwise register optimization caches x[i]
// and produces wrong answers.
__device__ void _sum_32_20_0(volatile float *x, int i) {
    float ai, xi;
    ai = x[i]; xi = x[i+32]; x[i] = ai + xi;
    ai = x[i]; xi = x[i+16]; x[i] = ai + xi;
    ai = x[i]; xi = x[i+ 8]; x[i] = ai + xi;
    ai = x[i]; xi = x[i+ 4]; x[i] = ai + xi;
    ai = x[i]; xi = x[i+ 2]; x[i] = ai + xi;
    ai = x[i]; xi = x[i+ 1]; x[i] = ai + xi;
}
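In the enclosing reduction kernel (not shown in the gist), a helper like this is presumably called by the first warp only, e.g. guarded by if (i < 32), once a shared-memory tree reduction has brought the partial sums down to 64 elements; within a single warp the steps execute in lockstep, which is why no __syncthreads() appears between them.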
using Knet
using CUDA
# Give every parameter of the model its own copy of the optimizer.
setoptim!(m, optimizer) = for p in params(m); p.opt = Knet.clone(optimizer); end
# Dice coefficient; `smooth` avoids division by zero on empty masks.
dice(x, y; smooth::Float32=1.0f0) = (2*sum(y .* x) + smooth) / (sum(y.^2) + sum(x.^2) + smooth)
loss(x, y) = 1 - dice(x, y)
function minimize!(model, x::KnetArray, y::KnetArray)
    ld = @diff loss(model(x), y)
    for p in params(model); update!(p, grad(ld, p)); end  # update! uses p.opt set above
    return value(ld)
end
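A hedged usage sketch for the helpers above, assuming a Knet model whose parameters are Params and a data iterator of (x, y) batches:

setoptim!(model, Adam(lr=1f-3))   # give every parameter its own Adam state
for (x, y) in data
    minimize!(model, x, y)
end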
# given some encoder states X, decoder state y, matrices Q, K, V
for i in 1:N
    keys[i] = K * X[i]
    values[i] = V * X[i]
end
query = Q * y
for i in 1:N
    relevance[i] = query ⋅ keys[i]
end
relevance = softmax(relevance)
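A self-contained numeric version of the sketch above; the concrete dimensions and the final context step are my additions, not part of the gist:

using LinearAlgebra
softmax(v) = (e = exp.(v .- maximum(v)); e ./ sum(e))
d, N = 4, 3
X = [randn(d) for _ in 1:N]            # encoder states
y = randn(d)                           # decoder state
Q, K, V = randn(d, d), randn(d, d), randn(d, d)
keys   = [K * X[i] for i in 1:N]
values = [V * X[i] for i in 1:N]
query  = Q * y
relevance = softmax([query ⋅ keys[i] for i in 1:N])
context = sum(relevance[i] * values[i] for i in 1:N)   # attention-weighted summary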
+buffers windows
+emacs daemon
+julia mode
+shell mode
+dired mode
+rectangle copy paste
+doctor, tetris
+calendar email
+ispell
+hexl mode