Skip to content

Instantly share code, notes, and snippets.

@elikoga
Last active May 3, 2023 23:50
Show Gist options
  • Save elikoga/c300b9bf6b090fda9187644766347348 to your computer and use it in GitHub Desktop.
Running the replit model on my machine gives this:
print("start")
import time
import datetime

# Capture the script's start time once; every later step reports elapsed
# seconds relative to this.
start_dt = datetime.datetime.now()
start = start_dt.timestamp()
print(f"importing time, finished at { start_dt.isoformat() }")

from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation.streamers import BaseStreamer

print(f"imported transformers, took {time.time() - start:.2f}s")

# Tokenizer and model are pinned to an exact revision so the benchmark is
# reproducible; trust_remote_code is required by this repo's custom model code.
tokenizer = AutoTokenizer.from_pretrained(
    "replit/replit-code-v1-3b",
    trust_remote_code=True,
    revision="9eceafb041eb8abd565dabfbfadd328869140011",
)
print(f"loaded tokenizer, took {time.time() - start:.2f}s")

model = AutoModelForCausalLM.from_pretrained(
    "replit/replit-code-v1-3b",
    trust_remote_code=True,
    init_device="cuda:0",
    alibi=True,
    revision="9eceafb041eb8abd565dabfbfadd328869140011",
)
print(f"loaded model, took {time.time() - start:.2f}s")
# x = tokenizer.encode("def fibonacci(n): ", return_tensors="pt")
# print("encoded x")
# x = x.to(device="cuda:0")
# print("moved x")
# y = model.generate(
# x,
# max_length=100,
# do_sample=True,
# top_p=0.95,
# top_k=4,
# temperature=0.2,
# num_return_sequences=1,
# eos_token_id=tokenizer.eos_token_id,
# )
# print(f"generated {y=}")
# # decoding, clean_up_tokenization_spaces=False to ensure syntactical correctness
# generated_code = tokenizer.decode(
# y[0], skip_special_tokens=True, clean_up_tokenization_spaces=False
# )
# print(generated_code)
import timeit

# Benchmark plan: generate ~100 tokens, 10 times; report the average,
# minimum (rounded) and raw values.
# Accumulates one entry per generate_once() call.
lengths = []
def generate_once():
    """Encode a fixed prompt, generate up to 100 new tokens, and print the result.

    Side effect: appends the number of *generated* tokens (output length
    minus input length) to the module-level ``lengths`` list, so callers
    can tell whether a run actually produced any tokens.
    """
    x = tokenizer.encode("class AVeryLongClass: ", return_tensors="pt")
    x = x.to(device="cuda:0")
    input_length = x.shape[-1]
    print(f"input length: {input_length}")
    y = model.generate(
        x,
        # max_length counts the prompt too, so budget 100 *new* tokens.
        max_length=100 + input_length,
        do_sample=True,
        top_p=0.95,
        top_k=4,
        temperature=0.2,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )
    output_length = y.shape[-1]
    # BUG FIX: record the generated-token count, not the total output length.
    # The original appended output_length, which is always > 0 (it includes
    # the prompt), making measure_time's "l > 0" filter vacuous.
    lengths.append(output_length - input_length)
    print(
        f"output length: {output_length}, generated {output_length - input_length} tokens"
    )
    # clean_up_tokenization_spaces=False to ensure syntactical correctness
    generated_code = tokenizer.decode(
        y[0], skip_special_tokens=True, clean_up_tokenization_spaces=False
    )
    print(generated_code)
# One untimed warm-up run so CUDA init / kernel caches don't skew the timings.
generate_once()
def measure_time():
    """Run generate_once() 10 times under timeit and print summary stats.

    Prints the average and minimum wall-clock time plus the raw
    (time, length) pairs, keeping only runs that recorded a positive length.
    """
    # BUG FIX: ``lengths`` may already contain entries from earlier calls
    # (the warm-up run appends one), so zip(timers, lengths) would pair each
    # timing with the *previous* run's length. Snapshot the offset first and
    # pair timings only with the entries this benchmark produces.
    offset = len(lengths)
    timers = timeit.Timer(generate_once).repeat(10, 1)
    times_with_tokens_generated = [
        (t, l) for t, l in zip(timers, lengths[offset:]) if l > 0
    ]
    print(
        f"Average: {sum(timers) / len(timers):.2f}s, Min: {min(timers):.2f}s, Raw: {times_with_tokens_generated}"
    )
# Only run the benchmark when executed as a script, not on import.
if __name__ == "__main__":
    measure_time()
> python.exe .\replit.py
start
importing time, finished at 2023-05-04T01:42:40.666900
imported transformers, took 0.87s
loaded tokenizer, took 0.92s
C:\Users\USERNAME/.cache\huggingface\modules\transformers_modules\replit\replit-code-v1-3b\9eceafb041eb8abd565dabfbfadd328869140011\attention.py:290: UserWarning: Using `attn_impl: torch`. If your model does not use `alibi` or `prefix_lm` we recommend using `attn_impl: flash` otherwise we recommend using `attn_impl: triton`.
warnings.warn(
You are using config.init_device='cuda:0', but you can also use config.init_device="meta" with Composer + FSDP for fast initialization.
loaded model, took 45.08s
input length: 8
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.
output length: 108, generated 100 tokens
class AVeryLongClass: public AVeryLongClassBase {
public:
AVeryLongClass() {}
AVeryLongClass(const AVeryLongClass&) {}
AVeryLongClass& operator=(const AVeryLongClass&) { return *this; }
virtual ~AVeryLongClass() {}
virtual void Print() const {}
};
class B : public A {
public:
input length: 8
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.
output length: 108, generated 100 tokens
class AVeryLongClass: public AVeryLongClassBase {
public:
AVeryLongClass(const AVeryLongClass&) = delete;
AVeryLongClass& operator=(const AVeryLongClass&) = delete;
AVeryLongClass(AVeryLongClass&&) = delete;
AVeryLongClass& operator=(AVeryLongClass&&) = delete;
~AVeryLongClass() = default;
};
class
input length: 8
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.
output length: 108, generated 100 tokens
class AVeryLongClass: public AVeryLongClassParent {
public:
AVeryLongClass() {}
~AVeryLongClass() {}
};
class BVeryLongClass: public BVeryLongClassParent {
public:
BVeryLongClass() {}
~BVeryLongClass() {}
};
class CVeryLongClass: public CVeryLongClassParent {
input length: 8
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.
output length: 108, generated 100 tokens
class AVeryLongClass: public AVeryLongClassParent {
public:
AVeryLongClass() {}
virtual ~AVeryLongClass() {}
virtual void foo() {}
};
class B : public AVeryLongClass {
public:
B() {}
virtual ~B() {}
virtual void foo() {}
};
class C : public B {
public:
input length: 8
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.
output length: 108, generated 100 tokens
class AVeryLongClass: public AVeryLongClassBase {
public:
AVeryLongClass(const AVeryLongClass& other)
: AVeryLongClassBase(other) {}
AVeryLongClass(const AVeryLongClassBase& other)
: AVeryLongClassBase(other) {}
AVeryLongClass(const AVeryLongClassBase* other)
: AVeryLongClassBase(other
input length: 8
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.
output length: 108, generated 100 tokens
class AVeryLongClass: public AVeryLongClassBase {
public:
AVeryLongClass(const std::string& name) : AVeryLongClassBase(name) {}
virtual ~AVeryLongClass() {}
virtual void Print() const {
std::cout << "AVeryLongClass::Print()" << std::endl;
}
};
class B : public A {
public:
B(
input length: 8
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.
output length: 108, generated 100 tokens
class AVeryLongClass: public AVeryLongClassBase {
public:
AVeryLongClass() : AVeryLongClassBase() {}
AVeryLongClass(const AVeryLongClass&) : AVeryLongClassBase() {}
AVeryLongClass(const AVeryLongClass&, const AVeryLongClass&)
: AVeryLongClassBase() {}
AVeryLongClass(const
input length: 8
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.
output length: 108, generated 100 tokens
class AVeryLongClass: public AVeryLongClassBase {
public:
AVeryLongClass(const AVeryLongClass&) = delete;
AVeryLongClass& operator=(const AVeryLongClass&) = delete;
AVeryLongClass(AVeryLongClass&&) = delete;
AVeryLongClass& operator=(AVeryLongClass&&) = delete;
~AVeryLongClass() = default;
};
input length: 8
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.
output length: 108, generated 100 tokens
class AVeryLongClass: public AVeryLongClassBase {
public:
AVeryLongClass(const AVeryLongClass&) = delete;
AVeryLongClass& operator=(const AVeryLongClass&) = delete;
AVeryLongClass(AVeryLongClass&&) = delete;
AVeryLongClass& operator=(AVeryLongClass&&) = delete;
~AVeryLongClass() = default;
};
class
input length: 8
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.
output length: 108, generated 100 tokens
class AVeryLongClass: public AVeryLongClassBase {
public:
AVeryLongClass(const AVeryLongClass&) = delete;
AVeryLongClass& operator=(const AVeryLongClass&) = delete;
AVeryLongClass() {}
~AVeryLongClass() {}
void foo() {}
};
class B : public A {
public:
B(const B&
input length: 8
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.
output length: 108, generated 100 tokens
class AVeryLongClass: public AVeryLongClassBase {
public:
AVeryLongClass(const AVeryLongClass&) = delete;
AVeryLongClass& operator=(const AVeryLongClass&) = delete;
AVeryLongClass(AVeryLongClass&&) = delete;
AVeryLongClass& operator=(AVeryLongClass&&) = delete;
AVeryLongClass() = default;
~AV
Average: 8.47s, Min: 8.42s, Raw: [(8.465242500002205, 108), (8.566006900000502, 108), (8.419325000002573, 108), (8.447737599999527, 108), (8.433483099994191, 108), (8.443644099999801, 108), (8.43692259999807, 108), (8.487394800002221, 108), (8.486435899998469, 108), (8.4917987000008, 108)]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment