Skip to content

Instantly share code, notes, and snippets.

@elikoga
Last active May 3, 2023 23:50
Show Gist options
  • Save elikoga/c300b9bf6b090fda9187644766347348 to your computer and use it in GitHub Desktop.
Running the replit model on my machine gives this:
print("start")
import time
import datetime

# Capture the script's start time once; every later step reports elapsed
# seconds relative to this.
start_dt = datetime.datetime.now()
start = start_dt.timestamp()
print(f"importing time, finished at { start_dt.isoformat() }")

from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation.streamers import BaseStreamer

print(f"imported transformers, took {time.time() - start:.2f}s")

# Tokenizer and model are pinned to an exact revision so the benchmark is
# reproducible; trust_remote_code is required by this repo's custom model code.
tokenizer = AutoTokenizer.from_pretrained(
    "replit/replit-code-v1-3b",
    trust_remote_code=True,
    revision="9eceafb041eb8abd565dabfbfadd328869140011",
)
print(f"loaded tokenizer, took {time.time() - start:.2f}s")

model = AutoModelForCausalLM.from_pretrained(
    "replit/replit-code-v1-3b",
    trust_remote_code=True,
    init_device="cuda:0",
    alibi=True,
    revision="9eceafb041eb8abd565dabfbfadd328869140011",
)
print(f"loaded model, took {time.time() - start:.2f}s")
# x = tokenizer.encode("def fibonacci(n): ", return_tensors="pt")
# print("encoded x")
# x = x.to(device="cuda:0")
# print("moved x")
# y = model.generate(
# x,
# max_length=100,
# do_sample=True,
# top_p=0.95,
# top_k=4,
# temperature=0.2,
# num_return_sequences=1,
# eos_token_id=tokenizer.eos_token_id,
# )
# print(f"generated {y=}")
# # decoding, clean_up_tokenization_spaces=False to ensure syntactical correctness
# generated_code = tokenizer.decode(
# y[0], skip_special_tokens=True, clean_up_tokenization_spaces=False
# )
# print(generated_code)
import timeit

# Benchmark plan: generate ~100 tokens, 10 times; report the average,
# minimum (rounded) and raw values.
# Accumulates one entry per generate_once() call.
lengths = []
def generate_once():
    """Encode a fixed prompt, generate up to 100 new tokens, and print the result.

    Side effect: appends the number of *generated* tokens (output length
    minus input length) to the module-level ``lengths`` list, so callers
    can tell whether a run actually produced any tokens.
    """
    x = tokenizer.encode("class AVeryLongClass: ", return_tensors="pt")
    x = x.to(device="cuda:0")
    input_length = x.shape[-1]
    print(f"input length: {input_length}")
    y = model.generate(
        x,
        # max_length counts the prompt too, so budget 100 *new* tokens.
        max_length=100 + input_length,
        do_sample=True,
        top_p=0.95,
        top_k=4,
        temperature=0.2,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )
    output_length = y.shape[-1]
    # BUG FIX: record the generated-token count, not the total output length.
    # The original appended output_length, which is always > 0 (it includes
    # the prompt), making measure_time's "l > 0" filter vacuous.
    lengths.append(output_length - input_length)
    print(
        f"output length: {output_length}, generated {output_length - input_length} tokens"
    )
    # clean_up_tokenization_spaces=False to ensure syntactical correctness
    generated_code = tokenizer.decode(
        y[0], skip_special_tokens=True, clean_up_tokenization_spaces=False
    )
    print(generated_code)
# One untimed warm-up run so CUDA init / kernel caches don't skew the timings.
generate_once()
def measure_time():
    """Run generate_once() 10 times under timeit and print summary stats.

    Prints the average and minimum wall-clock time plus the raw
    (time, length) pairs, keeping only runs that recorded a positive length.
    """
    # BUG FIX: ``lengths`` may already contain entries from earlier calls
    # (the warm-up run appends one), so zip(timers, lengths) would pair each
    # timing with the *previous* run's length. Snapshot the offset first and
    # pair timings only with the entries this benchmark produces.
    offset = len(lengths)
    timers = timeit.Timer(generate_once).repeat(10, 1)
    times_with_tokens_generated = [
        (t, l) for t, l in zip(timers, lengths[offset:]) if l > 0
    ]
    print(
        f"Average: {sum(timers) / len(timers):.2f}s, Min: {min(timers):.2f}s, Raw: {times_with_tokens_generated}"
    )
# Only run the benchmark when executed as a script, not on import.
if __name__ == "__main__":
    measure_time()
> python.exe .\replit.py
start
importing time, finished at 2023-05-04T01:42:40.666900
imported transformers, took 0.87s
loaded tokenizer, took 0.92s
C:\Users\USERNAME/.cache\huggingface\modules\transformers_modules\replit\replit-code-v1-3b\9eceafb041eb8abd565dabfbfadd328869140011\attention.py:290: UserWarning: Using `attn_impl: torch`. If your model does not use `alibi` or `prefix_lm` we recommend using `attn_impl: flash` otherwise we recommend using `attn_impl: triton`.
warnings.warn(
You are using config.init_device='cuda:0', but you can also use config.init_device="meta" with Composer + FSDP for fast initialization.
loaded model, took 45.08s
input length: 8
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.
output length: 108, generated 100 tokens
class AVeryLongClass: public AVeryLongClassBase {
public:
AVeryLongClass() {}
AVeryLongClass(const AVeryLongClass&) {}
AVeryLongClass& operator=(const AVeryLongClass&) { return *this; }
virtual ~AVeryLongClass() {}
virtual void Print() const {}
};
class B : public A {
public:
input length: 8
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.
output length: 108, generated 100 tokens
class AVeryLongClass: public AVeryLongClassBase {
public:
AVeryLongClass(const AVeryLongClass&) = delete;
AVeryLongClass& operator=(const AVeryLongClass&) = delete;
AVeryLongClass(AVeryLongClass&&) = delete;
AVeryLongClass& operator=(AVeryLongClass&&) = delete;
~AVeryLongClass() = default;
};
class
input length: 8
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.
output length: 108, generated 100 tokens
class AVeryLongClass: public AVeryLongClassParent {
public:
AVeryLongClass() {}
~AVeryLongClass() {}
};
class BVeryLongClass: public BVeryLongClassParent {
public:
BVeryLongClass() {}
~BVeryLongClass() {}
};
class CVeryLongClass: public CVeryLongClassParent {
input length: 8
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.
output length: 108, generated 100 tokens
class AVeryLongClass: public AVeryLongClassParent {
public:
AVeryLongClass() {}
virtual ~AVeryLongClass() {}
virtual void foo() {}
};
class B : public AVeryLongClass {
public:
B() {}
virtual ~B() {}
virtual void foo() {}
};
class C : public B {
public:
input length: 8
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.
output length: 108, generated 100 tokens
class AVeryLongClass: public AVeryLongClassBase {
public:
AVeryLongClass(const AVeryLongClass& other)
: AVeryLongClassBase(other) {}
AVeryLongClass(const AVeryLongClassBase& other)
: AVeryLongClassBase(other) {}
AVeryLongClass(const AVeryLongClassBase* other)
: AVeryLongClassBase(other
input length: 8
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.
output length: 108, generated 100 tokens
class AVeryLongClass: public AVeryLongClassBase {
public:
AVeryLongClass(const std::string& name) : AVeryLongClassBase(name) {}
virtual ~AVeryLongClass() {}
virtual void Print() const {
std::cout << "AVeryLongClass::Print()" << std::endl;
}
};
class B : public A {
public:
B(
input length: 8
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.
output length: 108, generated 100 tokens
class AVeryLongClass: public AVeryLongClassBase {
public:
AVeryLongClass() : AVeryLongClassBase() {}
AVeryLongClass(const AVeryLongClass&) : AVeryLongClassBase() {}
AVeryLongClass(const AVeryLongClass&, const AVeryLongClass&)
: AVeryLongClassBase() {}
AVeryLongClass(const
input length: 8
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.
output length: 108, generated 100 tokens
class AVeryLongClass: public AVeryLongClassBase {
public:
AVeryLongClass(const AVeryLongClass&) = delete;
AVeryLongClass& operator=(const AVeryLongClass&) = delete;
AVeryLongClass(AVeryLongClass&&) = delete;
AVeryLongClass& operator=(AVeryLongClass&&) = delete;
~AVeryLongClass() = default;
};
input length: 8
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.
output length: 108, generated 100 tokens
class AVeryLongClass: public AVeryLongClassBase {
public:
AVeryLongClass(const AVeryLongClass&) = delete;
AVeryLongClass& operator=(const AVeryLongClass&) = delete;
AVeryLongClass(AVeryLongClass&&) = delete;
AVeryLongClass& operator=(AVeryLongClass&&) = delete;
~AVeryLongClass() = default;
};
class
input length: 8
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.
output length: 108, generated 100 tokens
class AVeryLongClass: public AVeryLongClassBase {
public:
AVeryLongClass(const AVeryLongClass&) = delete;
AVeryLongClass& operator=(const AVeryLongClass&) = delete;
AVeryLongClass() {}
~AVeryLongClass() {}
void foo() {}
};
class B : public A {
public:
B(const B&
input length: 8
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.
output length: 108, generated 100 tokens
class AVeryLongClass: public AVeryLongClassBase {
public:
AVeryLongClass(const AVeryLongClass&) = delete;
AVeryLongClass& operator=(const AVeryLongClass&) = delete;
AVeryLongClass(AVeryLongClass&&) = delete;
AVeryLongClass& operator=(AVeryLongClass&&) = delete;
AVeryLongClass() = default;
~AV
Average: 8.47s, Min: 8.42s, Raw: [(8.465242500002205, 108), (8.566006900000502, 108), (8.419325000002573, 108), (8.447737599999527, 108), (8.433483099994191, 108), (8.443644099999801, 108), (8.43692259999807, 108), (8.487394800002221, 108), (8.486435899998469, 108), (8.4917987000008, 108)]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment