Last active
May 3, 2023 23:50
-
-
Save elikoga/c300b9bf6b090fda9187644766347348 to your computer and use it in GitHub Desktop.
Running the Replit code-generation model (replit-code-v1-3b) on my machine produces the following output:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
print("start")
import time
import datetime

# Record wall-clock start: `start_dt` for a human-readable ISO-8601 stamp,
# `start` as float epoch seconds used as the baseline for all "took Xs" messages.
start_dt = datetime.datetime.now()
start = start_dt.timestamp()
print(f"importing time, finished at { start_dt.isoformat() }")

# transformers import is measured separately because it is slow (~1s in the log below).
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation.streamers import BaseStreamer
print(f"imported transformers, took {time.time() - start:.2f}s")
# import torch
# print("imported torch")

# Pinning `revision` makes the remote code (trust_remote_code=True) reproducible;
# without it, upstream changes to the model repo could alter behavior between runs.
tokenizer = AutoTokenizer.from_pretrained(
    "replit/replit-code-v1-3b",
    trust_remote_code=True,
    revision="9eceafb041eb8abd565dabfbfadd328869140011",
)
print(f"loaded tokenizer, took {time.time() - start:.2f}s")
model = AutoModelForCausalLM.from_pretrained(
    "replit/replit-code-v1-3b",
    trust_remote_code=True,
    init_device="cuda:0",  # weights materialized directly on the GPU
    alibi=True,
    revision="9eceafb041eb8abd565dabfbfadd328869140011",
)
print(f"loaded model, took {time.time() - start:.2f}s")

# Single-shot generation example, kept for reference (superseded by generate_once below):
# x = tokenizer.encode("def fibonacci(n): ", return_tensors="pt")
# print("encoded x")
# x = x.to(device="cuda:0")
# print("moved x")
# y = model.generate(
#     x,
#     max_length=100,
#     do_sample=True,
#     top_p=0.95,
#     top_k=4,
#     temperature=0.2,
#     num_return_sequences=1,
#     eos_token_id=tokenizer.eos_token_id,
# )
# print(f"generated {y=}")
# # decoding, clean_up_tokenization_spaces=False to ensure syntactical correctness
# generated_code = tokenizer.decode(
#     y[0], skip_special_tokens=True, clean_up_tokenization_spaces=False
# )
# print(generated_code)

import timeit
# generate 100 tokens, 10 times. Measure and print the average, minimum (rounded) and raw values.
# Output lengths of every generate_once() call, in call order (warm-up included).
lengths = []
def generate_once(prompt="class AVeryLongClass: ", max_new_tokens=100):
    """Run one sampled generation from the global `model` and print the result.

    Encodes `prompt`, moves it to cuda:0, generates up to `max_new_tokens`
    new tokens on top of the prompt, records the total output length in the
    module-level `lengths` list, and prints the decoded code.

    Args:
        prompt: Text the model continues from. Defaults to the original
            hard-coded benchmark prompt, so zero-argument calls are unchanged.
        max_new_tokens: Generation budget beyond the prompt length.
    """
    x = tokenizer.encode(prompt, return_tensors="pt")
    x = x.to(device="cuda:0")
    input_length = x.shape[-1]
    print(f"input length: {input_length}")
    y = model.generate(
        x,
        # max_length counts the prompt too, so add input_length to keep the
        # number of *generated* tokens fixed regardless of prompt size.
        max_length=max_new_tokens + input_length,
        do_sample=True,
        top_p=0.95,
        top_k=4,
        temperature=0.2,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )
    output_length = y.shape[-1]
    lengths.append(output_length)
    print(
        f"output length: {output_length}, generated {output_length - input_length} tokens"
    )
    # clean_up_tokenization_spaces=False keeps the generated code syntactically intact.
    generated_code = tokenizer.decode(
        y[0], skip_special_tokens=True, clean_up_tokenization_spaces=False
    )
    print(generated_code)
# warm everything up :)
# The first call pays one-time costs (presumably CUDA kernel/cache init — not
# verifiable from here), so it runs once outside the timed benchmark below.
generate_once()
def measure_time():
    """Time 10 single calls of generate_once() and print summary statistics.

    Prints the average and minimum wall-clock time plus the raw
    (seconds, output_length) pairs for runs that actually produced output.
    """
    # Bug fix: `lengths` already holds entries from earlier calls (the warm-up),
    # so zipping timings against `lengths` from index 0 paired each timing with
    # the *previous* run's length. Snapshot the offset and zip only against the
    # lengths recorded during this benchmark.
    baseline = len(lengths)
    timers = timeit.Timer(generate_once).repeat(10, 1)
    run_lengths = lengths[baseline:]
    times_with_tokens_generated = [
        (t, l) for t, l in zip(timers, run_lengths) if l > 0
    ]
    print(
        f"Average: {sum(timers) / len(timers):.2f}s, Min: {min(timers):.2f}s, Raw: {times_with_tokens_generated}"
    )

if __name__ == "__main__":
    measure_time()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
> python.exe .\replit.py | |
start | |
importing time, finished at 2023-05-04T01:42:40.666900 | |
imported transformers, took 0.87s | |
loaded tokenizer, took 0.92s | |
C:\Users\USERNAME/.cache\huggingface\modules\transformers_modules\replit\replit-code-v1-3b\9eceafb041eb8abd565dabfbfadd328869140011\attention.py:290: UserWarning: Using `attn_impl: torch`. If your model does not use `alibi` or `prefix_lm` we recommend using `attn_impl: flash` otherwise we recommend using `attn_impl: triton`. | |
warnings.warn( | |
You are using config.init_device='cuda:0', but you can also use config.init_device="meta" with Composer + FSDP for fast initialization. | |
loaded model, took 45.08s | |
input length: 8 | |
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. | |
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation. | |
output length: 108, generated 100 tokens | |
class AVeryLongClass: public AVeryLongClassBase { | |
public: | |
AVeryLongClass() {} | |
AVeryLongClass(const AVeryLongClass&) {} | |
AVeryLongClass& operator=(const AVeryLongClass&) { return *this; } | |
virtual ~AVeryLongClass() {} | |
virtual void Print() const {} | |
}; | |
class B : public A { | |
public: | |
input length: 8 | |
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. | |
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation. | |
output length: 108, generated 100 tokens | |
class AVeryLongClass: public AVeryLongClassBase { | |
public: | |
AVeryLongClass(const AVeryLongClass&) = delete; | |
AVeryLongClass& operator=(const AVeryLongClass&) = delete; | |
AVeryLongClass(AVeryLongClass&&) = delete; | |
AVeryLongClass& operator=(AVeryLongClass&&) = delete; | |
~AVeryLongClass() = default; | |
}; | |
class | |
input length: 8 | |
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. | |
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation. | |
output length: 108, generated 100 tokens | |
class AVeryLongClass: public AVeryLongClassParent { | |
public: | |
AVeryLongClass() {} | |
~AVeryLongClass() {} | |
}; | |
class BVeryLongClass: public BVeryLongClassParent { | |
public: | |
BVeryLongClass() {} | |
~BVeryLongClass() {} | |
}; | |
class CVeryLongClass: public CVeryLongClassParent { | |
input length: 8 | |
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. | |
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation. | |
output length: 108, generated 100 tokens | |
class AVeryLongClass: public AVeryLongClassParent { | |
public: | |
AVeryLongClass() {} | |
virtual ~AVeryLongClass() {} | |
virtual void foo() {} | |
}; | |
class B : public AVeryLongClass { | |
public: | |
B() {} | |
virtual ~B() {} | |
virtual void foo() {} | |
}; | |
class C : public B { | |
public: | |
input length: 8 | |
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. | |
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation. | |
output length: 108, generated 100 tokens | |
class AVeryLongClass: public AVeryLongClassBase { | |
public: | |
AVeryLongClass(const AVeryLongClass& other) | |
: AVeryLongClassBase(other) {} | |
AVeryLongClass(const AVeryLongClassBase& other) | |
: AVeryLongClassBase(other) {} | |
AVeryLongClass(const AVeryLongClassBase* other) | |
: AVeryLongClassBase(other | |
input length: 8 | |
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. | |
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation. | |
output length: 108, generated 100 tokens | |
class AVeryLongClass: public AVeryLongClassBase { | |
public: | |
AVeryLongClass(const std::string& name) : AVeryLongClassBase(name) {} | |
virtual ~AVeryLongClass() {} | |
virtual void Print() const { | |
std::cout << "AVeryLongClass::Print()" << std::endl; | |
} | |
}; | |
class B : public A { | |
public: | |
B( | |
input length: 8 | |
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. | |
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation. | |
output length: 108, generated 100 tokens | |
class AVeryLongClass: public AVeryLongClassBase { | |
public: | |
AVeryLongClass() : AVeryLongClassBase() {} | |
AVeryLongClass(const AVeryLongClass&) : AVeryLongClassBase() {} | |
AVeryLongClass(const AVeryLongClass&, const AVeryLongClass&) | |
: AVeryLongClassBase() {} | |
AVeryLongClass(const | |
input length: 8 | |
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. | |
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation. | |
output length: 108, generated 100 tokens | |
class AVeryLongClass: public AVeryLongClassBase { | |
public: | |
AVeryLongClass(const AVeryLongClass&) = delete; | |
AVeryLongClass& operator=(const AVeryLongClass&) = delete; | |
AVeryLongClass(AVeryLongClass&&) = delete; | |
AVeryLongClass& operator=(AVeryLongClass&&) = delete; | |
~AVeryLongClass() = default; | |
}; | |
input length: 8 | |
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. | |
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation. | |
output length: 108, generated 100 tokens | |
class AVeryLongClass: public AVeryLongClassBase { | |
public: | |
AVeryLongClass(const AVeryLongClass&) = delete; | |
AVeryLongClass& operator=(const AVeryLongClass&) = delete; | |
AVeryLongClass(AVeryLongClass&&) = delete; | |
AVeryLongClass& operator=(AVeryLongClass&&) = delete; | |
~AVeryLongClass() = default; | |
}; | |
class | |
input length: 8 | |
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. | |
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation. | |
output length: 108, generated 100 tokens | |
class AVeryLongClass: public AVeryLongClassBase { | |
public: | |
AVeryLongClass(const AVeryLongClass&) = delete; | |
AVeryLongClass& operator=(const AVeryLongClass&) = delete; | |
AVeryLongClass() {} | |
~AVeryLongClass() {} | |
void foo() {} | |
}; | |
class B : public A { | |
public: | |
B(const B& | |
input length: 8 | |
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. | |
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation. | |
output length: 108, generated 100 tokens | |
class AVeryLongClass: public AVeryLongClassBase { | |
public: | |
AVeryLongClass(const AVeryLongClass&) = delete; | |
AVeryLongClass& operator=(const AVeryLongClass&) = delete; | |
AVeryLongClass(AVeryLongClass&&) = delete; | |
AVeryLongClass& operator=(AVeryLongClass&&) = delete; | |
AVeryLongClass() = default; | |
~AV | |
Average: 8.47s, Min: 8.42s, Raw: [(8.465242500002205, 108), (8.566006900000502, 108), (8.419325000002573, 108), (8.447737599999527, 108), (8.433483099994191, 108), (8.443644099999801, 108), (8.43692259999807, 108), (8.487394800002221, 108), (8.486435899998469, 108), (8.4917987000008, 108)] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment