Skip to content

Instantly share code, notes, and snippets.

View goddoe's full-sized avatar

Sungju Kim goddoe

View GitHub Profile
@goddoe
goddoe / 8k_to_4k.py
Last active December 8, 2023 05:41
8k_to_4k.py
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Settings for the conversion script. The code that consumes these values is
# outside this excerpt, so the notes below are assumptions to be confirmed.
input_path = "./model_in"  # presumably the directory of the source model — TODO confirm
output_path = "./model_out"  # presumably where the converted model is saved — TODO confirm
max_shard_size = "5GB"  # presumably forwarded to save_pretrained(max_shard_size=...) — TODO confirm
new_max_length = 4096  # presumably the reduced context length (file is named 8k_to_4k) — TODO confirm
print("load model...start")
@goddoe
goddoe / extract_code.py
Created November 28, 2023 14:48
extract_code.py
def extract_code(inputs: dict) -> dict:
    """Extract the body of the first fenced code block in ``inputs["text"]``.

    Args:
        inputs: Mapping with a ``"text"`` entry holding the string to search.

    Returns:
        ``{"output": code}`` where ``code`` is the text between the end of
        the first opening-fence line and the next newline that is
        immediately followed by a closing fence. When no such block exists,
        the original text is returned unchanged.

    Raises:
        KeyError: If ``inputs`` has no ``"text"`` entry.
    """
    # Imported here so the function works even when the surrounding module
    # does not import ``re`` itself.
    import re

    text = inputs["text"]
    # DOTALL lets ``.`` cross newlines so multi-line blocks are captured; the
    # lazy quantifiers stop at the first closing fence rather than the last.
    match = re.search(r'```.*?\n(.*?)\n```', text, re.DOTALL)
    return {"output": match.group(1) if match else text}
@goddoe
goddoe / script_dir.sh
Created November 6, 2023 22:10
script_dir
# Resolve the absolute directory containing this script (the path of
# BASH_SOURCE[0]) and make it the current working directory.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" &> /dev/null && pwd)
# Quoted so that a directory name containing spaces or glob characters is
# passed to cd as a single, literal argument.
cd "$SCRIPT_DIR"
@goddoe
goddoe / finetune_llama_v2.py
Created October 3, 2023 08:22 — forked from younesbelkada/finetune_llama_v2.py
Fine tune Llama v2 models on Guanaco Dataset
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
@goddoe
goddoe / pip_install_fast.sh
Created September 21, 2023 14:12
pip_install_fast.sh
Tested; this works (source: https://stackoverflow.com/a/57014278/6147756).
Single command:
MAKEFLAGS="-j$(nproc)" pip install xxx
Enable for all commands in a script:
export MAKEFLAGS="-j$(nproc)"
@goddoe
goddoe / find_available_ports.py
Created September 14, 2023 17:18
find_available_ports.py
def get_available_ports(port_from=5000, port_to=6000):
    """Yield each port in ``range(port_from, port_to)`` that binds on localhost.

    Every candidate port is probed by binding a fresh TCP/IPv4 socket to
    ``('localhost', port)``. The probe socket is closed before the port is
    yielded, so the port is only known to have been free at probe time.

    Args:
        port_from: First port to probe (inclusive).
        port_to: End of the probed range (exclusive).

    Yields:
        Port numbers, in ascending order, for which the bind succeeded.
    """
    import socket

    for port in range(port_from, port_to):
        try:
            # The context manager closes the probe socket even when bind()
            # fails, so no descriptor is leaked for busy ports.
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
                probe.bind(('localhost', port))
        except (OSError, OverflowError):
            # OSError: port busy or not permitted. OverflowError: port number
            # outside 0-65535. Either way the port is skipped.
            continue
        # Yield outside the try block so that closing the generator early
        # (GeneratorExit) is never swallowed by the error handler.
        yield port
def strip_codeblock(code_with_codeblock):
    """Return the contents of a fenced code block without its fence lines.

    Everything up to and including the first newline (the opening fence
    line) is dropped, and the result is cut at the next triple-backtick
    fence. If there is no closing fence, the whole remainder is returned.

    Args:
        code_with_codeblock: Text that starts with an opening fence line.

    Returns:
        The text between the opening fence line and the closing fence.
    """
    # find() returns -1 when there is no newline, so the slice then starts at
    # index 0 and the whole input is kept.
    body_start = code_with_codeblock.find("\n") + 1
    body = code_with_codeblock[body_start:]

    fence_idx = body.find("```")
    if fence_idx == -1:
        # Without this guard the slice body[:-1] would drop the last character.
        return body
    return body[:fence_idx]
@goddoe
goddoe / curl_streaming.sh
Created July 20, 2023 03:04
curl_streaming.sh
curl -N -X 'POST' \ [12:04:09]
'http://localhost:8080/v1/chat-streaming' \
-H 'Content-Type: application/json' \
-H 'Accept: text/event-stream' \
-d '{
"turns": [
{
"role": "user",
"content": "fastapi 사용법"
}
@goddoe
goddoe / datatype.py
Created July 13, 2023 16:17
datatype.py
class DataType(str, Enum):
    """String-valued enumeration with two members whose values equal their names.

    Because ``str`` is mixed in, every member is also a ``str`` instance and
    compares equal to its plain string value.
    """

    WITH_SUFFIX = "WITH_SUFFIX"
    WITHOUT_SUFFIX = "WITHOUT_SUFFIX"
@goddoe
goddoe / reservoir_sampling.py
Created July 13, 2023 09:09
파일이 너무 커서 한 번에 메모리에 로드할 수 없는 경우, 간단한 무작위 샘플링은 불가능합니다. 그러나, 이 문제를 해결하기 위해 Reservoir Sampling이라는 알고리즘이 있습니다. 이 알고리즘은 스트림에서 무작위로 샘플을 선택하는 데 사용됩니다.
import random
def reservoir_sampling(file_name, k):
sample = []
with open(file_name, 'r') as f:
f.seek(0, 2) # 파일의 끝으로 이동
filesize = f.tell() # 파일의 크기를 얻음 (바이트)
random_set = sorted(random.sample(range(filesize), k))