Skip to content

Instantly share code, notes, and snippets.

@amrakm
amrakm / token_counter.py
Last active April 19, 2023 19:18
gpt token counter
import tiktoken
tokenizer = tiktoken.get_encoding('p50k_base')
# create the length function
def tiktoken_len(text):
tokens = tokenizer.encode(
text,
disallowed_special=()
)
@amrakm
amrakm / langchain_text_splitter.py
Created April 11, 2023 18:47
langchain text splitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Build the splitter. Lengths are computed by `tiktoken_len`, which is defined
# outside this snippet (presumably a token counter -- confirm against its
# definition), so chunk_size / chunk_overlap are in whatever unit it returns.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", " ", ""],
    length_function=tiktoken_len,
    chunk_size=500,
    chunk_overlap=20,
)
@amrakm
amrakm / prettyprint_json.py
Created December 3, 2022 16:31
print json with indentation
import json
# Parse the JSON text carried by `response` (defined outside this snippet;
# only its `.text` attribute is used) and print it with 2-space indentation.
# The result is bound to `parsed` rather than `dict` so the builtin `dict`
# type is not shadowed for the rest of the module.
parsed = json.loads(response.text)
print(json.dumps(parsed, indent=2))
@amrakm
amrakm / firebase_to_pandas.py
Created November 28, 2022 03:31
firebase to pandas dataframe
import pandas as pd
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
cred = credentials.Certificate("XXX.json")
firebase_admin.initialize_app(cred)
@amrakm
amrakm / expand_tiled_images.py
Created November 26, 2022 11:37
stitch tiled images using pillow #python
from PIL import Image
def expand_tile_pil_img(pil_img):
blank_image = Image.new("RGB", (pil_img.size[0] * 2, pil_img.size[1] * 2))
blank_image.paste(pil_img, (0,0))
blank_image.paste(pil_img, (pil_img.size[0], 0))
blank_image.paste(pil_img, (0, pil_img.size[1]))
@amrakm
amrakm / stablediffusion_reproducible_seeds.py
Created November 21, 2022 21:17
Reproducible Stable Diffusion using seeds
import torch
from diffusers import StableDiffusionPipeline
pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", use_auth_token=True).to("cuda")
seed= 111111
height =512
width = 512
device = 'cuda'
@amrakm
amrakm / colab_using_local_runtime.sh
Created October 5, 2022 09:32
Connecting Google Colab to a local runtime
jupyter notebook \
--NotebookApp.allow_origin='https://colab.research.google.com' \
--port=8888 \
--NotebookApp.port_retries=0
@amrakm
amrakm / print_GPU_name_and_memory.py
Created October 4, 2022 22:04
print_GPU_name_and_memory
#@markdown **NVIDIA GPU**
import subprocess
# Query nvidia-smi for GPU name, total memory and free memory, formatted as
# CSV without a header row.
nvidia_smi_cmd = [
    'nvidia-smi',
    '--query-gpu=name,memory.total,memory.free',
    '--format=csv,noheader',
]
completed = subprocess.run(nvidia_smi_cmd, stdout=subprocess.PIPE)
# stdout was captured as bytes; decode it to text before printing.
sub_p_res = completed.stdout.decode('utf-8')
print(sub_p_res)
@amrakm
amrakm / split_reviews_into_multi_parts.py
Created October 4, 2022 15:04
split reviews into multiple parts based on max_token size
# split reviews into multiple parts based on max_token size, appending one sentence at a time until the part hits the max token limit
def split_rev(rev, max_tokens = 384):
rev_sentences = rev.split('.')
parts_list = []
@amrakm
amrakm / import_or_install.py
Created September 26, 2022 10:47
import or install dependencies from within python
import pip
def import_or_install(package):
    """Check that *package* can be imported; install it with pip if it cannot.

    The install runs ``<current interpreter> -m pip install <package>`` in a
    subprocess instead of calling ``pip.main`` in-process: ``pip.main`` is an
    internal entry point that pip does not support as a library API.

    Args:
        package: Name used both as the module name to import and as the
            distribution name handed to ``pip install``. When the two differ
            for a project, the import check and the install will not refer to
            the same thing.

    Raises:
        subprocess.CalledProcessError: If the ``pip install`` command exits
            with a non-zero status.
    """
    # Function-scope imports keep this helper self-contained.
    import importlib
    import subprocess
    import sys

    try:
        importlib.import_module(package)
    except ImportError:
        # sys.executable targets the interpreter that is running this code,
        # so the package lands in the environment that will import it.
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        # Let the import system notice files created after its caches were
        # filled, so a later `import` can find the new package.
        importlib.invalidate_caches()