@aurotripathy
aurotripathy / flowsettings.py
Last active June 4, 2025 21:15
Kotaemon flowsettings.py file for configuring the project
import os
from importlib.metadata import version
from inspect import currentframe, getframeinfo
from pathlib import Path
from decouple import config
from ktem.utils.lang import SUPPORTED_LANGUAGE_MAP
from theflow.settings.default import * # noqa
cur_frame = currentframe()
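The decouple config import above is how flowsettings.py pulls settings from the environment or a .env file; a generic usage sketch (the setting names below are illustrative, not Kotaemon's actual keys):

# illustrative setting names only; python-decouple reads .env / environment variables
KH_APP_NAME = config("KH_APP_NAME", default="kotaemon")
KH_ENABLE_DEBUG = config("KH_ENABLE_DEBUG", default=False, cast=bool)
KH_APP_DATA_DIR = Path(config("KH_APP_DATA_DIR", default="./ktem_app_data"))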
@aurotripathy
aurotripathy / context-window-discovery.py
Discover the actual context window a vLLM server supports
# from https://community.openai.com/t/easy-way-to-get-a-context-window-for-a-model/552099/4
# the hard way...
# Makes a gigantic, meaningless OpenAI chat-completion prompt call to the vLLM server,
# parses the (error) return, and determines the actual context window supported.
# Usage: python context-window-discovery.py --model gpt-4o-mini --base-url http://localhost:8080/v1
from openai import OpenAI
import re
import argparse
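A sketch of the trick the comments describe, assuming the server returns the standard OpenAI-style length error ("This model's maximum context length is N tokens..."); the regex and the oversized-prompt size are assumptions:

def discover_context_window(base_url: str, model: str) -> int:
    client = OpenAI(base_url=base_url, api_key="EMPTY")
    huge = "hello " * 2_000_000  # far larger than any realistic window
    try:
        client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": huge}],
            max_tokens=1,
        )
    except Exception as e:
        match = re.search(r"maximum context length is (\d+) tokens", str(e))
        if match:
            return int(match.group(1))
    raise RuntimeError("could not parse a context window from the server reply")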
@aurotripathy
aurotripathy / RNGD-sample-RAG-App.py
Last active May 7, 2025 15:57
A canonical RAG sample application. Uses (furiosa-llm server + OpenAI embeddings + Chroma Vector DB + LangChain framework)
# A canonical RAG sample application
# Uses (furiosa-llm server + OpenAI embeddings + Chroma Vector DB + LangChain framework)
# 100% cursor-generated code
# Needs a text file in the documents directory;
# I used the doc at https://gist.github.com/wey-gu/75d49362d011a0f0354d39e396404ba2
from typing import List, Dict, Optional
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from openai import OpenAI
from dotenv import load_dotenv
import numpy as np
load_dotenv()
def chat_with_gpt(messages):
    client = OpenAI()
    try:
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=messages,  # preview truncated here; this completion is assumed
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"Error: {e}"
"""
MistralForCausalLM(
(model): MistralModel(
(embed_tokens): Embedding(131072, 5120)
(layers): ModuleList(
(0-39): 40 x MistralDecoderLayer(
(self_attn): MistralAttention(
(q_proj): Linear(in_features=5120, out_features=4096, bias=False)
(k_proj): Linear(in_features=5120, out_features=1024, bias=False)
(v_proj): Linear(in_features=5120, out_features=1024, bias=False)
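The projection shapes in the dump imply grouped-query attention. A quick sanity check, assuming the usual Mistral head_dim of 128 (the head_dim itself is not shown in the dump):

hidden, q_out, kv_out, head_dim = 5120, 4096, 1024, 128  # head_dim assumed
num_attention_heads = q_out // head_dim    # 32 query heads
num_key_value_heads = kv_out // head_dim   # 8 KV heads -> 4 query heads share each KV head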
@aurotripathy
aurotripathy / furiosa-rngd-tool-calling-example.ipynb
Last active December 16, 2024 20:53
Furiosa RNGD Tool Calling Example
@aurotripathy
aurotripathy / llama3.1-tool-calling-with-local-furiosa-llm.py
Last active December 11, 2024 12:32
llama3.1-tool-calling-with-local-llm.py
# attribution : https://github.com/AgiFlow/llama31/blob/main/tool_calls.ipynb
from furiosa_llm import LLM, SamplingParams
prompt = """
<|begin_of_text|>
<|start_header_id|>system<|end_header_id|>
You are a helpful assistant with tool calling capabilities. When you receive a tool call response, use the output to format an answer to the original user question.
If you are using tools, respond in the format {"name": function name, "parameters": dictionary of function arguments}. Do not use variables.
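The preview cuts the prompt off mid-string; downstream, the model's JSON reply has to be parsed and dispatched. A minimal sketch of that loop (the tool, its name, and the stand-in reply are assumptions; the furiosa_llm generate call is elided because the preview does not show how the LLM is constructed):

import json

def get_current_weather(city: str) -> str:  # hypothetical tool
    return f"Sunny, 22 C in {city}"

TOOLS = {"get_current_weather": get_current_weather}

# reply = llm.generate(prompt, SamplingParams(...))  # as in the gist; details not shown
reply = '{"name": "get_current_weather", "parameters": {"city": "Paris"}}'  # stand-in

call = json.loads(reply)  # the format the system prompt requests
print(TOOLS[call["name"]](**call["parameters"]))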
# Codestral fill-in-the-middle (FIM) example
# As the prefix, we provide the function prototype and a docstring...
def fibonacci(n: int):
    """Return a Fibonacci series up to the argument n"""
    # <Codestral fills in the middle, i.e., the algorithm>
# ...and the invocation (the suffix)
# Sample run:
#   Enter a number: 10
#   [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]
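One way to drive the fill-in-the-middle completion, assuming Codestral is reached through the mistralai SDK's FIM endpoint (the model name and SDK calls are assumptions; the gist may target a different serving stack):

import os
from mistralai import Mistral

prefix = 'def fibonacci(n: int):\n    """Return a Fibonacci series up to the argument n"""\n'
suffix = "\nprint(fibonacci(int(input('Enter a number: '))))\n"

client = Mistral(api_key=os.environ["MISTRAL_API_KEY"])
resp = client.fim.complete(model="codestral-latest", prompt=prefix, suffix=suffix)
print(resp.choices[0].message.content)  # the generated middle: the algorithm body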