Skip to content

Instantly share code, notes, and snippets.

View alvarobartt's full-sized avatar
🦀
writing unreadable rust code atm

Alvaro Bartolome alvarobartt

🦀
writing unreadable rust code atm
View GitHub Profile
from distilabel.llms import (
AnthropicLLM,
InferenceEndpointsLLM,
OpenAILLM,
)
from distilabel.pipeline import Pipeline
from distilabel.steps import (
CombineColumns,
KeepColumns,
LoadDataFromDicts,
@alvarobartt
alvarobartt / steps-unpacked-constant-length-dataset.py
Created December 21, 2023 09:30
Unpacks a 🤗`trl.ConstantLengthDataset` to estimate how many steps make up one epoch, avoiding wrong epoch estimations
# Usage:
# python steps-unpacked-constant-length-dataset.py \
# --dataset-path "argilla/ultrafeedback-binarized-preferences-cleaned" \
# --hf-tokenizer "alignment-handbook/zephyr-7b-sft-full" \
# --gradient-accumulation-steps 2 \
# --per-eval-batch-size 32 \
# --num-devices 8 \
# --max-seq-length 2048 \
# --num-of-sequences 1024 \
# --chars-per-token 3.6
@alvarobartt
alvarobartt / causallm-to-hub.py
Last active December 16, 2023 17:52
Upload a 🤗`transformers.AutoModelForCausalLM` and its tokenizer from local files into the Hub
# Usage: python upload.py --dir <dir> --hub-name <hub_name>
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import argparse
def get_args():
parser = argparse.ArgumentParser()
@alvarobartt
alvarobartt / dpo-qlora-4bit.py
Last active April 25, 2024 09:42
DPO fine-tuning using `trl.DPOTrainer` and Q-LoRA (4-bit)
import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
from transformers import AutoTokenizer, AutoModelForCausalLM
from trl import DPOTrainer
if __name__ == "__main__":
model_name = "..."
dataset = load_dataset(...)
@alvarobartt
alvarobartt / gmt-timezone.py
Created April 21, 2020 11:18
Calculate GMT Time Zone using Python
# Copyright 2020 Alvaro Bartolome @ alvarobartt in GitHub
# ____ ___ ___ ______ ______ ____ ___ ___ ___ _____ ___ ____ ___
# / || | || | | || || | | / _]| | / \ | \ / _]
# | __|| _ _ || | | | | | | _ _ | / [_ |__/ || || _ | / [_
# | | || \_/ ||_| |_| |_| |_| | | | \_/ || _]| __|| O || | || _]
# | |_ || | | | | | | | | | | || [_ | / || || | || [_
# | || | | | | | | | | | | || || || || | || |
# |___,_||___|___| |__| |__| |____||___|___||_____||_____| \___/ |__|__||_____|
@alvarobartt
alvarobartt / investpy_search.py
Created December 17, 2019 20:00
investpy search functionality explained
# Required investpy version 0.9.10 or higher (0.9.12 recommended)
import investpy
# As asked on issue #86 -> https://github.com/alvarob96/investpy/issues/86
# this gist will explain `investpy.search_text` function usage for Swedish
# bonds that are not statically stored on investpy/resources/bonds/bonds.csv
# file, since its data is not provided by Investing on its general government
# bonds listing.
search_results = investpy.search_text(text='Sweden 10 Year', count=10)
@alvarobartt
alvarobartt / investpy_stocks.py
Created October 23, 2019 12:47
This is a sample of investpy usage for stock data retrieval, as of the current release (0.9.7).
import investpy
print(investpy.__author__, investpy.__version__)
# Retrieve a `pandas.DataFrame` containing all the Stocks from United States
stocks = investpy.get_stocks(country='united states')
# Retrieve historical data from AAPL
df = investpy.get_stock_historical_data(stock='AAPL', # Stock symbol of the stock to retrieve data from
country='united states', # Country from where the introduced stock is from
@alvarobartt
alvarobartt / identify_all_trends.py
Last active February 4, 2023 10:00
trendet is a Python package for trend detection on stock time series data
@alvarobartt
alvarobartt / world_etfs.py
Last active June 10, 2019 16:40
Test investpy (https://github.com/alvarob96/investpy) functions on World ETF Retrieval
#!/usr/bin/env python
# Copyright 2018-2019 Alvaro Bartolome
# See LICENSE for details.
__author__ = "Alvaro Bartolome <alvarob96@usal.es>"
import investpy
import pandas as pd
import datetime
# Python3.x
import investpy
import pandas as pd
import random
from pprint import pprint
# This function retrieves all the available equities indexed on es.Investing.com