This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## to make it easier to work with the paths in torchtune | |
path_model_downloaded: /home/mike/_torch_tune/_xmodel_downloaded/MetaLlama-3.1-8B | |
path_model_finetuned: /home/mike/_torch_tune/_xmodel_finetuned/MetaLlama-3.1-8B_finetuned | |
# Tokenizer | |
tokenizer: | |
_component_: torchtune.models.llama3.llama3_tokenizer | |
path: ${path_model_downloaded}/original/tokenizer.model |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## this gist includes some scripts to work with Torchtune | |
from huggingface_hub import snapshot_download | |
from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError | |
from pathlib import Path | |
def download_model(model_id: str, output_dir: str = None, hf_token: str = None, ignore_patterns: list = ["*.safetensors"]) -> None: | |
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import pandas as pd | |
import random | |
from transformers import AutoTokenizer | |
xFld = '....' # folder with a bunch of text files | |
xtok_tl = AutoTokenizer.from_pretrained('unsloth/tinyllama-bnb-4bit') | |
xtok_ge = AutoTokenizer.from_pretrained('unsloth/gemma-2b-bnb-4bit') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
""" | |
Created on Fri Feb 23 05:27:37 2024 | |
@author: mike | |
""" | |
import torch | |
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
FROM: https://gist.github.com/sobolevnrm/412763ebae5424a92d3239898b615e2a | |
Process RIS format following the standard at | |
http://referencemanager.com/sites/rm/files/m/direct_export_ris.pdf """ | |
import re | |
ALLOWED_TAGS = {"TY" : "Record start", | |
"ER" : "Record end", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# get photo from unsplash | |
# depends on python-unsplash | |
# !pip install python-unsplash | |
from unsplash.api import Api | |
from unsplash.auth import Auth | |
import requests | |
from PIL import Image | |
from io import BytesIO |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# create style | |
from docx import Document | |
# create style | |
from docx.enum.style import WD_STYLE_TYPE | |
from docx.shared import Inches, Pt | |
## TO DO | |
## [1] add an image in a cell |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from string import Template | |
xDocHead = '''<!DOCTYPE html> | |
<html><head> | |
<meta http-equiv="content-type" content="text/html; charset=windows-1252"> | |
<style> | |
.title_orange { | |
font-family: Verdana, Geneva, sans-serif; | |
font-size: 14px; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from base64 import b64encode | |
import requests | |
import json | |
UrlAuth = 'https://ops.epo.org/3.2/auth/accesstoken' | |
UrlServiceBase = 'https://ops.epo.org/3.2/rest-services/published-data' | |
UrlServiceBaseSearch = UrlServiceBase + '/search/biblio/?q=' | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from scipy.optimize import linear_sum_assignment | |
def make_assignments(xDF): | |
''' | |
a simple wrapper around the | |
scipy.optimize.linear_sum_assignment | |
which implements the Hungarian Algorithm |
NewerOlder