This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from rapidfuzz import process, fuzz | |
def fuzzy_align(masterlist, list2, cutoff=70): | |
# Dictionary to hold matches | |
matches = {} | |
# Track used indices to avoid duplicate matches in the masterlist | |
used_indices = set() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging | |
from email.parser import BytesParser | |
from pathlib import Path | |
import fire | |
import html2text | |
import pandas as pd | |
from tqdm import tqdm | |
# Setup logging |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import numpy as np | |
from datasets import ClassLabel, Dataset, DatasetDict | |
def split_dataset( | |
dataset: Dataset, | |
test_size=0.025, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
This script uploads a folder to the Hugging Face Hub. | |
python upload_folder.py --help | |
pip install fire huggingface-hub | |
""" | |
import logging |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import socket | |
import uuid | |
import yake | |
from flask import Flask, redirect, render_template_string, request, url_for | |
from markupsafe import escape | |
app = Flask(__name__) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import logging | |
def check_ampere_gpu(): | |
""" | |
Check if the GPU supports NVIDIA Ampere or later and enable FP32 in PyTorch if it does. | |
""" | |
# Check if CUDA is available | |
if not torch.cuda.is_available(): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
cli.py - Command line interface for textsum. | |
This edition: fast CPU inference with Intel IPEX (see https://archive.ph/oY5b1). | |
Usage: | |
textsum-dir --help | |
""" | |
import os |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import logging | |
import re | |
from datetime import datetime | |
from pathlib import Path | |
import datasets | |
import evaluate | |
import fire | |
import intel_extension_for_pytorch as ipex |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding=utf-8 | |
# Copyright 2020 The HuggingFace Inc. team. All rights reserved. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Summary | |
""" | |
import logging | |
from pathlib import Path | |
import fire | |
from datasets import Dataset, load_dataset | |
from tqdm.auto import tqdm | |
from transformers import AutoTokenizer |