Last active
March 15, 2023 22:56
-
-
Save pszemraj/df14338ba632a2879ec538e452c637c2 to your computer and use it in GitHub Desktop.
Download the "gauntlet" for summarization (Peter's version) and run summarization inference on it with the textsum package.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download the "gauntlet" documents as a zip archive and unpack them,
# flattened, into ./gauntlet.
URL="https://www.dropbox.com/sh/zu1p7rhg5238a5y/AABsJN_pCYf9plSDZY8ziKATa?dl=1"

# The URL is quoted on expansion because it contains `?`, which the shell
# would otherwise treat as a glob character.
# The steps are chained with && so a failed download is never unzipped and
# docs.zip is removed only after a successful extraction.
# unzip flags: -j discards the directory structure stored in the archive,
# -B keeps a backup copy of any file that would otherwise be overwritten.
wget -O docs.zip "${URL}" \
  && unzip -B -j docs.zip -d gauntlet \
  && rm -rf docs.zip
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Run textsum-dir over every document in gauntlet/ and zip the results.
# Prerequisite: pip install textsum   (or: pip install textsum[8bit])

# Stop at the first failing command (and on unset variables) so the final
# "Script finished" message is only printed when inference and compression
# actually succeeded.
set -euo pipefail

# Set-able options
NUM_BEAMS=4        # forwarded to textsum-dir as -nb
BATCH_SIZE=16384   # forwarded to textsum-dir as -batch
MODEL="pszemraj/long-t5-tglobal-base-16384-book-summary"

# Extract the base name of the model (the part after the last '/')
MODEL_BASENAME=$(basename "${MODEL}")

# Input directory
INPUT_DIR="gauntlet/"

# Output directory, named after the model
OUTPUT_DIR="${MODEL_BASENAME}_output"

# Create output directory if it doesn't exist
mkdir -p "${OUTPUT_DIR}"

# Run the textsum-dir command with the specified options
# add -8bit if you want to do LLM.int8 quantization with bitsandbytes (requires install with [8bit])
# add --tf32 for TF32 computation on Ampere GPUs or newer
textsum-dir --shuffle -m "${MODEL}" \
  -batch "${BATCH_SIZE}" -nb "${NUM_BEAMS}" \
  "${INPUT_DIR}" -o "${OUTPUT_DIR}"

# Compress the output directory into a semantically-named zip file.
# The name records the model, batch size and beam count used for the run.
ZIP_FILE="${MODEL_BASENAME}_gauntlet_summary-bs-${BATCH_SIZE}-nb-${NUM_BEAMS}.zip"
# -r recurses into the directory, -9 selects maximum compression,
# -j stores files without their directory paths.
zip -r -9 -j "${ZIP_FILE}" "${OUTPUT_DIR}"

echo "Script finished. The output is compressed to ${ZIP_FILE}."
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment