#!/bin/bash
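#
# Extracts the text of a PDF, image, or plain text file and writes an
# English translation of it to STDOUT, using a local Ollama instance.
#
# Required external tools (inferred from the commands below): pdftotext
# (e.g. from poppler-utils), tesseract, jq, and wget.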
# Check if the file path argument is provided
if [ $# -eq 0 ]; then
  echo "Usage: $0 <text_file> [language=deu]" >&2
  exit 1
fi
# an "echo" that directly writes to STDERR
# so that the translated output can be piped
# from STDOUT
write_line() {
local text=$1
echo "$text" >&2
}
# reads text from a file
#
# $1 => path to the file to scan
# $2 => optional language used in the file, like "eng" or "deu"
get_text_from_file() {
  local file_path=$1
  local language=$2
  local filename
  local extension

  filename=$(basename "$file_path")
  extension="${filename##*.}"

  case "$extension" in
    pdf)
      # PDF document
      text_from_pdf=$(pdftotext "$file_path" -)
      echo "$text_from_pdf"
      ;;
    gif|jpeg|jpg|png|tif|tiff)
      # image file
      text_from_image=$(tesseract "$file_path" stdout -l "$language" 2>/dev/null)
      echo "$text_from_image"
      ;;
    *)
      # handle anything else as a text file
      cat "$file_path"
      ;;
  esac
}
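
# Hypothetical example call, assuming a German-language scan; tesseract
# needs the matching language data (e.g. the tesseract-ocr-deu package
# on Debian/Ubuntu) to be installed for "deu" to work:
#
#   get_text_from_file "scan.png" "deu"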
# calls the Ollama API
#
# $1 => the prompt to send
chat_completion() {
  local prompt=$1

  # if you run the API on another address, you can
  # customize it via the `TGF_API_URL` environment variable
  url="${TGF_API_URL:-http://localhost:11434/api/generate}"
  # if you prefer another model, you can
  # customize it via the `TGF_LLM_MODEL` environment variable
  model="${TGF_LLM_MODEL:-phi3}"
  # if you prefer another temperature value, you can
  # customize it via the `TGF_LLM_TEMPERATURE` environment variable
  temperature="${TGF_LLM_TEMPERATURE:-0}"

  # create the JSON payload for the Ollama API
  json_data=$(jq -n \
    --arg model "$model" \
    --arg prompt "$prompt" \
    --argjson temperature "$temperature" \
    '{model: $model, prompt: $prompt, options: {temperature: $temperature}, stream: false}')
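
  # With the defaults above, json_data will look like this
  # (prompt shortened for readability):
  #
  #   {"model":"phi3","prompt":"...","options":{"temperature":0},"stream":false}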
  # execute the Ollama API request
  response=$(wget --header "Content-Type: application/json" \
    --post-data "$json_data" \
    "$url" \
    -q \
    -O -)

  # if the request was successful, there should be a `response`
  # property in the output; `// empty` makes jq return an empty
  # string instead of the literal "null" if the property is missing
  extracted_response=$(echo "$response" | jq -r '.response // empty' 2>/dev/null)
  if [ -n "$extracted_response" ]; then
    echo "$extracted_response"
  else
    write_line "[ERROR] No valid JSON found!"
    exit 1
  fi
}
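
# A quick way to verify the Ollama endpoint responds, independent of
# this script (a sketch, assuming the default URL and model from above):
#
#   wget -q -O - --header "Content-Type: application/json" \
#     --post-data '{"model":"phi3","prompt":"Say hi","stream":false}' \
#     http://localhost:11434/api/generate | jq -r '.response'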
# collect parameters
file_with_text="$1"
language="${2:-deu}"

# get the text from the file
write_line "ℹ️ Getting text from '$file_with_text' ..."
text_to_translate=$(get_text_from_file "$file_with_text" "$language")

# first let the LLM fix typos in the text
write_line "ℹ️ Fixing text of '$file_with_text' if needed ..."
fixed_text=$(chat_completion "Fix typos in the following text: $text_to_translate") || exit 1

# now translate the fixed text with the LLM
write_line "ℹ️ Translating fixed version of '$file_with_text' ..."
translated_text=$(chat_completion "Translate the following text to English: $fixed_text") || exit 1

write_line "✅ '$file_with_text' successfully translated"
echo "$translated_text" # output to STDOUT