#!/bin/bash
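#
# Extracts text from a PDF, image, or plain text file, lets a local Ollama
# model fix typos in it, and then translates the result to English.
# Status messages are written to STDERR, the translated text to STDOUT.
#
# Requirements: jq, wget, pdftotext (for PDFs), tesseract (for images)
#
# Environment variables:
#   TGF_API_URL          Ollama API endpoint  (default: http://localhost:11434/api/generate)
#   TGF_LLM_MODEL        model to use         (default: phi3)
#   TGF_LLM_TEMPERATURE  sampling temperature (default: 0)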
# Check if the file path argument is provided
if [ $# -eq 0 ]; then
    echo "Usage: $0 <text_file> [language=deu]"
    exit 1
fi
# an "echo" that directly writes to STDERR | |
# so that the translated output can be piped | |
# from STDOUT | |
write_line() { | |
local text=$1 | |
echo "$text" >&2 | |
} | |
# reads text from a file
#
# $1 => path to the file to scan
# $2 => optional language used in the file, like "eng" or "deu"
get_text_from_file() {
    local file_path=$1
    local language=$2

    filename=$(basename "$file_path")
    extension="${filename##*.}"

    case "$extension" in
        pdf)
            # PDF document
            text_from_pdf=$(pdftotext "$file_path" -)
            echo "$text_from_pdf"
            ;;
        gif|jpeg|jpg|png|tif|tiff)
            # image file
            text_from_image=$(tesseract "$file_path" stdout -l "$language" 2>/dev/null)
            echo "$text_from_image"
            ;;
        *)
            # handle anything else as a plain text file
            cat "$file_path"
            ;;
    esac
}
# calls the Ollama API
#
# $1 => the prompt to send
chat_completion() {
    local prompt=$1

    # if you run the API on another address, you
    # can customize it via the `TGF_API_URL` environment variable
    url="${TGF_API_URL:-http://localhost:11434/api/generate}"
    # if you prefer another model, you
    # can customize it via the `TGF_LLM_MODEL` environment variable
    model="${TGF_LLM_MODEL:-phi3}"
    # if you prefer another temperature value, you
    # can customize it via the `TGF_LLM_TEMPERATURE` environment variable
    temperature="${TGF_LLM_TEMPERATURE:-0}"

    # create the JSON payload for the Ollama API
    json_data=$(jq -n \
        --arg model "$model" \
        --arg prompt "$prompt" \
        --argjson temperature "$temperature" \
        '{model: $model, prompt: $prompt, options: {temperature: $temperature}, stream: false}')

    # execute the Ollama API request
    response=$(wget --header "Content-Type: application/json" \
        --post-data "$json_data" \
        "$url" \
        -q \
        -O -)

    # if the request was successful, there should
    # be a non-empty `response` property in the output
    extracted_response=$(echo "$response" | jq -r '.response // empty' 2>/dev/null)
    if [ -n "$extracted_response" ]; then
        echo "$extracted_response"
    else
        write_line "[ERROR] No valid JSON found!"
        exit 1
    fi
}
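
# note: the defaults above can be overridden per run via environment variables,
# e.g. (model, script and file names below are only placeholders):
#   TGF_LLM_MODEL=llama3 TGF_LLM_TEMPERATURE=0.2 ./translate.sh document.pdf eng
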
# collect parameters
file_with_text="$1"
language="${2:-deu}"

# get the text from the file
write_line "ℹ️ Get text from '$file_with_text' ..."
text_to_translate=$(get_text_from_file "$file_with_text" "$language")

# first let the LLM fix typos in the text
write_line "ℹ️ Fixing text of '$file_with_text' if needed ..."
fixed_text=$(chat_completion "Fix typos in the following text: $text_to_translate")

# now translate this text with the LLM
write_line "ℹ️ Translating fixed version of '$file_with_text' ..."
translated_text=$(chat_completion "Translate the following text to English: $fixed_text")

write_line "✅ '$file_with_text' successfully translated"
echo "$translated_text" # output to STDOUT