-
-
Save stevekrenzel/9cb6910544dc068b3e8e6abc0f375e9d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash

# Function to display usage
#
# Arguments: $1 - exit status to terminate with (optional, defaults to 1)
# Outputs:   the help text, on stdout when the status is 0 and on stderr
#            otherwise, so that error-path usage does not pollute stdout
# Returns:   never returns; exits the script with the requested status
show_usage() {
  local status="${1:-1}"
  local fd=1
  # Anything other than a clean exit is a diagnostic and belongs on stderr.
  [ "$status" -eq 0 ] || fd=2
  {
    echo "Usage: $0 -a <alias> -k <openai_api_key> -d <directory|file> [directory|file...]"
    echo "Options:"
    echo " -a Alias for the subject in the images"
    echo " -k OpenAI API key"
    echo " -d Directory or file(s) to process"
    echo " -h Show this help message"
  } >&"$fd"
  exit "$status"
}
# Parse command line arguments
#
# Globals written: ALIAS, API_KEY, DIRS_FILES (array, reset on every call)
# Arguments:       the command line to parse (normally "$@")
#
# Every -d value is collected, and so are the operands left over once option
# parsing stops, so "-d a b c" and "-d a -d b -d c" select the same inputs.
# getopts stops at the first operand, so options must come before any extra
# paths.
parse_args() {
  local opt
  # Keep getopts state private so the function can be called more than once.
  local OPTIND=1 OPTARG
  DIRS_FILES=()
  while getopts "a:k:d:h" opt; do
    case "$opt" in
      a) ALIAS="$OPTARG" ;;
      k) API_KEY="$OPTARG" ;;
      d) DIRS_FILES+=("$OPTARG") ;;
      # Ask for a zero exit status: an explicit help request is not an error.
      h) show_usage 0 ;;
      # getopts reports unknown options and missing arguments as '?'.
      *) show_usage ;;
    esac
  done
  shift $((OPTIND - 1))
  # Trailing operands are the "[directory|file...]" part of the usage line.
  DIRS_FILES+=("$@")
}
parse_args "$@"

# Check if required parameters are provided
if [[ -z "${ALIAS:-}" || -z "${API_KEY:-}" || ${#DIRS_FILES[@]} -eq 0 ]]; then
  echo "Error: Missing required parameters" >&2
  show_usage
fi
# Function to check if a file is an image
#
# Arguments: $1 - path to test
# Returns:   0 if the file name ends in .jpg, .jpeg, .png or .gif
#            (case-insensitive), 1 otherwise
#
# The decision is made on the base name and requires the dot, so a file that
# is merely *called* "jpg" or "png" is not mistaken for an image. This is the
# same rule the directory scan applies with find -iname "*.jpg" etc.
is_image() {
  local file="$1"
  local name
  name="$(lowercase "${file##*/}")"
  case "$name" in
    *.jpg|*.jpeg|*.png|*.gif) return 0 ;;
    *) return 1 ;;
  esac
}
# Function to convert string to lowercase
#
# Arguments: $1 - the string to convert
# Outputs:   the lowercased string followed by a newline, on stdout
lowercase() {
  # printf rather than echo: echo would treat values such as "-n" or "-E"
  # as options and print nothing useful.
  printf '%s\n' "$1" | tr '[:upper:]' '[:lower:]'
}
# Create a temporary directory
# Abort straight away if that fails: everything below writes into it.
temp_dir="$(mktemp -d)" || {
  echo "Error: Failed to create a temporary directory" >&2
  exit 1
}
# Remove it on every exit path. ${temp_dir:?} makes the cleanup refuse to run
# with an empty path, and -- protects against a name starting with a dash.
trap 'rm -rf -- "${temp_dir:?}"' EXIT
# Function to process an image file
#
# Sends one image to the OpenAI chat completions endpoint and stores the
# returned description next to the image, in a file with the same name but a
# .txt extension.
#
# Globals read: ALIAS, API_KEY, temp_dir
# Arguments:    $1 - path to the image file
# Outputs:      a progress line and the caption on stdout; errors on stderr
# Returns:      0 on success; 1 if the image cannot be read, the request
#               cannot be built or sent, the response carries no caption, or
#               the caption file cannot be written
process_image() {
  local image_file="$1"
  local caption_file="${image_file%.*}.txt"
  local temp_json="$temp_dir/payload.json"
  local extension mime_type system_prompt user_prompt response caption api_error

  if [ ! -r "$image_file" ]; then
    echo "Error: Cannot read $image_file" >&2
    return 1
  fi

  # Label the data URL with the real media type instead of always claiming JPEG.
  extension="$(printf '%s' "${image_file##*.}" | tr '[:upper:]' '[:lower:]')"
  case "$extension" in
    png) mime_type="image/png" ;;
    gif) mime_type="image/gif" ;;
    *) mime_type="image/jpeg" ;;
  esac

  system_prompt="You are a profressional image captioner. The user will provide an image and it is your task to describe the image in great detail. The description you generate should be able to be fed back to an AI image generator and replicate a similar photo. Pay attention to little details, ambiance, palette, mood, etc. Most importantly, pay attention to the subject (person, animal, object) that is clearly the focus of the photo. These should all be portraits and in the portrait there is a subject that goes by the alias $ALIAS. In your description, be sure to mention $ALIAS explicitly by their alias. And remember, these are profressional photos so don't call them 'images'. Use words like photo, portrait, scene, etc."
  user_prompt="Here's an image of $ALIAS. Please describe it."

  # Build the payload with jq so every string is JSON-escaped; an alias that
  # contains quotes or backslashes can no longer corrupt the request.
  # The base64 text is streamed through stdin because it is far too large for
  # a command-line argument, and its newlines are stripped because some
  # base64 implementations wrap their output.
  if ! base64 < "$image_file" | tr -d '\n' | jq -Rs \
      --arg system "$system_prompt" \
      --arg user "$user_prompt" \
      --arg mime "$mime_type" \
      '{
        model: "gpt-4o",
        messages: [
          {
            role: "system",
            content: [{type: "text", text: $system}]
          },
          {
            role: "user",
            content: [
              {type: "text", text: $user},
              {type: "image_url", image_url: {url: ("data:" + $mime + ";base64," + .)}}
            ]
          }
        ]
      }' > "$temp_json"; then
    echo "Error: Failed to build the request for $image_file" >&2
    return 1
  fi

  # Make the API call. Declaration and assignment are separate so that curl's
  # exit status is not hidden behind the status of 'local'.
  if ! response="$(curl -sS https://api.openai.com/v1/chat/completions \
      -H "Content-Type: application/json" \
      -H "Authorization: Bearer $API_KEY" \
      -d "@$temp_json")"; then
    echo "Error: Failed to generate caption for $image_file" >&2
    return 1
  fi

  # Extract the caption. "// empty" turns a missing or null content field
  # (what an API error body yields) into no output instead of the literal
  # string "null". jq's own complaints about a non-JSON body are silenced on
  # purpose: the empty result below already reports the failure.
  caption="$(printf '%s' "$response" \
    | jq -r '.choices[0].message.content // empty' 2>/dev/null)"
  if [ -z "$caption" ]; then
    echo "Error: Failed to generate caption for $image_file" >&2
    # Pass on the reason given by the API, when there is one.
    api_error="$(printf '%s' "$response" \
      | jq -r '.error.message // empty' 2>/dev/null)"
    [ -z "$api_error" ] || printf '%s\n' "$api_error" >&2
    return 1
  fi

  # Save the caption to a file
  if ! printf '%s\n' "$caption" > "$caption_file"; then
    echo "Error: Cannot write $caption_file" >&2
    return 1
  fi
  echo "Generated caption for: $image_file"
  cat "$caption_file"
}
# Create a temporary file to store the list of images
# ${temp_dir:?} aborts instead of writing to "/image_list.txt" should the
# temporary directory ever be unset.
image_list="${temp_dir:?}/image_list.txt"
: > "$image_list"

# Process each argument
for arg in "${DIRS_FILES[@]}"; do
  if [ -d "$arg" ]; then
    # If argument is a directory, find all image files
    find "$arg" -type f \( -iname "*.jpg" -o -iname "*.jpeg" -o -iname "*.png" -o -iname "*.gif" \) >> "$image_list"
  elif [ -f "$arg" ]; then
    # If argument is a file, check if it's an image
    if is_image "$arg"; then
      printf '%s\n' "$arg" >> "$image_list"
    else
      echo "Warning: Skipping '$arg' - not a supported image file" >&2
    fi
  else
    echo "Warning: '$arg' is not a valid file or directory" >&2
  fi
done

# Sort and remove duplicates from the image list
sort -u "$image_list" > "$temp_dir/image_list_sorted.txt"

if [ ! -s "$temp_dir/image_list_sorted.txt" ]; then
  echo "Warning: No supported image files found" >&2
fi

# Process each image in the sorted list
# The list is read on file descriptor 3 so that nothing started while an image
# is being processed can consume the remaining file names from stdin.
failures=0
while IFS= read -r image <&3; do
  process_image "$image" || failures=$((failures + 1))
done 3< "$temp_dir/image_list_sorted.txt"

# Report failures through the exit status instead of always claiming success.
if [ "$failures" -gt 0 ]; then
  echo "Error: Caption generation failed for $failures image(s)" >&2
  exit 1
fi
echo "Caption generation complete!"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment