Skip to content

Instantly share code, notes, and snippets.

@t-book
Last active January 19, 2024 20:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save t-book/1ca51b6b213a9d630b24a8bd647cec73 to your computer and use it in GitHub Desktop.
#!/bin/bash
#
# Re-sync GeoServer coverage files: scan coveragestore.xml entries whose
# referenced .tif is missing from the importer directory, then copy the
# file back from the uploaded-data directory.
#
# Every setting below may be overridden from the environment; the values
# shown are the defaults (identical to the previous hard-coded ones).

# Set to 'true' for a dry run, 'false' to actually copy files.
DRY_RUN="${DRY_RUN:-true}"
# GeoServer workspace directory holding the coveragestore.xml files.
geoserver_data_dir="${geoserver_data_dir:-/geoserver_data/data/workspaces/geonode/}"
# Directory that still holds the originally uploaded files (copy source).
uploaded_data_dir="${uploaded_data_dir:-/mnt/volumes/statics/uploaded}"
# Importer directory that should contain the .tif files (copy target).
importer_data="${importer_data:-/geoserver_data/data/geonode/importer_data}"
# Only inspect coveragestore.xml files modified on/after this date (YYYY-MM-DD).
date_filter="${date_filter:-2024-01-19}"

# Parallel arrays: filenames missing from $importer_data and the
# per-store folder each one belongs in.
missing_tifs_filenames=()
folder_names=()

echo "Dry Run Mode: $DRY_RUN"
echo ""
echo "--------- Analysis process ---------"
# Walk every coveragestore.xml under the workspace and record each
# referenced .tif that is missing from the importer directory.
while IFS= read -r -d '' store; do
    # Only respect stores modified on/after the specified date.
    # NOTE(review): 'date -r FILE' (print file mtime) is GNU coreutils;
    # confirm the target container uses GNU date. ISO dates compare
    # correctly with the lexicographic '<' of [[ ]].
    if [[ "$(date -r "$store" +%Y-%m-%d)" < "$date_filter" ]]; then
        continue
    fi
    echo "Processing XML file: $store"

    # Extract the referenced file URL from the XML. Take only the first
    # <url> element so a multi-line match cannot corrupt the filename.
    tif_url=$(grep '<url>' "$store" | head -n 1 | sed -e 's/<[^>]*>//g')
    if [[ -z "$tif_url" ]]; then
        # Previously an empty URL was silently recorded as a missing
        # file named "" — skip such stores explicitly instead.
        echo "Analysis: No <url> element found in $store - skipping"
        echo ""
        continue
    fi
    filename="${tif_url##*/}"

    # Extract the folder name the file should live in.
    if [[ "$tif_url" == *"/"*"/"* ]]; then
        # For URLs like file:./lmqfs/2/2.tif, extract second-to-last segment.
        folder_name=$(echo "$tif_url" | sed -e 's|file:./||' -e 's|/[^/]*$||' | awk -F/ '{print $(NF)}')
    else
        # For URLs like file:1/C.tif, extract the first segment after 'file:'.
        folder_name=$(echo "$tif_url" | sed -e 's|file:||' -e 's|/.*$||')
    fi

    # Check recursively whether the file exists in importer_data.
    # '-print -quit' stops at the first match: only existence matters.
    found_in_importer=$(find "$importer_data" -type f -name "$filename" -print -quit)
    if [[ -z "$found_in_importer" ]]; then
        echo "Analysis: File missing in importer directory - $filename"
        missing_tifs_filenames+=("$filename")
        folder_names+=("$folder_name") # Store corresponding folder name
    fi
    echo ""
done < <(find "$geoserver_data_dir" -name "coveragestore.xml" -print0)
echo "Analysis complete. Found ${#missing_tifs_filenames[@]} missing files."
echo ""
# Copying files based on analysis: for each missing .tif, locate it in
# the uploaded-data directory and copy it into the importer directory.
echo "--------- Copy process ---------"
copied_files_counter=0
for i in "${!missing_tifs_filenames[@]}"; do
    filename=${missing_tifs_filenames[i]}
    folder_name=${folder_names[i]}
    echo "Processing file: $filename"

    # Per-file outcome flag. The old code tested the *global* counter,
    # so after the first successful copy no later file could ever be
    # reported as "Failed".
    file_handled=false

    while IFS= read -r -d '' source_path; do
        destination_dir="${importer_data}/${folder_name}"
        # BUG FIX: was "$(unknown)" (a nonexistent command), which made the
        # destination the bare directory path — the file is the same basename
        # as the one recorded during analysis.
        destination_file="${destination_dir}/${filename}"

        if [[ -f "$destination_file" ]]; then
            # Already present: nothing to do, but this is not a failure.
            echo " - Skip: File already exists at destination: $destination_file"
            file_handled=true
            continue
        fi

        if [[ "$DRY_RUN" == true ]]; then
            echo " - Dry Run: Would copy from $source_path to $destination_file"
        else
            mkdir -p "$destination_dir"
            cp "$source_path" "$destination_file"
            echo " - Copied: $source_path to $destination_file"
        fi
        # Arithmetic assignment: '(( counter++ ))' returns status 1 when the
        # counter is 0, which would abort the script under 'set -e'.
        copied_files_counter=$((copied_files_counter + 1))
        file_handled=true
        break # First usable match wins; ignore further duplicates.
    done < <(find "$uploaded_data_dir" -type f -name "$filename" -print0)

    if [[ "$file_handled" != true ]]; then
        echo " - Failed: Could not find $filename in uploaded_data_dir"
    fi
    echo ""
done

if [[ "$DRY_RUN" == true ]]; then
    echo "Dry run completed. $copied_files_counter files would have been copied."
else
    echo "Operation completed: $copied_files_counter files copied."
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment