Last active
January 22, 2024 18:49
-
-
Save jeansymolanza/78e8f321ab31f734b71e367561b75024 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Description: This script performs an incremental copy of files and directories
# from a source directory to a target directory, maintaining the relative directory
# structure, based on a specified time range in hours. The copy operation is
# parallelized, so multiple files and directories can be copied simultaneously.
# The script is useful for backing up or synchronizing large datasets whose changes
# are limited to a specific period.
#
# Usage: ./g1_incremental_copy.sh <source_dir> <target_dir> <time_range_in_hours> [number_of_processes]
#
# Parameters:
# <source_dir> - The directory that files and directories are copied from.
# <target_dir> - The directory that files and directories are copied to
#                (a local path, or host:path for a remote target reached over SSH).
# <time_range_in_hours> - Only files and directories modified within this many hours are considered.
# [number_of_processes] - Optional. Number of parallel copy operations. Default is 5.
# Positional arguments, in the order given in the usage line of the header.
SOURCE_DIR="${1}"
TARGET_DIR="${2}"
TIME_RANGE="${3}"
# The fourth argument is optional: fall back to 5 when it is unset or empty.
NUM_PROCESSES="${4:-5}"
# Function to check if a remote directory exists.
# Arguments: $1 - remote location in "host:directory" form (host may be user@host)
# Outputs:   an error message on stderr when the directory is missing
# Returns:   0 when `ssh host test -d directory` succeeds; otherwise the calling
#            shell is terminated with status 1 (exit, not return).
check_remote_dir() {
  local remote_path=$1
  # Split on the FIRST colon for both halves, so a directory that itself
  # contains ':' (e.g. host:/data/a:b) is not folded into the host name.
  local remote_host=${remote_path%%:*}
  local remote_dir=${remote_path#*:}

  if ! ssh "$remote_host" test -d "$remote_dir"; then
    # Diagnostics belong on stderr so they do not mix with rsync's listing.
    printf 'Remote directory %s does not exist on server %s.\n' \
      "$remote_dir" "$remote_host" >&2
    exit 1
  fi
}
# Function to copy one file or directory to its mirrored location under the target.
# Globals:   SOURCE_DIR (read) - root against which the relative path is computed
# Arguments: $1 - path of a file or directory located under SOURCE_DIR
#            $2 - target root; a value containing ':' is treated as a remote
#                 host:path destination (SSH keys assumed for passwordless access)
# Outputs:   rsync's verbose listing on stdout; diagnostics on stderr
# Returns:   rsync's exit status, or 1 when $1 is not located under SOURCE_DIR
copy_to_target() {
  local entity=$1
  local target_dir=$2
  # Drop one trailing slash so "/src/" and "/src" strip the same prefix.
  local source_root=${SOURCE_DIR%/}
  local relative_path
  local destination=$target_dir

  if [[ $entity != "$source_root"/* ]]; then
    printf 'Skipping %s: not located under %s.\n' "$entity" "$SOURCE_DIR" >&2
    return 1
  fi
  # The prefix is quoted so glob characters in SOURCE_DIR are taken literally.
  relative_path=${entity#"$source_root"/}

  # Remote targets are recognised by the ':' of rsync's host:path syntax.
  if [[ $target_dir == *:* ]]; then
    check_remote_dir "$target_dir"
  fi

  # Make the destination an explicit directory, but leave a bare "host:"
  # (remote home directory) alone -- appending '/' would turn it into "host:/".
  if [[ $destination != */ && $destination != *: ]]; then
    destination+=/
  fi

  # --relative recreates everything after the "/./" marker beneath the
  # destination, creating missing parent directories on the way. This avoids
  # both the "dir/dir" nesting that `rsync dir target/dir` produces and the
  # failure when the parent of a file does not exist yet on the target.
  # (-a already implies -r.)
  rsync -av --delete --relative -- "$source_root/./$relative_path" "$destination"
}
# Validate the inputs this section consumes. TIME_RANGE is checked before it
# reaches arithmetic expansion, which would otherwise evaluate arbitrary text.
if [[ ! $TIME_RANGE =~ ^[0-9]+$ ]]; then
  echo "Time range must be a whole number of hours, got '$TIME_RANGE'." >&2
  exit 2
fi
if [[ ! -d $SOURCE_DIR ]]; then
  echo "Source directory '$SOURCE_DIR' does not exist." >&2
  exit 2
fi
# 10# forces base 10 so a value such as "08" is not rejected as invalid octal.
time_range_minutes=$((10#$TIME_RANGE * 60))

# Build the interest list based on the time range.
# Paths are read NUL-delimited so names containing spaces or newlines survive,
# and find prints the bare path only (no timestamp prefix).
interest_files=()
while IFS= read -r -d '' found_path; do
  interest_files+=("$found_path")
done < <(find "$SOURCE_DIR" -type f -mmin "-$time_range_minutes" -print0)

# Directories use the same time filter as files, as the parameter description
# and the "nothing to copy" message below both state.
interest_dirs=()
while IFS= read -r -d '' found_path; do
  interest_dirs+=("$found_path")
done < <(find "$SOURCE_DIR" -mindepth 1 -type d -mmin "-$time_range_minutes" -print0)

interest_list=("${interest_files[@]}" "${interest_dirs[@]}")

# Nothing changed inside the window: report it and stop successfully.
if ((${#interest_list[@]} == 0)); then
  echo "No files or directories to copy in the specified time range."
  exit 0
fi

# Parallel copy.
# Each worker is a fresh bash process, so the functions and the SOURCE_DIR
# global that copy_to_target reads must be exported to be visible there.
export SOURCE_DIR
export -f check_remote_dir copy_to_target

# xargs -P sets the number of parallel workers; -n 1 hands each worker one path.
# The path and the target are passed as positional parameters ($2 and $1)
# rather than being spliced into the command string, so quotes or other shell
# metacharacters in a file name cannot alter the command. The paths already
# include SOURCE_DIR, so nothing is prefixed to them.
if ! printf '%s\0' "${interest_list[@]}" |
  xargs -0 -n 1 -P "$NUM_PROCESSES" \
    bash -c 'copy_to_target "$2" "$1"' copy_worker "$TARGET_DIR"; then
  echo "Copy operation finished with errors." >&2
  exit 1
fi

echo "Copy operation completed."
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
mapfile -t interest_dirs < <(find "$SOURCE_DIR" -mindepth 1 -type d -printf '%p\n')
interest_list=("${interest_files[@]}" "${interest_dirs[@]}")