A script to sync files across a set of servers (requires a patched wget)
#!/bin/bash
# map_sync.sh - Jesse M. Pearson
# contact: jesall@gmail.com
# or
# irc.quakenet.org/#prae.nexuiz
#
# Description:
# A script to synchronise the latest files across a set of servers (server_list.txt).
# Its intended usage is via a cron task, though running it manually from the
# CLI works, too.
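#
# Example cron entry (illustrative; the schedule and path here are assumptions):
#   */30 * * * * /path/to/map_sync.sh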
#
# Things to note:
# 1. The script execs wget to do the syncing. Note, however, that when
# wget -N determines a remote file is no newer than the local copy, it still
# compares file sizes and, if they differ, pulls the file down regardless.
#
# Bundled with this script are a set of patch files, and compiled binaries,
# that disable this behaviour. If the custom binary fails for any reason
# (architecture mismatch, corruption, or whatever), the script falls back to
# the system's version of wget.
#
# 2. The files generated/used are mostly self-contained to the script's execution directory.
#
# 3. The default location for parsing relevant information is /tmp/, though
# $exec_dir would work too. The point is that whichever dir is chosen, it must
# be writeable.
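#
# For reference, applying one of the bundled patches and rebuilding wget might
# look roughly like this (a sketch; the tarball and patch file names here are
# assumptions about how the bundle is laid out):
#
#   tar xf wget-1.12.tar.gz
#   cd wget-1.12
#   patch -p1 < ../wget-1.12-http.patch
#   ./configure && make
#   cp src/wget /path/to/script/wget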
#
# TODO:
# 1. Make the rate limit dynamic (see the sketch below this list):
# use cURL to fetch each file's Content-Length, specify a max threshold (which
# the rate limit will never exceed) and a min threshold (which it will never go
# below), and scale the limit between them so that larger files get a higher
# rate limit and download faster.
# 2. Add OS version and CPU architecture checks to load the proper wget build.
#
# 3. Add an override/exclusion routine for stupidly named maps that we want to
# handle ourselves, without the ridiculously borked naming conventions.
#
# 4. Clean up the awk/sed calls; add a check for $error != "true" so that we
# can forego the awk/sed parsing entirely when our custom wget is in use. The
# custom wget has had its output modified to accommodate our file listing.
#
# 5. Add some error handling routines for various commands that might not exist on certain
# configurations.
#
# 6. Put the extension matches in a variable and use it wherever .pk3 appears.
# Clean up the script and comment it.
#
# 7. *MAYBE* add some parameter handling to control execution flow from the CLI
# or a cron task (i.e. never use the custom wget; view verbose wget output in
# emails; retain the wget log; debugging).
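
# A rough sketch of TODO #1 (unused for now; the min/max thresholds and the
# linear scaling rule are assumptions, not settled values):
calc_rate_limit() {
    local url=$1 min_kb=25 max_kb=200
    # Ask the server for just the headers and pull out Content-Length.
    local len=$(curl -sI "$url" | awk 'tolower($1) == "content-length:" { print $2 }' | tr -d '\r')
    if [ -z "$len" ]; then
        echo "${min_kb}k"
        return
    fi
    # Scale linearly (1k of rate limit per 100KB of file), clamped to the thresholds.
    local rate=$(( len / 102400 ))
    [ "$rate" -lt "$min_kb" ] && rate=$min_kb
    [ "$rate" -gt "$max_kb" ] && rate=$max_kb
    echo "${rate}k"
}

# A rough sketch of the parameter handling from TODO #7 (commented out; the
# flag letters are assumptions):
#   while getopts "svkd" opt; do
#       case $opt in
#           s) error="true" ;;     # -s: skip the custom wget entirely
#           v) verbose="true" ;;   # -v: verbose wget output in emails
#           k) keep_log="true" ;;  # -k: retain the wget log
#           d) set -x ;;           # -d: debugging
#       esac
#   done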
# Try to reliably determine the current execution path.
exec_dir=$(dirname "$(readlink -f "$0")")
date=$(date "+%m%d%y%H%M%S")
server_file="$exec_dir/server_list.txt"
wget="$exec_dir/wget"
parse_file="/tmp/mapsync_$date.txt"
servers=( $(cat "$server_file") )
array_size=${#servers[@]}
directory="$HOME/maps/"
extra_params="-T 30 --limit-rate=50k"
# A flag to be set if our custom build of wget borks up due to architecture problems
error="false"
cd "$directory" || exit 1
for (( i=0; i<array_size; i++ ))
do
    # If nothing has gone wrong with our custom wget build, use it.
    if [ "$error" != "true" ]; then
        eval "$wget $extra_params -r -N -nd -A '*.pk3' ${servers[$i]} >> $parse_file 2>&1"
        # If something goes wrong with the custom build, set the error flag and
        # fall back to the system wget for this and all subsequent servers.
        if [ $? -gt 0 ]; then
            error="true"
            echo "Custom wget has failed."
            wget $extra_params -r -N -nd -A '*.pk3' "${servers[$i]}" >> "$parse_file" 2>&1
        fi
    else
        wget $extra_params -r -N -nd -A '*.pk3' "${servers[$i]}" >> "$parse_file" 2>&1
    fi
done
# This is based on the two different renditions of wget output produced by
# Debian and Mac OS X. Hopefully it's more reliable than a static field number.
# If not, we can cut on .pk3 and rebuild the map name.
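# For reference, a matched line looks something like the following (illustrative;
# the exact format varies by wget version and locale):
#   2010-11-05 14:43:02 (50.3 KB/s) - `somemap.pk3' saved [1234567/1234567]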
maps=$(awk '/^[0-9].*\.pk3.*saved/' "$parse_file" | cut -f2 -d '`' | cut -f1 -d ' ' | sed "s/'//")
# Count the non-empty lines of $maps rather than re-running the whole pipeline
# into an array (the old approach was a dirty hack).
map_count=$(echo "$maps" | grep -c .)
rm "$parse_file"
if [ "$map_count" -gt 0 ] && [ -n "$maps" ]; then
    if [ "$map_count" -eq 1 ]; then
        echo "1 file has been downloaded/updated:"
    else
        echo "$map_count files have been downloaded/updated:"
    fi
    echo "$maps"
#else
#    echo "No maps were downloaded/updated."
fi

server_list.txt:

http://rm.endoftheinternet.org/~nexuiz/maps/
http://maps.nexuizninjaz.com/
http://ouns.nexuizninjaz.com/misc/nexball/maptemp/
http://www.hrfdesign.com/crap/index.php?dir=maps%2Fcpma%2F
http://www.hrfdesign.com/crap/index.php?dir=maps%2Fctf%2F
http://www.hrfdesign.com/crap/index.php?dir=maps%2Fdm%2F
http://www.hrfdesign.com/crap/index.php?dir=maps%2Fmisc%2F
http://www.hrfdesign.com/crap/index.php?dir=maps%2Fnexball%2F
http://www.hrfdesign.com/crap/index.php?dir=maps%2Fnexrun%2F
http://www.hrfdesign.com/crap/index.php?dir=maps%2Fons%2F
http://www.hrfdesign.com/crap/index.php?dir=maps%2Fpb%2F
http://www.hrfdesign.com/crap/index.php?dir=maps%2Frace%2F
http://www.hrfdesign.com/crap/index.php?dir=maps%2Fcts%2F
http://www.hrfdesign.com/crap/index.php?dir=maps%2Fca%2F
http://www.nullgaming.com/maps/

The bundled wget patches:

--- wget-1.11.2/src/http.c 2008-04-27 14:12:26.000000000 -0500
+++ wget-1/src/http.c 2009-09-08 08:43:13.000000000 -0500
@@ -2628,21 +2628,14 @@
download procedure is resumed. */
if (hstat.orig_file_tstamp >= tmr)
{
- if (hstat.contlen == -1
- || hstat.orig_file_size == hstat.contlen)
- {
+ logprintf (LOG_VERBOSE, _("\
+original time stamp `%i' remote time stamp `%i' contlen `%i'.\n\n"),
+ (int)hstat.orig_file_tstamp, (int)tmr, (int)hstat.contlen);
logprintf (LOG_VERBOSE, _("\
Server file no newer than local file `%s' -- not retrieving.\n\n"),
hstat.orig_file_name);
ret = RETROK;
goto exit;
- }
- else
- {
- logprintf (LOG_VERBOSE, _("\
-The sizes do not match (local %s) -- retrieving.\n"),
- number_to_static_string (hstat.orig_file_size));
- }
}
else
logputs (LOG_VERBOSE,
--- wget-1.11.4/src/http.c 2008-06-29 20:22:54.000000000 -0500
+++ wget-1/src/http.c 2009-09-09 01:56:26.000000000 -0500
@@ -2630,21 +2630,15 @@
download procedure is resumed. */
if (hstat.orig_file_tstamp >= tmr)
{
- if (hstat.contlen == -1
- || hstat.orig_file_size == hstat.contlen)
- {
+ logprintf (LOG_VERBOSE, _("\
+original time stamp `%i' remote time stamp `%i' contlen `%i'.\n\n"),
+ (int)hstat.orig_file_tstamp, (int)tmr, (int)hstat.contlen);
+
logprintf (LOG_VERBOSE, _("\
Server file no newer than local file `%s' -- not retrieving.\n\n"),
hstat.orig_file_name);
ret = RETROK;
goto exit;
- }
- else
- {
- logprintf (LOG_VERBOSE, _("\
-The sizes do not match (local %s) -- retrieving.\n"),
- number_to_static_string (hstat.orig_file_size));
- }
}
else
logputs (LOG_VERBOSE,
--- wget-1.12/src/http.c 2009-09-21 22:02:18.000000000 -0500
+++ wget-1/src/http.c 2009-11-14 09:06:02.000000000 -0600
@@ -2722,21 +2722,11 @@
download procedure is resumed. */
if (hstat.orig_file_tstamp >= tmr)
{
- if (hstat.contlen == -1
- || hstat.orig_file_size == hstat.contlen)
- {
logprintf (LOG_VERBOSE, _("\
Server file no newer than local file %s -- not retrieving.\n\n"),
quote (hstat.orig_file_name));
ret = RETROK;
goto exit;
- }
- else
- {
- logprintf (LOG_VERBOSE, _("\
-The sizes do not match (local %s) -- retrieving.\n"),
- number_to_static_string (hstat.orig_file_size));
- }
}
else
logputs (LOG_VERBOSE,
--- wget-1.12/src/retr.c 2009-09-04 11:31:54.000000000 -0500
+++ wget-1/src/retr.c 2009-11-14 09:06:02.000000000 -0600
@@ -871,8 +871,8 @@
RESTORE_POST_DATA;
bail:
- if (register_status)
- inform_exit_status (result);
+// if (register_status)
+// inform_exit_status (result);
return result;
}