Randomly choose a directory to open, recurse through its subdirectories, and feed the resulting list to MComix.
#! /bin/bash
# mcomix-random
#
# Requirements:
# * mcomix-recurse. The entire contents of that bash script are:
# ---------------------------------------------------------
# #! /bin/bash
# find -L "$1" -type d -print0|sort -z -V | xargs -0 mcomix
# ---------------------------------------------------------
# ( sort -V is used because it seems the most robust way to sort photos
# by name intelligently: it treats strings of numerals as representing a
# single number, and otherwise sorts alphabetically. See the example after
# this list. )
#
# * GNU Coreutils, or other `sort -V` implementation that does the same thing
# as the Coreutils version.
# * MComix (obviously).
# * Bash (I think a Korn shell might also work. Use AT&T ksh if you can).
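#
# For instance, `sort -V` keeps numbered photos in human order where a plain
# lexicographic sort would not (hypothetical file names, GNU sort assumed):
# ---------------------------------------------------------
# $ printf 'img10.jpg\nimg2.jpg\nimg1.jpg\n' | sort -V
# img1.jpg
# img2.jpg
# img10.jpg
# ---------------------------------------------------------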
#
# Description:
# Randomly chooses a subdirectory (or a subdirectory of a subdirectory) to
# browse. When passed `-n NUMBER` as arguments, lets the user specify the
# minimum acceptable number of files a directory tree must contain to be
# viewable. The script repeatedly makes random selections until this
# requirement is met, so if you set the number too high, it will loop
# forever!
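#
# Example invocations (assuming the script is saved as `mcomix-random` in
# $PATH, and run from the directory whose subdirectories you want to browse):
# ---------------------------------------------------------
# $ cd ~/Pictures
# $ mcomix-random        # open a random subdirectory
# $ mcomix-random -n 20  # retry until a tree with at least 20 files is found
# ---------------------------------------------------------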
# Things to fix/improve:
# * Currently fails if file names contain newlines. I never name files that
# way myself, so it's not something that will impact me personally. And by
# the way, you shouldn't have file names like that either. Just because you
# can doesn't mean you should. (A null-delimited sketch appears at the end
# of these notes.)
#
# * Maybe fix the infinite loop thing (if no subdirectories fit the required
# minimum image count).
#
# * Make the script assemble a list of qualifying directories prior to doing
# the random selection, to speed up the process of finding a match (a sketch
# of this follows below). If two directories out of 200 match the minimum
# image count, then we have a 1 in 100 chance of the first random selection
# being acceptable. Since that dir could be chosen again on the next run
# with the current setup, it remains a 1 in 100 chance for each subsequent
# loop as well.
# Therefore we have a roughly 60.5% chance of not finding a match within 50
# searches, or a 39.5% chance of finding one.
# ( (99/100)^50 == ~0.605006; 1 - 0.605 == 0.395 )
#
# Obviously, if we start with a list of directories satisfying the condition,
# picking one of them randomly has a 100% chance of satisfying the condition.
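#
# A minimal sketch of that pre-filtering approach (untested; assumes GNU
# find and shuf, and, like the current script, that directory names contain
# no newlines). If nothing qualifies, DIR simply ends up empty instead of
# looping forever:
# ---------------------------------------------------------
# DIR="$(
#   find . -mindepth 1 -maxdepth 1 -type d | while IFS= read -r d; do
#     if [ "$(find "$d" -type f | wc -l)" -ge "$MINFILES" ]; then
#       printf '%s\n' "$d"
#     fi
#   done | shuf -n 1
# )"
# ---------------------------------------------------------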
#
# This seems to suggest (to me at least) that, for a computer with a given
# steady rate of data processing and a set of directories of which a subset
# satisfy the condition (of having enough files), there should be a ratio of
# total directories to that subset at which random selection achieves better
# performance often enough to be worth the risk for most users' taste.
#
# I am ignoring how many files each directory actually has, although this may
# also impact the processing time (if a directory has, for example, 3,000
# files, we have to enumerate each of those 3,000 before comparing that number
# to the user-provided minimum).
#
# For example, if we have 200 directories, and 150 of them have enough files
# to meet or exceed the minimum, and we take the difference in counting time
# for each file to be insignificant on modern hardware for sane numbers of
# files per directory, we have only a 25% chance of NOT finding what we want
# on each attempt (a 75% chance of success). 1 - (0.25^50) = a 99.9...%
# chance of success within 50 tries (my TI-89 actually rounds this up to one
# with a decimal point! - it's not precise enough to find a non-zero
# floating-point digit, and the exact fractional answer is so ridiculously
# long I don't want to duplicate it here). In such a case, actually counting
# the number of files in each of the 200 directories would cost the time
# spent evaluating a good selection PLUS the enumeration of 199 more
# directories.
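#
# A quick sanity check of those numbers (any awk with floating point will do):
# ---------------------------------------------------------
# $ awk 'BEGIN { printf "%.6f %.10f\n", 0.99^50, 1 - 0.25^50 }'
# 0.605006 1.0000000000
# ---------------------------------------------------------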
#
# Thus I think offering both methodologies as options and making one the
# default behaviour is optimal. Let the user decide which one best suits his
# or her use case, and trust that user to know his or her own file system
# hierarchy. I know that isn't quite up to a mathematician's standard of
# rigour, so I do not want to call it a proof. After all, I am simply
# comparing best- and worst-case examples with arbitrarily chosen numbers
# that do not necessarily reflect reality. I also have not factored in how
# much longer enumerating some huge directory actually takes compared to a
# directory with a substantially smaller number of files and directories.
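#
# Null-delimited sketch for the newline problem mentioned above (untested;
# assumes GNU find and a shuf with -z/--zero-terminated support):
# ---------------------------------------------------------
# # pick one subdirectory, with names separated by NUL instead of newline
# DIR="$(find . -mindepth 1 -maxdepth 1 -type d -print0 | shuf -z -n 1 | tr -d '\0')"
# ---------------------------------------------------------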
DIR=""
MINFILES="$2"
GOODDIR="0"
function tryDir() {
DIR="$(find . -maxdepth 1 -type d | shuf | head -n 1)"
}
function doNormal() {
#DIR="$(find . -maxdepth 1 -type d | shuf | head -n 1)"
DIR="$(find . -maxdepth 1 -type d | shuf | head -n 1)"
echo "$DIR"
mcomix-recurse "$DIR"
}
if [ "$1" = "-n" ]; then
# if an argument for -n was given (min. number of files in a dir tree to view)
if [ -z "$MINFILES" ]; then
echo "error: I need a number after -n for the minimum number of files in subdirectories."
return 1
else
while [ "$GOODDIR" -eq "0" ]; do
tryDir
echo "$DIR"
# check that there at least MINFILES files inside the chosen dir (recursing allowed)
if [ "$(find "$DIR" -type f | wc -l)" -ge "$MINFILES" ]; then
GOODDIR="1"
mcomix-recurse "$DIR"
fi
done
fi
else
doNormal
fi