@savask
Last active February 10, 2020 07:33
A script to generate jobs for a full metamath minimization run
#!/bin/bash
# This script generates jobs for global minimization runs.
# First argument of the script is the number of jobs, second argument
# is the metamath database you want to minimize, third argument is the location
# of the metamath program. For instance,
#
# >./preparejobs.sh 24 set.mm ./metamath
#
# will schedule all set.mm theorems for minimization on 24 processes and assumes
# the metamath program is in the current directory. The script
# outputs N files of the form job101.cmd, job102.cmd ... , where N is the number of jobs.
#
# The script works by applying the Longest Processing Time algorithm where it is assumed
# that the tasks (i.e. theorems) take time proportional to their proof size. See comments
# for further implementation details.
# Metamath executable location.
metamath_exe=$3
# The number of CPUs (processes, threads, etc) wanted.
cpus=$1
# Declare the 'loads' array containing estimated load for each CPU.
# Initially there's no load at all, so we set it at 0.
declare -a loads=( $(for i in $(seq 1 $cpus); do echo 0; done) )
# Declare a schedule array containing theorem labels to be processed by each CPU.
declare -a schedule
# Create a temporary file for our theorems queue.
theorems_queue=$(mktemp)
# Generate "theorem-label proof-size" pairs and sort them. The result will look like:
# itgaddnclem2 26854
# itg2addnc 25812
# itg2addnclem 17111
# ... etc
# More precisely, do the following:
"$metamath_exe" "read $2" 'show proof * /size' exit | # Print all proof sizes with the metamath program
tail -n +9 | # Drop the first 8 lines (the header)
head -n -1 | # Drop the last line (the exit prompt)
awk '{gsub(/"/, "", $5); print $5, $7}' | # Select the 5th (label) and 7th (size) columns, stripping quotes from labels
sort -h -r -k2 > "$theorems_queue" # Sort by the second column (proof size) in non-increasing order and send it to the temp file
# Read the temp file line by line and schedule theorems appropriately.
while read -r line; do
    # Split the pair "label size" into a bash array
    theorem=($line)
    # Set label and size
    label=${theorem[0]}
    size=${theorem[1]}
    # Find the CPU with the smallest total load
    free_cpu=0
    min_load=${loads[0]}
    for i in "${!loads[@]}"; do
        if [ "${loads[i]}" -lt "$min_load" ]; then
            free_cpu=$i
            min_load=${loads[i]}
        fi
    done
    # Schedule the theorem on that CPU and update the loads array
    schedule[free_cpu]="$label ${schedule[free_cpu]}"
    (( loads[free_cpu]+=size ))
done < "$theorems_queue"
# Remove the temporary file.
rm "$theorems_queue"
# Turn each schedule into a job file.
for i in "${!schedule[@]}"; do
    # Name job files job101.cmd, job102.cmd, etc.
    job_file=$(printf "job%03d.cmd" $((i+101)))
    # Truncate the file in case it already exists
    > "$job_file"
    # Go through the theorems scheduled for the i-th CPU and generate the job script
    for j in ${schedule[i]}; do
        echo "prove $j" >> "$job_file"
        echo "minimize_with * /allow_new_axioms * /no_new_axioms_from ax-* /time" >> "$job_file"
        echo "_exit_pa /force" >> "$job_file"
    done
done
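The Longest Processing Time step at the heart of the script can be tried in isolation. Below is a minimal standalone sketch with made-up labels and sizes (already sorted in decreasing order, as the script's `sort` stage guarantees); it is not part of the script itself:

```shell
#!/bin/bash
# LPT sketch: tasks sorted by size (descending) are assigned, one by one,
# to whichever CPU currently has the smallest total load.
loads=(0 0 0)
declare -a sched
for entry in "thmA 50" "thmB 40" "thmC 30" "thmD 20"; do
    # Split the hypothetical "label size" pair into positional parameters
    set -- $entry
    label=$1
    size=$2
    # Find the least-loaded CPU, exactly as in the script above
    free_cpu=0
    min_load=${loads[0]}
    for i in "${!loads[@]}"; do
        if [ "${loads[i]}" -lt "$min_load" ]; then
            free_cpu=$i
            min_load=${loads[i]}
        fi
    done
    sched[free_cpu]="$label ${sched[free_cpu]}"
    (( loads[free_cpu]+=size ))
done
echo "${loads[@]}"  # prints: 50 40 50
```

Here thmA lands on CPU 0, thmB on CPU 1, and thmC and thmD both on CPU 2, giving the final loads 50 40 50 — the greedy heuristic keeps the loads close to balanced without searching for an optimal assignment.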
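The awk stage of the pipeline only assumes that metamath prints the quoted theorem label in the 5th whitespace-separated field and the proof size in the 7th. That assumption can be checked on a hypothetical output line (the wording below is made up; only the field positions matter):

```shell
# Hypothetical metamath output line: the quoted label sits in field 5
# and the proof size in field 7, which is all the awk program relies on.
line='The proof of theorem "itg2addnc" is 25812 bytes'
# Strip the quotes from field 5 and emit the "label size" pair
pair=$(echo "$line" | awk '{gsub(/"/, "", $5); print $5, $7}')
echo "$pair"  # prints: itg2addnc 25812
```

If a metamath release changes this output format, the field numbers in the awk program (and the `tail`/`head` line counts) are the places to adjust.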