Last active
December 4, 2019 11:14
-
-
Save tueda/f44b42a12ac16c1966e9743e344615a1 to your computer and use it in GitHub Desktop.
Automatic settings of "form.set". #bin #python #form NOTE: migrating to https://github.com/tueda/formset.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh | |
""":" . | |
exec python "$0" "$@" | |
""" | |
from __future__ import print_function | |
import argparse | |
import contextlib | |
import copy | |
import math | |
import os | |
import re | |
import subprocess | |
import sys | |
__doc__ = """\ | |
Generate form.set suited for the local machine. | |
Example | |
------- | |
$ formset.py -o | |
$ tform `formset.py -f` calcdia.frm | |
$ minos `formset.py -m` minos.file | |
Python versions | |
--------------- | |
2.7, 3.2, 3.3, 3.4, 3.5 | |
""" | |
if not hasattr(subprocess, 'check_output'):
    # Backport for old systems (Python 2.6 with argparse installed) whose
    # subprocess module does not provide check_output.
    def check_output(*popenargs, **kwargs):
        """Run a command and return what it wrote to its standard output.

        Raises ``ValueError`` if a ``stdout`` keyword is given and
        ``subprocess.CalledProcessError`` if the command exits with a
        non-zero status.
        """
        if 'stdout' in kwargs:  # pragma: no cover
            raise ValueError('stdout argument not allowed, '
                             'it will be overridden.')
        proc = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
        captured = proc.communicate()[0]
        status = proc.poll()
        if not status:
            return captured
        command = kwargs.get('args')
        if command is None:
            command = popenargs[0]
        # The `output` keyword of CalledProcessError is not available in 2.6.
        raise subprocess.CalledProcessError(status, command)

    subprocess.check_output = check_output
@contextlib.contextmanager
def open_w_or_stdout(filename=None):
    """Context manager yielding a writable file, or stdout.

    When `filename` is empty or None, ``sys.stdout`` is yielded and left
    open.  Otherwise the output is written to a temporary file next to the
    target (``<filename>.tmp<pid>``), flushed and fsync'ed, and renamed onto
    `filename` only if the ``with`` body finished without raising.  If the
    body raises, the temporary file is removed and the target is left
    untouched, so a failed run never replaces it with partial output.
    """
    if not filename:
        yield sys.stdout
        return

    # See https://stackoverflow.com/a/2333979.
    tmpfilename = '{0}.tmp{1}'.format(filename, os.getpid())
    f = open(tmpfilename, 'w')
    committed = False
    try:
        yield f
        f.flush()
        os.fsync(f.fileno())
        committed = True
    finally:
        f.close()
        if committed:
            os.rename(tmpfilename, filename)
        else:
            # Best-effort cleanup; the original exception is what matters
            # and must keep propagating.
            try:
                os.remove(tmpfilename)
            except OSError:
                pass
def round_down(x, n):
    """Return `x` rounded down to a multiple of `n` (floor division)."""
    multiples = x // n
    return multiples * n
def round_up(x, n):
    """Return `x` rounded up to a multiple of `n`.

    Implemented as floor division of ``x + (n - 1)``, which yields the
    ceiling multiple for integer arguments.
    """
    bumped = x + (n - 1)
    return bumped // n * n
def metric_prefix(s):
    """Parse a metric prefix as a number.

    `s` is matched case-insensitively: '' -> 1, 'k' -> 1000, 'm' -> 1000**2,
    'g' -> 1000**3, 't' -> 1000**4.  Any other string gives None.
    """
    multipliers = {
        '': 1,
        'k': 1000,
        'm': 1000**2,
        'g': 1000**3,
        't': 1000**4,
    }
    return multipliers.get(s.lower())


def parse_number(s):
    """Parse a string as a number with a possible metric prefix.

    A trailing K/M/G/T (either case) scales the value by the corresponding
    power of 1000.  The result is an int; fractional values are truncated
    after scaling (e.g. '1.5k' -> 1500).

    Raises ValueError for anything that is not a finite number.
    """
    scale = 1
    m = re.match(r'(.*)([kmgtKMGT])$', s)
    if m:
        s = m.group(1)
        scale = metric_prefix(m.group(2))
    # Plain integers are handled exactly; going through float would lose
    # precision above 2**53.
    try:
        return int(s) * scale
    except ValueError:
        pass
    try:
        # May raise ValueError (not a number, or NaN).
        return int(float(s) * scale)
    except OverflowError:
        # int(inf): report it like any other bad input so that callers
        # catching ValueError handle it.
        raise ValueError('number out of range: {0}'.format(s))
def round_human_readable(x, up=False, tostring=True):
    """Round off `x` within a human readable form.

    `x` is first rounded (up if `up` is true, otherwise down) to three
    significant figures.  Then the largest suffix among T, G, M and K
    (powers of 1000) that represents the rounded value exactly is chosen.

    With `tostring` true the result is a string such as '123M'; if no suffix
    fits, the rounded number itself is returned.  With `tostring` false the
    rounded number is always returned.  Non-positive values are returned
    unchanged.
    """
    if x <= 0:
        # Nothing sensible to round (and log10-style digit counting is
        # undefined here); e.g. a parameter explicitly set to 0.
        return x
    round_off = round_up if up else round_down
    # Take 3 significant figures.  The digits are counted on the integer
    # part, which is exact for large integers, and the step is never below
    # 1 so that small values are not turned into inexact floats.
    digits = len(str(int(x)))
    n = 10 ** max(digits - 3, 0)
    x = round_off(x, n)
    # Find a good suffix which doesn't change the value.
    for suffix, unit in (('T', 1000**4), ('G', 1000**3),
                         ('M', 1000**2), ('K', 1000)):
        if round_off(x, unit) == x:
            return '{0}{1}'.format(x // unit, suffix) if tostring else x
    return x
class classproperty(property):  # noqa
    """Read-only property whose getter receives the class, not an instance."""

    def __get__(self, cls, owner):
        """Invoke the wrapped getter with the owning class and return its result."""
        getter = self.fget
        return getter(owner)
class SystemInfo(object):
    """System information, obtained lazily from `lscpu` and `free -b`.

    The outputs of the external commands are parsed once and cached on the
    class.  Set `verbose` to True to report on stderr when a command is run.
    """

    _cpu_info = None  # cached dict parsed from `lscpu`
    _mem_info = None  # cached dict parsed from `free -b`
    verbose = False

    @classproperty
    def number_of_nodes(cls):  # noqa
        """Return the number of NUMA nodes (1 if lscpu does not report it)."""
        info = cls._get_cpu_info()
        return int(info.get('NUMA node(s)', 1))

    @classproperty
    def number_of_cpus(cls):  # noqa
        """Return the number of cpus."""
        info = cls._get_cpu_info()
        return int(info['CPU(s)'])

    @classproperty
    def number_of_physical_cores(cls):  # noqa
        """Return the number of physical cores (sockets x cores per socket)."""
        info = cls._get_cpu_info()
        return int(info['Socket(s)']) * int(info['Core(s) per socket'])

    @classproperty
    def total_memory(cls):  # noqa
        """Return the total physical memory in bytes."""
        info = cls._get_mem_info()
        # First column of the "Mem:" row of `free -b`.
        return int(info['Mem'][0])

    @staticmethod
    def _run(command):
        """Run `command` in the C locale and return its output as text."""
        # The parsers below look up English field names, so make sure the
        # tools do not translate their output.
        env = dict(os.environ, LC_ALL='C')
        return subprocess.check_output(command, env=env).decode('utf-8')

    @classmethod
    def _get_cpu_info(cls):
        """Return the `lscpu` output as a dict of stripped key/value strings."""
        if cls._cpu_info is None:
            if cls.verbose:
                sys.stderr.write('running lscpu...\n')
            info = {}
            for line in cls._run(['lscpu']).strip().split('\n'):
                # Split at the first colon only: values may contain colons
                # themselves.  Lines without a colon carry no field.
                key, sep, value = line.partition(':')
                if sep:
                    info[key.strip()] = value.strip()
            cls._cpu_info = info
        return cls._cpu_info

    @classmethod
    def _get_mem_info(cls):
        """Return the `free -b` output as a dict: row label -> list of columns."""
        if cls._mem_info is None:
            if cls.verbose:
                sys.stderr.write('running free...\n')
            info = {}
            for line in cls._run(['free', '-b']).strip().split('\n'):
                # The header row has no colon and is skipped.
                key, sep, value = line.partition(':')
                if sep:
                    info[key.strip()] = value.split()
            cls._mem_info = info
        return cls._mem_info
class Setup(object):
    """Setup parameters.

    Every public attribute (name not starting with an underscore) is a
    setup parameter; the underscore-prefixed attributes are the sizes in
    bytes used by `calc()` for the memory estimation.
    """

    def __init__(self):
        """Construct a set of setup parameters."""
        self.compresssize = 90000
        self.filepatches = 256
        self.hidesize = 0
        self.largepatches = 256
        self.largesize = 50000000
        self.maxtermsize = 40000  # 64-bit
        self.numstorecaches = 4
        self.scratchsize = 50000000
        self.sizestorecache = 32768
        self.smallextension = 20000000
        self.smallsize = 10000000
        self.sortiosize = 100000
        self.termsinsmall = 100000
        self.threadbucketsize = 500
        self.threads = -1  # form
        self.threadscratchoutsize = 2500000
        self.threadscratchsize = 100000
        self.workspace = 40000000  # 64-bit

        self.bracketindexsize = 200000
        self.constindex = 128
        self.continuationlines = 15
        self.functionlevels = 30
        self.maxnumbersize = 200
        self.maxwildcards = 100
        self.parentheses = 100
        self.processbucketsize = 1000
        self.subfilepatches = 64
        self.sublargepatches = 64
        self.sublargesize = 4000000
        self.subsmallextension = 800000
        self.subsmallsize = 500000
        self.subsortiosize = 32768
        self.subtermsinsmall = 10000

        # 64-bit
        self._ptrsize = 8
        self._possize = 8
        self._wordsize = 4

    def items(self):
        """Return pairs of parameters and values, sorted by parameter name.

        Attributes whose name starts with an underscore are excluded.
        """
        items = [(k, v) for (k, v) in self.__dict__.items() if k[0] != '_']
        items.sort()
        return tuple(items)

    def __str__(self):
        """Return the string representation.

        The values shown are those after the adjustments made by `calc()`.
        They are computed on a copy so that printing does not modify this
        object.
        """
        snapshot = self.copy()
        mem = snapshot.calc()
        params = ['{0}: {1}'.format(k, v) for (k, v) in snapshot.items()]
        return '<Setup: {0} bytes, {1}>'.format(mem, ', '.join(params))

    def copy(self):
        """Return a shallow copy."""
        return copy.copy(self)

    def calc(self):
        """Return an estimation of memory usage in bytes.

        NOTE: this adjusts the parameters of this object in place so that
        they satisfy the constraints below, then sums up the buffers.
        """
        self.maxtermsize = max(self.maxtermsize, 200)

        self.compresssize = max(self.compresssize,
                                2 * self.maxtermsize * self._wordsize)
        self.sortiosize = max(self.sortiosize,
                              self.maxtermsize * self._wordsize)

        # The strange factor WordSize**2 is used in the FORM source...
        self.scratchsize = max(self.scratchsize,
                               4 * self.maxtermsize * self._wordsize**2)
        if self.hidesize > 0:
            self.hidesize = max(self.hidesize,
                                4 * self.maxtermsize * self._wordsize**2)

        self.threadscratchsize = max(self.threadscratchsize,
                                     4 * self.maxtermsize * self._wordsize**2)
        self.threadscratchoutsize = max(self.threadscratchoutsize,
                                        4 * self.maxtermsize *
                                        self._wordsize**2)

        # constraints in RecalcSetups()

        self.filepatches = max(self.filepatches, self.threads)

        self.termsinsmall = round_up(self.termsinsmall, 16)

        numberofblocksinsort = 10
        minimumnumberofterms = 10
        n = numberofblocksinsort * minimumnumberofterms
        if self.threads >= 0:
            minbufsize = (self.threads * (1 + n) * self.maxtermsize *
                          self._wordsize)
            if self.largesize + self.smallextension < minbufsize:
                self.largesize = minbufsize - self.smallextension

        # constraints in AllocSort()

        self.filepatches = max(self.filepatches, 4)

        self.smallsize = max(self.smallsize,
                             16 * self.maxtermsize * self._wordsize)

        self.smallextension = max(self.smallextension, self.smallsize * 3 // 2)

        if self.largesize > 0:
            self.largesize = max(self.largesize, 2 * self.smallsize)

        compinc = 2
        minbufsize = self.filepatches * (self.sortiosize +
                                         (compinc + 2 * self.maxtermsize) *
                                         self._wordsize)
        if self.largesize + self.smallextension < minbufsize:
            if self.largesize == 0:
                self.smallextension = minbufsize
            else:
                self.largesize = minbufsize - self.smallextension

        iotry = (((self.largesize + self.smallextension) // self.filepatches //
                  self._wordsize) - 2 * self.maxtermsize - compinc)  # in words
        self.sortiosize = max(self.sortiosize, iotry)  # bytes vs. words??

        # Compute the memory usage.

        mem = 0
        # Two scratch buffers plus the hide buffer, which counts as
        # `scratchsize` when `hidesize` is not set.
        mem += (self.scratchsize * 2 + (self.hidesize
                                        if self.hidesize > 0
                                        else self.scratchsize))
        mem += self.workspace * self._wordsize
        mem += (self.compresssize + 10) * self._wordsize
        mem += (self.largesize + self.smallextension + 3 * self.termsinsmall *
                self._ptrsize + self.sortiosize)

        storecachesize = self._possize * 2 * self._ptrsize + self._wordsize
        # ignore the padding
        storecachesize += self.sizestorecache
        mem += storecachesize * self.numstorecaches

        if self.threads >= 1:
            # Per-thread buffers.
            mem += ((self.threadscratchoutsize + self.threadscratchsize * 2) *
                    self.threads)
            mem += self.workspace * self._wordsize * self.threads
            mem += (self.compresssize + 10) * self._wordsize * self.threads
            mem += self._thread_alloc_sort(self.largesize // self.threads,
                                           self.smallsize // self.threads,
                                           self.smallextension // self.threads,
                                           self.termsinsmall,
                                           self.largepatches,
                                           self.filepatches // self.threads,
                                           self.sortiosize) * self.threads
            mem += storecachesize * self.numstorecaches * self.threads

            sizethreadbuckets = ((self.threadbucketsize + 1) *
                                 self.maxtermsize + 2) * self._wordsize
            if self.threadbucketsize >= 250:
                sizethreadbuckets //= 4
            elif self.threadbucketsize >= 90:
                sizethreadbuckets //= 3
            elif self.threadbucketsize >= 40:
                sizethreadbuckets //= 2
            sizethreadbuckets //= self._wordsize
            mem += ((2 * sizethreadbuckets * self._wordsize +
                     (self.threadbucketsize + 1) * self._possize) *
                    2 * self.threads)
            if self.threads >= 3:
                mem += ((self.workspace * self._wordsize // 8 +
                         2 * self.maxtermsize * self._wordsize) *
                        (self.threads - 2))

        return mem

    def _thread_alloc_sort(self, largesize, smallsize, smallextension,
                           termsinsmall, largepatches, filepatches,
                           sortiosize):
        """Return the estimated size in bytes of one set of sort buffers.

        The same constraints as in the AllocSort() part of `calc()` are
        applied to the given values, without modifying this object.
        `largepatches` is accepted but does not enter the estimation.
        """
        filepatches = max(filepatches, 4)

        smallsize = max(smallsize, 16 * self.maxtermsize * self._wordsize)

        smallextension = max(smallextension, smallsize * 3 // 2)

        if largesize > 0:
            largesize = max(largesize, 2 * smallsize)

        compinc = 2
        minbufsize = filepatches * (sortiosize + (compinc +
                                    2 * self.maxtermsize) * self._wordsize)
        if largesize + smallextension < minbufsize:
            if largesize == 0:
                smallextension = minbufsize
            else:
                largesize = minbufsize - smallextension

        iotry = (((largesize + smallextension) // filepatches //
                  self._wordsize) - 2 * self.maxtermsize - compinc)  # in words
        sortiosize = max(sortiosize, iotry)  # bytes vs. words??

        return (largesize + smallextension + 3 * termsinsmall * self._ptrsize +
                sortiosize)
def main():
    """Entry point.

    Parse the command line, determine the cpus and memory to be used, and
    then, depending on the options, print the tform options (--form), the
    minos options (--minos), the expected initial memory usage (--usage) or
    the setup parameters (to stdout or to the file given by --output).
    """
    # Parse the command line arguments.
    parser = argparse.ArgumentParser(
        usage=('%(prog)s [options] [--] '
               '[par=val].. [par+=int].. [par*=float]..'),
        epilog=('On non-Linux systems, the number of physical CPUs and memory '
                'available on the machine may be not automatically detected. '
                'In such a case, one cannot use the default parameters '
                'depending on those values and needs to explicitly specify '
                '--ncpus, --total-cpus and --total-memory.'),
        add_help=False
    )
    parser.add_argument('-h',
                        '--help',
                        action='store_const',
                        const=True,
                        help='show this help message and exit')
    parser.add_argument('-o',
                        '--output',
                        action='store',
                        nargs='?',
                        const='form.set',
                        help=('output to FILE (default: no (stdout), '
                              'FILE=form.set)'),
                        metavar='FILE')
    parser.add_argument('-f',
                        '--form',
                        action='store_const',
                        const=True,
                        help='print tform options (e.g., -w4) and exit')
    parser.add_argument('-m',
                        '--minos',
                        action='store_const',
                        const=True,
                        help='print minos options (e.g., -m2x4) and exit')
    parser.add_argument('-u',
                        '--usage',
                        action='store_const',
                        const=True,
                        help='print expected initial memory usage and exit')
    parser.add_argument('-H',
                        '--human-readable',
                        action='store_const',
                        const=True,
                        help=('adjust to human-readable numbers '
                              '(e.g., 1K, 23M, 456G)'))
    parser.add_argument('-1',
                        '--one',
                        action='store_const',
                        const=-1,
                        dest='ncpus',
                        help='use cpus in a node on the machine (default)')
    parser.add_argument('--full',
                        action='store_const',
                        const=-99999,
                        dest='ncpus',
                        help='use cpus in all nodes on the machine')
    parser.add_argument('-n',
                        '--ncpus',
                        action='store',
                        type=int,
                        help='use N cpus',
                        metavar='N')
    parser.add_argument('-p',
                        '--percentage',
                        action='store',
                        default=75.0,
                        type=float,
                        help=('percentage of initial memory usage '
                              '(default: 75.0)'),
                        metavar='N')
    parser.add_argument('--total-cpus',
                        action='store',
                        type=int,
                        help='specify the total cpus on the machine',
                        metavar='N')
    parser.add_argument('--total-memory',
                        action='store',
                        help='specify the total memory on the machine',
                        metavar='N')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_const',
                        const=True,
                        help='verbose output')
    parser.add_argument('args',
                        nargs='*',
                        help=argparse.SUPPRESS)
    args = parser.parse_args()

    # Help message.  Handled before anything else so that `-h` also works
    # on machines where the system information cannot be obtained.
    if args.help:
        parser.print_help()
        sys.exit(0)

    # Unknown parameters given as par=val; written to the output verbatim.
    pars = {}

    # NOTE: when all of `--ncpus`, `--total-cpus` and `--total-memory` are
    # specified, we don't need to access the system information.

    if args.verbose:
        SystemInfo.verbose = True

    if args.total_cpus:
        total_cpus = args.total_cpus
    else:
        total_cpus = SystemInfo.number_of_physical_cores

    if args.total_memory:
        try:
            total_memory = parse_number(args.total_memory)
        except ValueError:
            parser.error('non-integer value for total memory: {0}'.format(
                args.total_memory))
    else:
        total_memory = SystemInfo.total_memory

    # Number of CPUs.
    if args.ncpus is not None:
        ncpus = args.ncpus
    else:
        # Use 1 node for each job by default.
        ncpus = -1
    if ncpus < 0:
        # Use (-ncpus) nodes.
        ncpus = -ncpus * (total_cpus // SystemInfo.number_of_nodes)
    ncpus = max(ncpus, 1)
    ncpus = min(ncpus, total_cpus)

    sp = Setup()
    sp.threads = ncpus if ncpus >= 2 else -1

    for a in args.args:
        m = re.match(r'([a-zA-Z][a-zA-Z0-9]*)([+*]?)=(.*)', a)
        if m:
            par = m.group(1).lower()
            ope = m.group(2)
            val = m.group(3)
            if par in sp.__dict__:
                # Known parameter.
                if ope == '' or ope == '+':
                    # We have par=val or par+=int.
                    try:
                        val = parse_number(val)
                    except ValueError:
                        parser.error(
                            'non-integer value for parameter: {0}'.format(a))
                    if ope == '':
                        setattr(sp, par, val)
                    else:
                        setattr(sp, par, getattr(sp, par) + val)
                    continue
                else:
                    # We have par*=float.
                    try:
                        val = float(val)
                    except ValueError:
                        parser.error(
                            'non-float value for parameter: {0}'.format(a))
                    setattr(sp, par, int(getattr(sp, par) * val))
                    continue
            elif ope == '':
                # Unknown parameter given by par=val. Add it to the dictionary.
                pars[par] = val
                continue
        parser.error('unrecognized argument: {0}'.format(a))

    # Our resource.
    cpus = max(sp.threads, 1)
    memory = int(total_memory * args.percentage / 100.0 * cpus / total_cpus)

    # For --form option.
    if args.form:
        print('-w{0}'.format(cpus))
        sys.exit()

    # For --minos option.
    if args.minos:
        print('-m{0}x{1}'.format(total_cpus // cpus, cpus))
        sys.exit()

    # Presumably increasing MaxTermSize requires increasing WorkSpace, too.
    sp.workspace = max(sp.workspace, sp.maxtermsize * 250)

    # Optimize the memory usage by bisection.
    max_iteration = 50
    sp0 = sp.copy()

    def f(x):
        """Scale the main buffers by `x`.

        Return the pair (estimated usage minus the available memory, the
        scaled setup).  The first element is negative while the setup
        still fits.
        """
        # Hopefully monotonically increasing.
        sp = sp0.copy()
        sp.smallsize = int(sp.smallsize * x)
        sp.largesize = int(sp.largesize * x)
        sp.termsinsmall = int(sp.termsinsmall * x)
        sp.scratchsize = int(sp.scratchsize * x)
        m = sp.calc()
        if args.human_readable:
            m = round_human_readable(m, True, False)
        return (- (memory - m), sp)

    # x1 is the current scale and y1 its value of f; x2/y2 is the upper end
    # of the bracket once a scale that does not fit has been found.
    x1 = 1.0
    x2 = None
    y1 = f(x1)[0]
    y2 = None
    for _i in range(max_iteration):
        if x2 is None:
            # No bracket yet: keep doubling while it fits, halving otherwise.
            if y1 < 0:
                x = x1 * 2.0
                y = f(x)[0]
                if y > 0:
                    x2 = x
                    y2 = y
                else:
                    x1 = x
                    y1 = y
            else:
                x = x1 * 0.5
                y = f(x)[0]
                if y < 0:
                    x2 = x1
                    y2 = y1
                    x1 = x
                    y1 = y
                else:
                    x1 = x
                    y1 = y
        else:
            # Bisect the bracket, keeping x1 on the side that fits.
            x = (x1 + x2) * 0.5
            y = f(x)[0]
            if y < 0:
                x1 = x
                y1 = y
            else:
                x2 = x
                y2 = y
        if x2 is not None:
            assert x1 < x2 and y1 < y2

    if x2 is None:
        if x1 < 1.0e-12:
            x1 = 0
        # Message to stderr, exit status 1.
        parser.exit(1, ('failed to find parameters: memory({0}) = {1} '
                        'bytes shortage\n').format(x1, y1))

    # For --usage option.
    if args.usage:
        m = f(x1)[1].calc()
        if args.human_readable:
            m = round_human_readable(m, True)
        print(m)
        sys.exit()

    # Output.
    with open_w_or_stdout(args.output) as fi:
        def round_memory(m):
            return (round_human_readable(m, False)
                    if args.human_readable else m)

        # Header comment recording how the file was generated.
        print(('# {0}{1} (cpu: {2}, mem: {3}; '
               'total cpu: {4}, total mem: {5}; {6}x{7})').format(
            parser.prog,
            (' ' if len(sys.argv) >= 2 else '') + ' '.join(sys.argv[1:]),
            cpus,
            round_memory(memory),
            total_cpus,
            round_memory(total_memory),
            total_cpus // cpus,
            cpus,
        ), file=fi)

        sp = f(x1)[1]
        sp0 = Setup()  # default value
        dic0 = dict(sp0.items())
        for k, v in sp.items():
            if k == 'threads':
                # 'threads N' doesn't work, must be given by tform option -wN.
                continue
            if v == dic0[k]:
                # Don't write when same as the default value.
                continue
            if args.human_readable:
                v = round_human_readable(v, False)
            print('{0} {1}'.format(k, v), file=fi)
        for k, v in pars.items():
            print('{0} {1}'.format(k, v), file=fi)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment