Skip to content

Instantly share code, notes, and snippets.

@tueda
Last active December 4, 2019 11:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tueda/f44b42a12ac16c1966e9743e344615a1 to your computer and use it in GitHub Desktop.
Save tueda/f44b42a12ac16c1966e9743e344615a1 to your computer and use it in GitHub Desktop.
Automatic settings of "form.set". #bin #python #form NOTE: migrating to https://github.com/tueda/formset.
#!/bin/sh
# sh/Python polyglot header.
# - For sh: the next line parses as an empty string followed by ":" (the no-op
#   command, with "." as an ignored argument); the line after it replaces the
#   shell with `python`, passing this file and all command-line arguments.
# - For Python: the three lines below form one triple-quoted string literal,
#   so the `exec` line is never executed as Python code.
""":" .
exec python "$0" "$@"
"""
from __future__ import print_function
import argparse
import contextlib
import copy
import math
import os
import re
import subprocess
import sys
# The first string literal of the file is the sh re-exec header, so the real
# module documentation is assigned to __doc__ explicitly here.
__doc__ = """\
Generate form.set suited for the local machine.
Example
-------
$ formset.py -o
$ tform `formset.py -f` calcdia.frm
$ minos `formset.py -m` minos.file
Python versions
---------------
2.7, 3.2, 3.3, 3.4, 3.5
"""
if not hasattr(subprocess, 'check_output'):
    # For old systems where Python 2.6 + argparse available.
    def check_output(*popenargs, **kwargs):
        """Run a command and return the bytes it wrote to stdout.

        Accepts the same arguments as `subprocess.Popen`, except `stdout`.

        Raises:
            ValueError: if a `stdout` keyword argument is given.
            subprocess.CalledProcessError: if the command exits with a
                non-zero status.
        """
        if 'stdout' in kwargs:  # pragma: no cover
            raise ValueError('stdout argument not allowed, '
                             'it will be overridden.')
        process = subprocess.Popen(*popenargs, stdout=subprocess.PIPE,
                                   **kwargs)
        output = process.communicate()[0]
        retcode = process.poll()
        if not retcode:
            return output
        # Report the command either from the `args` keyword or from the
        # first positional argument.
        cmd = kwargs.get('args')
        if cmd is None:
            cmd = popenargs[0]
        # `output` keyword is not available in 2.6.
        raise subprocess.CalledProcessError(retcode, cmd)

    # Install the fallback so callers can always use
    # subprocess.check_output().
    subprocess.check_output = check_output
@contextlib.contextmanager
def open_w_or_stdout(filename=None):
    """Context manager yielding a writable file, or stdout if no filename.

    When `filename` is given, the data is written to a temporary file next
    to it (`<filename>.tmp<pid>`), flushed and synced to disk, and only then
    renamed over `filename`. If the body of the `with` statement raises, the
    temporary file is removed and `filename` is left untouched, so a failed
    run never leaves a truncated or partial output file behind.

    Args:
        filename: path of the output file; a false value selects
            `sys.stdout`.

    Yields:
        The open temporary file object, or `sys.stdout`.
    """
    if not filename:
        yield sys.stdout
        return

    # See https://stackoverflow.com/a/2333979.
    tmpfilename = '{0}.tmp{1}'.format(filename, os.getpid())
    committed = False
    try:
        with open(tmpfilename, 'w') as f:
            yield f
            # Reached only when the caller's block finished without error.
            f.flush()
            os.fsync(f.fileno())
        os.rename(tmpfilename, filename)
        committed = True
    finally:
        # On any failure, discard the incomplete temporary file instead of
        # publishing it under the final name.
        if not committed and os.path.exists(tmpfilename):
            os.remove(tmpfilename)
def round_down(x, n):
    """Return `x` lowered to a multiple of `n` using floor division."""
    whole_units = x // n
    return whole_units * n
def round_up(x, n):
    """Return `x` raised to a multiple of `n`.

    Adds `n - 1` before the floor division so that any remainder pushes the
    result to the next multiple.
    """
    whole_units = (x + (n - 1)) // n
    return whole_units * n
def metric_prefix(s):
    """Translate a metric prefix letter into its decimal multiplier.

    The lookup is case-insensitive. The empty string maps to 1, and
    'k', 'm', 'g', 't' map to successive powers of 1000. Any other input
    yields None.
    """
    multipliers = {
        '': 1,
        'k': 1000,
        'm': 1000**2,
        'g': 1000**3,
        't': 1000**4,
    }
    return multipliers.get(s.lower())
def parse_number(s):
    """Parse a string as a number with a possible metric prefix.

    The string may end in one of k/m/g/t (either case), which scales the
    value by the corresponding power of 1000. The scaled value is truncated
    towards zero and returned as an int.

    The arithmetic is done with exact rationals rather than floats, so large
    integers and decimal fractions are not distorted by binary floating
    point (e.g. '9007199254740993' stays exact and '1.005k' gives 1005).

    Args:
        s: the text to parse, e.g. '100', '1.5G', '20m'.

    Returns:
        The parsed value as an int.

    Raises:
        ValueError: if the numeric part cannot be parsed.
    """
    # Function-scope import keeps this block self-contained (stdlib only).
    from fractions import Fraction

    scale = 1
    m = re.match(r'(.*)([kmgtKMGT])$', s)
    if m:
        s = m.group(1)
        scale = metric_prefix(m.group(2))
    try:
        # May raise ValueError.
        return int(Fraction(s) * scale)
    except ZeroDivisionError:
        # Fraction also understands "a/b"; keep the documented exception
        # type for a zero denominator so callers need only catch ValueError.
        raise ValueError('invalid number: {0!r}'.format(s))
def round_human_readable(x, up=False, tostring=True):
    """Round off `x` within a human readable form.

    `x` is rounded to 3 significant figures (up or down depending on `up`).
    The largest suffix among T, G, M, K (powers of 1000) that represents the
    rounded value without changing it is then selected.

    Args:
        x: the number to round; expected to be an integer.
        up: round up if true, otherwise round down.
        tostring: if true, return a string such as '12M' when a suffix
            applies; if false, return the rounded number itself.

    Returns:
        A suffixed string (when `tostring` is true and a suffix applies) or
        the rounded number. When no suffix applies, the rounded number is
        returned as is even if `tostring` is true. Non-positive values are
        returned unchanged.
    """
    if x <= 0:
        # Nothing sensible to round (and no logarithm/digit count exists).
        return x

    round_off = round_up if up else round_down

    # Take 3 significant figures. The digit count is taken from the integer
    # part so the result does not depend on floating-point log10 precision,
    # and the unit is never below 1 so values under 100 stay exact integers.
    digits = len(str(int(x)))
    n = 10**max(digits - 3, 0)
    x = round_off(x, n)

    # Find a good suffix which doesn't change the value.
    for suffix, unit in (('T', 1000**4), ('G', 1000**3),
                         ('M', 1000**2), ('K', 1000)):
        if round_off(x, unit) == x:
            return '{0}{1}'.format(x // unit, suffix) if tostring else x
    return x
class classproperty(property):  # noqa
    """Property decorator whose getter is invoked with the class."""

    def __get__(self, cls, owner):
        """Bind the getter to `owner` as a class method and call it."""
        bound_getter = classmethod(self.fget).__get__(None, owner)
        return bound_getter()
class SystemInfo(object):
    """System information.

    The values are obtained by running the external commands `lscpu` and
    `free -b` and parsing their "key: value" output. Each command is run at
    most once; the parsed result is cached on the class.
    """

    _cpu_info = None  # cached dict parsed from `lscpu`
    _mem_info = None  # cached dict parsed from `free -b`
    verbose = False   # when true, announce each external command on stderr

    @classproperty
    def number_of_nodes(cls):  # noqa
        """Return the number of nodes (1 if lscpu reports no NUMA nodes)."""
        info = cls._get_cpu_info()
        return int(info.get('NUMA node(s)', 1))

    @classproperty
    def number_of_cpus(cls):  # noqa
        """Return the number of cpus."""
        info = cls._get_cpu_info()
        return int(info['CPU(s)'])

    @classproperty
    def number_of_physical_cores(cls):  # noqa
        """Return the number of physical cores."""
        info = cls._get_cpu_info()
        return int(info['Socket(s)']) * int(info['Core(s) per socket'])

    @classproperty
    def total_memory(cls):  # noqa
        """Return the total physical memory in bytes."""
        info = cls._get_mem_info()
        return int(info['Mem'][0])

    @classmethod
    def _run_command(cls, command):
        """Run `command` and return its output as a list of text lines."""
        if cls.verbose:
            sys.stderr.write('running {0}...\n'.format(command[0]))
        # The parsers below look up English keys ('CPU(s)', 'Mem', ...), so
        # force the untranslated C locale for the child process.
        env = dict(os.environ)
        env['LC_ALL'] = 'C'
        output = subprocess.check_output(command, env=env)
        return output.decode('utf-8').strip().split('\n')

    @staticmethod
    def _split_key_value(line):
        """Split "key: value" at the first colon; return None if no colon."""
        key, sep, value = line.partition(':')
        if not sep:
            return None
        return key.strip(), value.strip()

    @classmethod
    def _get_cpu_info(cls):
        """Return (and cache) a dict of the fields printed by `lscpu`."""
        if cls._cpu_info is None:
            info = {}
            for line in cls._run_command(['lscpu']):
                # Values may themselves contain colons, and some lines have
                # no colon at all; neither may break the parsing.
                pair = cls._split_key_value(line)
                if pair is not None:
                    info[pair[0]] = pair[1]
            cls._cpu_info = info
        return cls._cpu_info

    @classmethod
    def _get_mem_info(cls):
        """Return (and cache) a dict mapping row labels to column lists.

        Each "label: v1 v2 ..." row of `free -b` becomes
        `info[label] = ['v1', 'v2', ...]`; rows without a colon are skipped.
        """
        if cls._mem_info is None:
            info = {}
            for line in cls._run_command(['free', '-b']):
                pair = cls._split_key_value(line)
                if pair is not None:
                    info[pair[0]] = pair[1].split()
            cls._mem_info = info
        return cls._mem_info
class Setup(object):
    """Setup parameters.

    Every instance attribute whose name does not start with an underscore is
    treated as a parameter (see `items`). Underscore-prefixed attributes
    hold the byte sizes used by the memory estimate in `calc`.
    """

    def __init__(self):
        """Construct a set of setup parameters."""
        # NOTE(review): the values below are presumably FORM's built-in
        # defaults for a 64-bit build - confirm against the FORM sources.
        self.compresssize = 90000
        self.filepatches = 256
        self.hidesize = 0
        self.largepatches = 256
        self.largesize = 50000000
        self.maxtermsize = 40000  # 64-bit
        self.numstorecaches = 4
        self.scratchsize = 50000000
        self.sizestorecache = 32768
        self.smallextension = 20000000
        self.smallsize = 10000000
        self.sortiosize = 100000
        self.termsinsmall = 100000
        self.threadbucketsize = 500
        self.threads = -1  # form
        self.threadscratchoutsize = 2500000
        self.threadscratchsize = 100000
        self.workspace = 40000000  # 64-bit
        # The parameters from here to `subtermsinsmall` are not read by
        # `calc`; they are carried only as known parameter names/defaults.
        self.bracketindexsize = 200000
        self.constindex = 128
        self.continuationlines = 15
        self.functionlevels = 30
        self.maxnumbersize = 200
        self.maxwildcards = 100
        self.parentheses = 100
        self.processbucketsize = 1000
        self.subfilepatches = 64
        self.sublargepatches = 64
        self.sublargesize = 4000000
        self.subsmallextension = 800000
        self.subsmallsize = 500000
        self.subsortiosize = 32768
        self.subtermsinsmall = 10000
        # 64-bit
        self._ptrsize = 8
        self._possize = 8
        self._wordsize = 4

    def items(self):
        """Return pairs of parameters and values.

        The result is a tuple of `(name, value)` pairs sorted by name,
        excluding attributes whose names start with an underscore.
        """
        items = [(k, v) for (k, v) in self.__dict__.items() if k[0] != '_']
        items.sort()
        return tuple(items)

    def __str__(self):
        """Return the string representation.

        Note that this calls `calc`, which may adjust the parameters of
        this instance in place before they are formatted.
        """
        mem = self.calc()
        params = ['{0}: {1}'.format(k, v) for (k, v) in self.items()]
        return '<Setup: {0} bytes, {1}>'.format(mem, ', '.join(params))

    def copy(self):
        """Return a shallow copy."""
        return copy.copy(self)

    def calc(self):
        """Return an estimation of memory usage.

        The parameters of this instance are first adjusted IN PLACE so that
        they satisfy a series of minimum-size constraints; the adjusted
        values are then summed into an estimate in bytes. Call this on a
        copy (see `copy`) if the original values must be preserved.
        """
        # Lower bounds derived from maxtermsize.
        self.maxtermsize = max(self.maxtermsize, 200)
        self.compresssize = max(self.compresssize,
                                2 * self.maxtermsize * self._wordsize)
        self.sortiosize = max(self.sortiosize,
                              self.maxtermsize * self._wordsize)
        # The strange factor WordSize**2 is used in the FORM source...
        self.scratchsize = max(self.scratchsize,
                               4 * self.maxtermsize * self._wordsize**2)
        # hidesize is only constrained when it has been set (> 0).
        if self.hidesize > 0:
            self.hidesize = max(self.hidesize,
                                4 * self.maxtermsize * self._wordsize**2)
        self.threadscratchsize = max(self.threadscratchsize,
                                     4 * self.maxtermsize * self._wordsize**2)
        self.threadscratchoutsize = max(self.threadscratchoutsize,
                                        4 * self.maxtermsize *
                                        self._wordsize**2)
        # constraints in RecalcSetups()
        self.filepatches = max(self.filepatches, self.threads)
        self.termsinsmall = round_up(self.termsinsmall, 16)
        numberofblocksinsort = 10
        minimumnumberofterms = 10
        n = numberofblocksinsort * minimumnumberofterms
        if self.threads >= 0:
            # Grow largesize so that largesize + smallextension reaches the
            # minimum buffer size required for this number of threads.
            minbufsize = (self.threads * (1 + n) * self.maxtermsize *
                          self._wordsize)
            if self.largesize + self.smallextension < minbufsize:
                self.largesize = minbufsize - self.smallextension
        # constraints in AllocSort()
        self.filepatches = max(self.filepatches, 4)
        self.smallsize = max(self.smallsize,
                             16 * self.maxtermsize * self._wordsize)
        self.smallextension = max(self.smallextension, self.smallsize * 3 // 2)
        if self.largesize > 0:
            self.largesize = max(self.largesize, 2 * self.smallsize)
        compinc = 2
        minbufsize = self.filepatches * (self.sortiosize +
                                         (compinc + 2 * self.maxtermsize) *
                                         self._wordsize)
        if self.largesize + self.smallextension < minbufsize:
            # With no large buffer the small extension absorbs the minimum;
            # otherwise the large buffer is grown to make up the difference.
            if self.largesize == 0:
                self.smallextension = minbufsize
            else:
                self.largesize = minbufsize - self.smallextension
        iotry = (((self.largesize + self.smallextension) // self.filepatches //
                  self._wordsize) - 2 * self.maxtermsize - compinc)  # in words
        self.sortiosize = max(self.sortiosize, iotry)  # bytes vs. words??
        # Compute the memory usage.
        mem = 0
        # Two scratch-size buffers plus a third that is hidesize when set,
        # otherwise another scratchsize.
        mem += (self.scratchsize * 2 + (self.hidesize
                                        if self.hidesize > 0
                                        else self.scratchsize))
        mem += self.workspace * self._wordsize
        mem += (self.compresssize + 10) * self._wordsize
        mem += (self.largesize + self.smallextension + 3 * self.termsinsmall *
                self._ptrsize + self.sortiosize)
        storecachesize = self._possize * 2 * self._ptrsize + self._wordsize
        # ignore the padding
        storecachesize += self.sizestorecache
        mem += storecachesize * self.numstorecaches
        if self.threads >= 1:
            # Contributions that scale with the number of threads.
            mem += ((self.threadscratchoutsize + self.threadscratchsize * 2) *
                    self.threads)
            mem += self.workspace * self._wordsize * self.threads
            mem += (self.compresssize + 10) * self._wordsize * self.threads
            # Sort buffers per thread: largesize, smallsize, smallextension
            # and filepatches are divided by the number of threads before
            # the AllocSort-style constraints are re-applied.
            mem += self._thread_alloc_sort(self.largesize // self.threads,
                                           self.smallsize // self.threads,
                                           self.smallextension // self.threads,
                                           self.termsinsmall,
                                           self.largepatches,
                                           self.filepatches // self.threads,
                                           self.sortiosize) * self.threads
            mem += storecachesize * self.numstorecaches * self.threads
            sizethreadbuckets = ((self.threadbucketsize + 1) *
                                 self.maxtermsize + 2) * self._wordsize
            # Larger bucket sizes get a smaller share per bucket entry.
            if self.threadbucketsize >= 250:
                sizethreadbuckets //= 4
            elif self.threadbucketsize >= 90:
                sizethreadbuckets //= 3
            elif self.threadbucketsize >= 40:
                sizethreadbuckets //= 2
            sizethreadbuckets //= self._wordsize
            mem += ((2 * sizethreadbuckets * self._wordsize +
                     (self.threadbucketsize + 1) * self._possize) *
                    2 * self.threads)
            if self.threads >= 3:
                # Extra term counted once per thread beyond the second.
                mem += ((self.workspace * self._wordsize // 8 +
                         2 * self.maxtermsize * self._wordsize) *
                        (self.threads - 2))
        return mem

    def _thread_alloc_sort(self, largesize, smallsize, smallextension,
                           termsinsmall, largepatches, filepatches,
                           sortiosize):
        """Return the sort-buffer size for the given per-thread parameters.

        Applies the same AllocSort constraints as `calc`, but to the local
        argument values only; the instance is not modified (only
        `maxtermsize`, `_wordsize` and `_ptrsize` are read from it).
        `largepatches` is accepted but not used in the computation.
        """
        filepatches = max(filepatches, 4)
        smallsize = max(smallsize, 16 * self.maxtermsize * self._wordsize)
        smallextension = max(smallextension, smallsize * 3 // 2)
        if largesize > 0:
            largesize = max(largesize, 2 * smallsize)
        compinc = 2
        minbufsize = filepatches * (sortiosize + (compinc +
                                    2 * self.maxtermsize) * self._wordsize)
        if largesize + smallextension < minbufsize:
            if largesize == 0:
                smallextension = minbufsize
            else:
                largesize = minbufsize - smallextension
        iotry = (((largesize + smallextension) // filepatches //
                  self._wordsize) - 2 * self.maxtermsize - compinc)  # in words
        sortiosize = max(sortiosize, iotry)  # bytes vs. words??
        return (largesize + smallextension + 3 * termsinsmall * self._ptrsize +
                sortiosize)
def _build_parser():
    """Create the command-line parser used by `main`."""
    parser = argparse.ArgumentParser(
        usage=('%(prog)s [options] [--] '
               '[par=val].. [par+=int].. [par*=float]..'),
        epilog=('On non-Linux systems, the number of physical CPUs and memory '
                'available on the machine may be not automatically detected. '
                'In such a case, one cannot use the default parameters '
                'depending on those values and needs to explicitly specify '
                '--ncpus, --total-cpus and --total-memory.'),
        add_help=False
    )
    parser.add_argument('-h',
                        '--help',
                        action='store_const',
                        const=True,
                        help='show this help message and exit')
    parser.add_argument('-o',
                        '--output',
                        action='store',
                        nargs='?',
                        const='form.set',
                        help=('output to FILE (default: no (stdout), '
                              'FILE=form.set)'),
                        metavar='FILE')
    parser.add_argument('-f',
                        '--form',
                        action='store_const',
                        const=True,
                        help='print tform options (e.g., -w4) and exit')
    parser.add_argument('-m',
                        '--minos',
                        action='store_const',
                        const=True,
                        help='print minos options (e.g., -m2x4) and exit')
    parser.add_argument('-u',
                        '--usage',
                        action='store_const',
                        const=True,
                        help='print expected initial memory usage and exit')
    parser.add_argument('-H',
                        '--human-readable',
                        action='store_const',
                        const=True,
                        help=('adjust to human-readable numbers '
                              '(e.g., 1K, 23M, 456G)'))
    parser.add_argument('-1',
                        '--one',
                        action='store_const',
                        const=-1,
                        dest='ncpus',
                        help='use cpus in a node on the machine (default)')
    parser.add_argument('--full',
                        action='store_const',
                        const=-99999,
                        dest='ncpus',
                        help='use cpus in all nodes on the machine')
    parser.add_argument('-n',
                        '--ncpus',
                        action='store',
                        type=int,
                        help='use N cpus',
                        metavar='N')
    parser.add_argument('-p',
                        '--percentage',
                        action='store',
                        default=75.0,
                        type=float,
                        help=('percentage of initial memory usage '
                              '(default: 75.0)'),
                        metavar='N')
    parser.add_argument('--total-cpus',
                        action='store',
                        type=int,
                        help='specify the total cpus on the machine',
                        metavar='N')
    parser.add_argument('--total-memory',
                        action='store',
                        help='specify the total memory on the machine',
                        metavar='N')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_const',
                        const=True,
                        help='verbose output')
    parser.add_argument('args',
                        nargs='*',
                        help=argparse.SUPPRESS)
    return parser


def main():
    """Entry point.

    Parses the command line, determines the CPU and memory budget for one
    job, scales the size parameters of a `Setup` by bisection so that the
    estimated memory usage fits the budget, and writes the parameters that
    differ from the defaults to the output (stdout or a file).

    Exits via `SystemExit` for --help, --form, --minos, --usage, on invalid
    arguments, and when no fitting parameters can be found.
    """
    # Parse the command line arguments.
    parser = _build_parser()
    args = parser.parse_args()
    pars = {}

    # Help message. Handled before any system information is queried so
    # that --help also works where lscpu/free are unavailable.
    if args.help:
        parser.print_help()
        sys.exit(0)

    # NOTE: when all of `--ncpus`, `--total-cpus` and `--total-memory` are
    # specified, we don't need to access the system information.
    if args.verbose:
        SystemInfo.verbose = True
    if args.total_cpus:
        total_cpus = args.total_cpus
    else:
        total_cpus = SystemInfo.number_of_physical_cores
    if args.total_memory:
        try:
            total_memory = parse_number(args.total_memory)
        except ValueError:
            parser.error('non-integer value for total memory: {0}'.format(
                args.total_memory))
    else:
        total_memory = SystemInfo.total_memory

    # Number of CPUs.
    if args.ncpus is not None:
        ncpus = args.ncpus
    else:
        # Use 1 node for each job by default.
        ncpus = -1
    if ncpus < 0:
        # Use (-ncpus) nodes.
        ncpus = -ncpus * (total_cpus // SystemInfo.number_of_nodes)
    ncpus = max(ncpus, 1)
    ncpus = min(ncpus, total_cpus)

    sp = Setup()
    sp.threads = ncpus if ncpus >= 2 else -1

    # Apply the par=val, par+=int and par*=float arguments.
    for a in args.args:
        m = re.match(r'([a-zA-Z][a-zA-Z0-9]*)([+*]?)=(.*)', a)
        if m:
            par = m.group(1).lower()
            ope = m.group(2)
            val = m.group(3)
            if par in sp.__dict__:
                # Known parameter.
                if ope == '' or ope == '+':
                    # We have par=val or par+=int.
                    try:
                        val = parse_number(val)
                    except ValueError:
                        parser.error(
                            'non-integer value for parameter: {0}'.format(a))
                    if ope == '':
                        setattr(sp, par, val)
                    else:
                        setattr(sp, par, getattr(sp, par) + val)
                    continue
                else:
                    # We have par*=float.
                    try:
                        val = float(val)
                    except ValueError:
                        parser.error(
                            'non-float value for parameter: {0}'.format(a))
                    setattr(sp, par, int(getattr(sp, par) * val))
                    continue
            elif ope == '':
                # Unknown parameter given by par=val. Add it to the dictionary.
                pars[par] = val
                continue
        parser.error('unrecognized argument: {0}'.format(a))

    # Our resource.
    cpus = max(sp.threads, 1)
    memory = int(total_memory * args.percentage / 100.0 * cpus / total_cpus)

    # For --form option.
    if args.form:
        print('-w{0}'.format(cpus))
        sys.exit()

    # For --minos option.
    if args.minos:
        print('-m{0}x{1}'.format(total_cpus // cpus, cpus))
        sys.exit()

    # Presumably increasing MaxTermSize requires increasing WorkSpace, too.
    sp.workspace = max(sp.workspace, sp.maxtermsize * 250)

    # Optimize the memory usage by bisection.
    max_iteration = 50
    base_setup = sp.copy()

    def f(x):
        """Return (excess over the memory budget, scaled Setup) for `x`."""
        # Hopefully monotonically increasing.
        trial = base_setup.copy()
        trial.smallsize = int(trial.smallsize * x)
        trial.largesize = int(trial.largesize * x)
        trial.termsinsmall = int(trial.termsinsmall * x)
        trial.scratchsize = int(trial.scratchsize * x)
        m = trial.calc()
        if args.human_readable:
            m = round_human_readable(m, True, False)
        return (- (memory - m), trial)

    # Invariant once bracketed: f(x1) <= 0 (fits, an exact fit included)
    # and f(x2) > 0 (does not fit).
    x1 = 1.0
    x2 = None
    y1 = f(x1)[0]
    y2 = None
    for _i in range(max_iteration):
        if x2 is None:
            if y1 <= 0:
                # Fits: keep doubling until the budget is exceeded.
                x = x1 * 2.0
                y = f(x)[0]
                if y > 0:
                    x2 = x
                    y2 = y
                else:
                    x1 = x
                    y1 = y
            else:
                # Does not fit: keep halving until it does.
                x = x1 * 0.5
                y = f(x)[0]
                if y <= 0:
                    x2 = x1
                    y2 = y1
                x1 = x
                y1 = y
        else:
            x = (x1 + x2) * 0.5
            y = f(x)[0]
            if y <= 0:
                x1 = x
                y1 = y
            else:
                x2 = x
                y2 = y
        if x2 is not None:
            assert x1 < x2 and y1 <= 0 < y2

    if x2 is None:
        if x1 < 1.0e-12:
            x1 = 0
        # Exit status 1 with the message on stderr.
        parser.exit(1, ('failed to find parameters: memory({0}) = {1} '
                        'bytes shortage\n').format(x1, y1))

    # For --usage option.
    if args.usage:
        m = f(x1)[1].calc()
        if args.human_readable:
            m = round_human_readable(m, True)
        print(m)
        sys.exit()

    # Output.
    with open_w_or_stdout(args.output) as fi:
        def round_memory(m):
            return (round_human_readable(m, False)
                    if args.human_readable else m)

        print(('# {0}{1} (cpu: {2}, mem: {3}; '
               'total cpu: {4}, total mem: {5}; {6}x{7})').format(
                   parser.prog,
                   (' ' if len(sys.argv) >= 2 else '') +
                   ' '.join(sys.argv[1:]),
                   cpus,
                   round_memory(memory),
                   total_cpus,
                   round_memory(total_memory),
                   total_cpus // cpus,
                   cpus,
               ), file=fi)

        result = f(x1)[1]
        defaults = dict(Setup().items())  # default value
        for k, v in result.items():
            if k == 'threads':
                # 'threads N' doesn't work, must be given by tform option -wN.
                continue
            if v == defaults[k]:
                # Don't write when same as the default value.
                continue
            if args.human_readable:
                v = round_human_readable(v, False)
            print('{0} {1}'.format(k, v), file=fi)
        # Unknown parameters are passed through verbatim.
        for k, v in pars.items():
            print('{0} {1}'.format(k, v), file=fi)
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment