Skip to content

Instantly share code, notes, and snippets.

@bergercookie
Created September 15, 2021 16:46
Show Gist options
  • Save bergercookie/5ce1e88ff206b13cd9c5f40abfca3737 to your computer and use it in GitHub Desktop.
Save bergercookie/5ce1e88ff206b13cd9c5f40abfca3737 to your computer and use it in GitHub Desktop.
Compute stats for time-data returned from the time shell command
#!/usr/bin/env python3
import argparse
import operator
import sys
from enum import Enum
from functools import cached_property, reduce
from pathlib import Path
from typing import List
import numpy as np
"""Compute relevant statistics (mean, standard deviation) given a list of time measurements."""
class Period(Enum):
    """A unit of time, stored as a fraction ``(numerator, denominator)`` of one second.

    Members are declared from the shortest unit to the longest. Ordering
    comparisons (``<``, ``<=``, ``>``, ``>=``) compare the length of the units,
    so ``min(a, b)`` yields the finer-grained of two periods.
    """

    # Every member needs a distinct value: Enum turns a repeated value into an
    # alias of the first member that used it.
    Yocto = (1, 10**24)
    Zepto = (1, 10**21)
    Atto = (1, 10**18)
    Femto = (1, 10**15)
    Pico = (1, 10**12)
    Nano = (1, 10**9)
    Micro = (1, 10**6)
    Milli = (1, 10**3)
    Second = (1, 1)
    Minute = (60, 1)
    Hour = (60 * 60, 1)
    Day = (24 * 60 * 60, 1)
    Week = (7 * 24 * 60 * 60, 1)
    Month = (2629746, 1)  # 1/12 of the Year value below
    Year = (31556952, 1)  # 365.2425 days

    @property
    def num(self):
        """Numerator of the period's length in seconds."""
        return self.value[0]

    @property
    def denom(self):
        """Denominator of the period's length in seconds."""
        return self.value[1]

    def _scaled(self, other: "Period"):
        """Return ``(self, other)`` lengths as integers over a common denominator.

        Cross-multiplying keeps the comparison exact; no float division is involved.
        """
        return self.num * other.denom, other.num * self.denom

    def __ge__(self, other):
        """
        Return True when this period is at least as long as ``other``.

        Examples
        --------
        >>> Period.Yocto < Period.Minute
        True
        >>> Period.Yocto > Period.Minute
        False
        >>> Period.Yocto >= Period.Minute
        False
        >>> Period.Yocto <= Period.Minute
        True
        >>> Period.Minute == Period.Minute
        True
        >>> Period.Minute > Period.Minute
        False
        >>> Period.Femto > Period.Nano
        False
        >>> Period.Femto < Period.Nano
        True
        >>> Period.Yocto < Period.Zepto
        True
        """
        if not isinstance(other, Period):
            return NotImplemented
        mine, theirs = self._scaled(other)
        return mine >= theirs

    def __gt__(self, other):
        """Return True when this period is strictly longer than ``other``."""
        if not isinstance(other, Period):
            return NotImplemented
        mine, theirs = self._scaled(other)
        return mine > theirs

    def __lt__(self, other):
        """Return True when this period is strictly shorter than ``other``."""
        if not isinstance(other, Period):
            return NotImplemented
        mine, theirs = self._scaled(other)
        return mine < theirs

    def __le__(self, other):
        """Return True when this period is at most as long as ``other``."""
        if not isinstance(other, Period):
            return NotImplemented
        mine, theirs = self._scaled(other)
        return mine <= theirs


# All periods in declaration order (shortest to longest).
_periods = list(Period)
class Duration:
    """A length of time: a numeric count (``rep``) of a given ``Period``."""

    # Instances are mutable and define __eq__, so they are deliberately unhashable.
    __hash__ = None

    def __init__(self, rep: float, period: Period):
        self.rep = rep
        self.period = period

    def to(self, other_period: Period) -> "Duration":
        """
        Return a new, equivalent duration expressed in ``other_period``.

        When the conversion factor is a whole number the count is multiplied by
        that integer, so integer counts stay integers. Otherwise (typically when
        converting to a longer unit) the result is computed with true division.

        Examples
        --------
        >>> Duration(rep=2, period=Period.Minute).to(Period.Second).rep
        120
        >>> Duration(rep=90, period=Period.Second).to(Period.Minute).rep
        1.5
        """
        if self.period == other_period:
            return Duration(rep=self.rep, period=self.period)

        numerator = self.period.num * other_period.denom
        denominator = self.period.denom * other_period.num
        scale, remainder = divmod(numerator, denominator)
        if remainder == 0:
            rep = self.rep * scale
        else:
            # Truncating the factor to an int here would turn e.g. seconds ->
            # minutes into a multiplication by zero.
            rep = self.rep * numerator / denominator

        return Duration(rep=rep, period=other_period)

    def __add__(self, other: "Duration") -> "Duration":
        """
        Add two duration instances.

        The result is expressed in the shorter of the two periods.

        Examples
        ---------
        >>> d1 = Duration(rep=123, period=Period.Second)
        >>> d2 = Duration(rep=456, period=Period.Minute)
        >>> d3 = d1 + d2
        >>> d3.rep
        27483
        >>> d3.period.name
        'Second'
        """
        if not isinstance(other, Duration):
            return NotImplemented

        # same period, just add the reps
        if self.period == other.period:
            return Duration(rep=self.rep + other.rep, period=self.period)

        # different period, convert both to the finer-grained one
        target_period = min(self.period, other.period)
        return self.to(target_period) + other.to(target_period)

    def __str__(self):
        return f"{self.rep} {self.period.name.lower()}s"

    def __repr__(self):
        return f"{type(self).__name__}(rep={self.rep!r}, period={self.period})"

    def __eq__(self, other: object):
        """
        Return True when both durations describe the same length of time.

        Examples
        --------
        >>> Duration(rep=1, period=Period.Minute) == Duration(rep=60, period=Period.Second)
        True
        >>> Duration(rep=1, period=Period.Minute) == Duration(rep=61, period=Period.Second)
        False
        """
        if not isinstance(other, Duration):
            return NotImplemented
        if self.period == other.period:
            return self.rep == other.rep

        # Cross-multiply the period fractions instead of converting, so the
        # comparison needs no division.
        lhs = self.rep * self.period.num * other.period.denom
        rhs = other.rep * other.period.num * self.period.denom
        return lhs == rhs

    @staticmethod
    def parse(s: str) -> "Duration":
        """
        Parse a string made of ``<number><suffix>`` groups into a single Duration.

        Example of incoming measurement "1m43.116s". The suffix letters are the
        keys of ``suffix_to_period``. The result is expressed in the shortest
        period that appears in the string.

        Raises
        ------
        ValueError
            If the string is empty, does not start with a number, contains an
            unknown suffix, has a suffix without a number, or ends with a number
            that has no suffix.

        Examples
        ----------
        >>> d = Duration.parse("1m43.116s")
        >>> d.rep
        103.116
        >>> d.period.name
        'Second'
        >>> d = Duration.parse("12h13m42s")
        >>> d.rep
        44022.0
        >>> d.period.name
        'Second'
        >>> d = Duration.parse("12h13m")
        >>> d.rep
        733.0
        >>> d.period.name
        'Minute'
        >>> d = Duration.parse("13m12h")
        >>> d.rep
        733.0
        >>> d.period.name
        'Minute'
        >>> d = Duration.parse("12h")
        >>> d.rep
        12.0
        >>> d.period.name
        'Hour'
        """
        number_chars = "0123456789."
        if not s or s[0] not in number_chars:
            raise ValueError(f"Invalid duration {s!r}: expected it to start with a number")

        durations: List[Duration] = []
        start = 0  # index where the number of the current group begins
        for pos, curr_char in enumerate(s):
            if curr_char in number_chars:
                continue
            if curr_char not in suffix_to_period:
                raise ValueError(f"Invalid duration {s!r}: unknown unit suffix {curr_char!r}")
            try:
                rep = float(s[start:pos])
            except ValueError as err:
                raise ValueError(
                    f"Invalid duration {s!r}: {s[start:pos]!r} is not a number"
                ) from err
            durations.append(Duration(rep=rep, period=suffix_to_period[curr_char]))
            start = pos + 1

        if start != len(s):
            # A trailing number without a unit would otherwise be dropped silently.
            raise ValueError(
                f"Invalid duration {s!r}: trailing number {s[start:]!r} has no unit suffix"
            )

        # sum all durations, starting from zero in the first group's period
        start_period = durations[0].period
        return reduce(operator.add, durations, Duration(rep=0, period=start_period))
# Single-letter unit suffix for each period that can appear in a duration
# string such as "1m43.116s".
period_to_suffix = dict(
    [
        (Period.Second, "s"),
        (Period.Minute, "m"),
        (Period.Hour, "h"),
        (Period.Day, "d"),
        (Period.Week, "w"),
        (Period.Year, "y"),
    ]
)

# Reverse lookup: suffix letter -> period.
suffix_to_period = {suffix: period for period, suffix in period_to_suffix.items()}
def valid_file(s: str) -> Path:
    """Return ``s`` as a ``Path``, raising ``FileNotFoundError`` unless it is an existing file."""
    candidate = Path(s)
    if candidate.is_file():
        return candidate

    raise FileNotFoundError(candidate)
def main():
    """Parse the command line, read the durations and print their statistics.

    Durations come either from a file (``-f``, one duration per line; blank
    lines are ignored) or from the command line (``-d``, one or more arguments,
    each of which may itself hold several whitespace-separated durations).
    All durations are converted to seconds before the mean, median and
    standard deviation are printed.
    """
    # Argument parsing ------------------------------------------------------------------------
    parser = argparse.ArgumentParser(
        # Must be passed by keyword: the first positional parameter is `prog`.
        description=(
            "Compute relevant statistics (mean, standard deviation) given a list of time durations."
        )
    )
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        "-f",
        "--file",
        help="Read durations from a file - one line per duration.",
        type=valid_file,
    )
    group.add_argument(
        "-d", "--durations", help="Read durations from the given cli arguments.", nargs="+"
    )
    args = vars(parser.parse_args())

    if args["file"] is not None:
        with args["file"].open("r") as f:
            stripped_lines = (line.strip() for line in f)
            durations_s = [line for line in stripped_lines if line]
    else:
        # nargs="+" yields a list; split each item so that a single quoted
        # argument like "1m2s 3m4s" is accepted as well.
        durations_s = []
        for argument in args["durations"]:
            durations_s.extend(argument.split())

    if not durations_s:
        parser.error("no durations to process")

    durations = [Duration.parse(s) for s in durations_s]

    # Calculate the statistics ----------------------------------------------------------------
    vals = np.array([d.to(Period.Second).rep for d in durations])
    print(f"Mean: {np.mean(vals):.3f}")
    print(f"Median: {np.median(vals):.3f}")
    print(f"St.Deviation: {np.std(vals):.3f}")
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment