Skip to content

Instantly share code, notes, and snippets.

@houseofjeff
Last active August 29, 2015 14:15
Show Gist options
  • Save houseofjeff/244f4220d97c5cbc5b5f to your computer and use it in GitHub Desktop.
Save houseofjeff/244f4220d97c5cbc5b5f to your computer and use it in GitHub Desktop.
An interesting look at how easy parsing & matching a cron-style schedule string can be.
from __future__ import division
import re
import random
from datetime import datetime, timedelta
def main():
    """Script entry point: run the randomized timing test with 100000 samples."""
    sample_count = 100000
    timing_test(sample_count)
def simple_test():
    """
    Check the current time against a schedule that matches every
    even-numbered minute and print the outcome. Run it a few times and see!
    """
    schedule = cron_to_schedule("*/2 * * * *")
    # Pass the current time explicitly so the result always reflects the
    # moment of the call rather than any default captured earlier.
    # The parenthesised single-argument print works on Python 2 and 3 alike.
    if check_schedule(schedule, datetime.now()):
        print("It's time!")
    else:
        print("Not yet")
def timing_test(count):
    """
    Benchmark check_schedule() against `count` random datetimes.

    Each sample is a random time within the next 10 days, tested against a
    schedule that fires every second minute of every third hour. Half of all
    minutes and a third of all hours match, so the hit rate should approach
    1/2 * 1/3 = 1/6.

    Prints the elapsed time with the throughput (calls per millisecond), and
    the observed hit rate next to the expected one.
    """
    schedule = cron_to_schedule("*/2 */3 * * *")
    ten_days_in_seconds = 86400 * 10

    started = datetime.now()
    numpositive = 0
    # range() (rather than xrange) keeps this runnable on Python 2 and 3.
    for _ in range(count):
        secondsoffset = random.randint(0, ten_days_in_seconds)
        dt = datetime.now() + timedelta(seconds=secondsoffset)
        if check_schedule(schedule, dt):
            numpositive += 1
    diffseconds = (datetime.now() - started).total_seconds()

    # A tiny `count` can finish within the clock's resolution; avoid dividing
    # by a zero elapsed time in that case.
    if diffseconds > 0:
        calls_per_ms = int(round(count / (diffseconds * 1000)))
    else:
        calls_per_ms = "n/a"
    # float() keeps this a true division even without the file-level
    # `from __future__ import division`; guard against count == 0.
    hit_rate = numpositive / float(count) if count else 0.0

    print("{:.2f} s = {} calls per ms".format(diffseconds, calls_per_ms))
    print("{}/{} matched = {:.4f}. Expected value: 0.1666".format(numpositive, count, hit_rate))
def cron_to_schedule(cronstr):
    """
    Convert a cron-style string into a schedule usable by check_schedule().

    The string holds up to five whitespace-separated segments, in order:
    minute (0-59), hour (0-23), day of month (1-31), month (1-12) and day of
    week (1-7). Missing trailing segments are treated as "*".

    Returns a list of five sets of integers, one per field, containing the
    values that field is allowed to take.

    Raises ValueError if the string has more than five segments or if a
    segment cannot be parsed.
    """
    limits = [(0, 59),   # minutes
              (0, 23),   # hours
              (1, 31),   # days of month
              (1, 12),   # months of year
              (1, 7)]    # days of week

    # break out the individual segments and make sure the string isn't malformed
    segments = cronstr.split()
    if len(segments) > len(limits):
        raise ValueError("the cron string can only contain up to 5 segments")

    # fill in any missing segments with asterisks
    segments.extend(["*"] * (len(limits) - len(segments)))

    # convert each segment to a set of numbers, pairing it with its limits
    return [_parse_cron_segment(seg, bounds)
            for seg, bounds in zip(segments, limits)]
def check_schedule(sched, dt=None):
    """
    Return True if the given (or current) minute would match the schedule.

    sched -- a sequence of five collections of allowed values, in the order
             minute, hour, day of month, month, ISO weekday (1=Mon .. 7=Sun).
    dt    -- the datetime to test; when omitted (None) the current local time
             is read at call time.
    """
    # A default of `datetime.now()` in the signature would be evaluated only
    # once, when the function is defined, so resolve "now" on every call.
    if dt is None:
        dt = datetime.now()

    # get the numbers for the specified datetime, the position in this list
    # corresponds to the position we'll check in the schedule
    tvals = [dt.minute, dt.hour, dt.day, dt.month, dt.isoweekday()]

    # every time value must appear in the corresponding schedule entry
    return all(time_value in match_list
               for time_value, match_list in zip(tvals, sched))
range_regex = re.compile(r"^(\d+)-(\d+)$")
interval_regex = re.compile(r"^\*/(\d+)$")

try:
    _string_types = basestring  # Python 2: covers both str and unicode
except NameError:
    _string_types = str  # Python 3 has no basestring


def _parse_cron_segment(seg, limits):
    """
    Interpret one cron entry segment and return the set of numbers it matches.

    seg    -- the segment text: '*', a range like '9-12', an interval like
              '*/5', a single integer, or a comma-separated list of integers.
    limits -- a (minval, maxval) pair; numbers outside this inclusive range
              are silently dropped from the result.

    For instance, "*",        (0,59) -> all the numbers from 0-59
                  "*/10",     (0,23) -> the numbers 0, 10 & 20
                  "9-12",     (0,23) -> the numbers 9, 10, 11 & 12
                  "1,2,7,13", (1,12) -> the numbers 1, 2 & 7

    NOTE: an interval '*/N' yields the multiples of N counted from 0 and then
    filtered by the limits, so for a field starting at 1 (e.g. months) '*/3'
    gives 3, 6, 9, 12. crontab(5) instead steps from the field's first value.

    Raises ValueError if the segment is not in one of the forms above.
    """
    # Unpack in the body: tuple parameters in the signature are Python 2 only.
    minval, maxval = limits

    # test preconditions
    assert (minval < maxval) and (minval >= 0) and (maxval <= 60)
    assert isinstance(seg, _string_types)

    # return everything through this function and it will enforce the
    # min/max values for us
    def limit(nums):
        return set(n for n in nums if minval <= n <= maxval)

    seg = seg.strip()

    # if it's an asterisk it matches anything, return the widest range
    # and let limit() handle it.
    if seg == "*":
        return limit(range(60))

    # see if it's a range (e.g. '4-10')
    range_match = range_regex.match(seg)
    if range_match:
        rangemin, rangemax = (int(g) for g in range_match.groups())
        # Clamp to the limits before generating, so an absurd upper bound
        # cannot build a huge sequence only to have it filtered away.
        return limit(range(max(rangemin, minval), min(rangemax, maxval) + 1))

    # see if it's an interval ('*/15')
    interval_match = interval_regex.match(seg)
    if interval_match:
        step = int(interval_match.group(1))
        if step == 0:
            # range() rejects a zero step with an unrelated message; report
            # it as the malformed segment it is.
            raise ValueError("Malformed cron string - don't understand {}".format(seg))
        return limit(range(0, 60, step))

    # if it's an integer or a list of integers, return it/them
    try:
        return limit([int(e) for e in seg.split(",")])
    except ValueError:
        # nope, not integers
        raise ValueError("Malformed cron string - don't understand {}".format(seg))
# Run the timing test only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment