Skip to content

Instantly share code, notes, and snippets.

@kgaughan
Created April 25, 2012 17:54
Show Gist options
  • Star 20 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save kgaughan/2491663 to your computer and use it in GitHub Desktop.
Save kgaughan/2491663 to your computer and use it in GitHub Desktop.
Parsing a comma-separated list of numbers and range specifications in Python
from itertools import chain
def parse_range(rng):
    """Expand a single range specification into a range of integers.

    ``rng`` is either a single number (``"7"``) or two numbers joined by a
    dash (``"3-9"``). The bounds are inclusive, and a reversed specification
    such as ``"9-3"`` is treated the same as ``"3-9"``.

    Raises ValueError if ``rng`` has more than two dash-separated parts or
    if any part is not an integer.
    """
    parts = rng.split('-')
    # A chained comparison like `1 > n > 2` can never be true, so test the
    # valid interval explicitly and negate it.
    if not 1 <= len(parts) <= 2:
        raise ValueError("Bad range: '%s'" % (rng,))
    parts = [int(i) for i in parts]
    start = parts[0]
    end = start if len(parts) == 1 else parts[1]
    if start > end:
        # Accept reversed bounds by swapping them.
        end, start = start, end
    return range(start, end + 1)
def parse_range_list(rngs):
    """Expand a comma-separated list of range specifications.

    Each comma-separated item is handed to ``parse_range``; the numbers from
    all items are merged, de-duplicated and returned as an ascending list.
    """
    numbers = set()
    for spec in rngs.split(','):
        numbers.update(parse_range(spec))
    return sorted(numbers)
@sjaek
Copy link

sjaek commented Aug 7, 2019

I made a version that supports negative numbers and sorts and collapses the ranges and only then expands them so it's easy on memory for large ranges.

from itertools import chain

def parse_range_list(rl):
    """Lazily expand a comma-separated list of numbers and inclusive ranges.

    Items look like ``"7"``, ``"3-9"``, ``"-5"`` or ``"-5--2"`` (negative
    numbers are supported); blank items are ignored. The ranges are sorted
    and overlapping parts are trimmed *before* expansion, so the result is an
    iterator over ascending, unique integers that never materializes the
    individual ranges.

    Raises ValueError if an item contains more than two numbers or a part
    that is not an integer.
    """
    def collapse_range(ranges):
        # ``end`` is the exclusive upper bound of everything yielded so far.
        # It must be compared against None explicitly: 0 is a legitimate
        # bound (e.g. after "-5--1") and would otherwise be mistaken for
        # "nothing yielded yet", letting overlapping values through twice.
        end = None
        for value in ranges:
            if end is None:
                yield value
                end = value.stop
            else:
                # Trim off whatever part of this range was already covered.
                yield range(max(end, value.start), max(end, value.stop))
                end = max(end, value.stop)

    def split_range(value):
        # Splitting on '-' leaves an empty string in front of every negative
        # number; pair each piece with its predecessor so that an empty
        # predecessor marks the piece as negative.
        value = value.split('-')
        for val, prev in zip(value, chain((None,), value)):
            if val != '':
                val = int(val)
                if prev == '':
                    val *= -1
                yield val

    def parse_range(r):
        parts = list(split_range(r.strip()))
        if not parts:
            # Blank item: contribute an empty range.
            return range(0, 0)
        elif len(parts) > 2:
            raise ValueError("Invalid range: {}".format(r))
        # A single number yields a one-element range (parts[0] is parts[-1]).
        return range(parts[0], parts[-1] + 1)

    ranges = sorted(set(map(parse_range, rl.split(","))), key=lambda x: (x.start, x.stop))
    return chain.from_iterable(collapse_range(ranges))

Output:

list(parse_range_list('-5--2,7-8,7-10,99-105,1,4-5,2-5,100,  ,, -100'))
# [-100, -5, -4, -3, -2, 1, 2, 3, 4, 5, 7, 8, 9, 10, 99, 100, 101, 102, 103, 104, 105]

@BEFH
Copy link

BEFH commented Sep 26, 2019

Here's a version that allows interspersed strings and multiple separators:

import re
from itertools import chain

def parse_range_list(rgstr):
    """Parse a list of numbers, inclusive ranges and free-form tokens.

    Items are separated by ``,`` or ``;`` and whitespace around items is
    ignored. A range is written with ``-`` or ``:`` (``"2-5"``, ``"2:5"``).
    An item that is not a number or a range is kept as a string. Empty items
    are skipped.

    Returns a list of unique values: integers in ascending order, followed
    by the string tokens in alphabetical order.

    Raises ValueError if an item has more than two range parts, or has two
    parts that are not both integers.
    """
    def parse_range(rg):
        if not rg:
            return []
        parts = re.split(r'[:-]', rg)
        if len(parts) > 2:
            raise ValueError("Invalid range: {}".format(rg))
        try:
            return range(int(parts[0]), int(parts[-1]) + 1)
        except ValueError:
            if len(parts) == 1:
                # Not a number at all: pass the token through unchanged.
                return parts
            else:
                raise ValueError("Non-integer range: {}".format(rg))

    # Raw string: "\s" in a normal string literal is an invalid escape.
    # Strip first so outer whitespace does not leak into the edge tokens.
    items = re.split(r"\s*[,;]\s*", rgstr.strip())
    unique = set(chain.from_iterable(map(parse_range, items)))
    # Set iteration order is arbitrary, and ints cannot be compared with
    # strings, so sort on (is-string, value) for a deterministic result.
    return sorted(unique, key=lambda v: (isinstance(v, str), v))

Output:

parse_range_list('2-5,7,;16 ,15-17, 12 ; X')
# [2, 3, 4, 5, 7, 12, 15, 16, 17, 'X']

@TAbdiukov
Copy link

@sjaek

I made a version that supports negative numbers and sorts and collapses the ranges and only then expands them so it's easy on memory for large ranges.

danke!

@s3rgeym
Copy link

s3rgeym commented Oct 30, 2021

def status_code(v):
    """Expand a status-code specification into a list of integers.

    ``v`` is either ``'*'``, which expands to every code from 200 to 299, or
    a comma-separated list of single codes and inclusive ``start-stop``
    ranges, e.g. ``'200-204,206,301'``. Codes are returned in the order
    given; duplicates are not removed. Blank items (such as the one produced
    by a trailing comma) are ignored.

    Raises ValueError if an item is not an integer or a two-part range.
    """
    if v == '*':
        return list(range(200, 300))
    rv = []
    for x in v.split(','):
        if not x.strip():
            # Skip blank items instead of failing on int('').
            continue
        if '-' in x:
            start, stop = map(int, x.split('-'))
            rv.extend(range(start, stop + 1))
        else:
            rv.append(int(x))
    return rv
...
In [12]: status_code('200-204,206,301,302,400-404,500')
Out[12]: [200, 201, 202, 203, 204, 206, 301, 302, 400, 401, 402, 403, 404, 500]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment