Skip to content

Instantly share code, notes, and snippets.

@emasaka
Created December 28, 2016 02:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save emasaka/34a12b82af198a63e318b4aab308503c to your computer and use it in GitHub Desktop.
Save emasaka/34a12b82af198a63e318b4aab308503c to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# answer to http://ja.stackoverflow.com/questions/31353/python3-%e3%81%a7%e3%81%ae%e6%ad%a3%e8%a6%8f%e8%a1%a8%e7%8f%be%e3%81%b8%e3%81%ae%e5%a4%89%e6%8f%9b%e3%81%ab%e3%81%a4%e3%81%84%e3%81%a6
import re
def _parse_numrange(s):
m = re.search(r'\[(\d+)-(\d+)\]', s)
return m.group(1), m.group(2)
def _join_pos_digit(lst, p):
lst2 = [lst[0]] # 処理後のリスト
for i in range(1, len(lst)):
if len(lst[i]) < p or len(lst[i - 1]) < p:
# 比較する桁が存在しない
lst2.append(lst[i])
continue
# lst中の前の値と当該桁を比較
rst = lst[i][-(p - 1)] if p > 1 else []
rst1 = lst[i - 1][-(p - 1)] if p > 1 else []
if int(lst[i - 1][-p]) + 1 == int(lst[i][-p]) and rst == rst1:
# 当該桁が前の値+1で、当該桁以降が同じなら、lst2でまとめる
if isinstance(lst2[-1][-p], list):
# lst2で当該桁をまとめ済み
lst2[-1][-p].append(lst[i][-p])
else:
# lst2で当該桁はまだまとめていない
lst2[-1][-p] = [lst2[-1][-p], lst[i][-p]]
else:
# lst2に追加
lst2.append(lst[i])
return lst2
def _check_head(lst):
lst2 = [lst[0]] # 処理後のリスト
for i in range(1, len(lst)):
if len(lst[i]) == len(lst[i - 1]) + 1 and lst[i][1:] == lst[i - 1]:
lst2[i - 1] = [lst[i][0], '?', *lst[i - 1]]
else:
lst2.append(lst[i])
return lst2
def _lst2re(lst):
lst2 = []
for x in lst:
s = ''
for c in x:
if isinstance(c, list):
if len(c) > 2:
s = s + '[' + c[0] + '-' + c[-1] + ']'
else:
s = s + '[' + c[0] + c[1] + ']'
else:
s = s + c
lst2.append(s)
return '|'.join(lst2)
def numrange2re(s):
    """Convert a ``[start-end]`` numeric range into a regex alternation.

    Args:
        s: Range specification such as ``'[10-20]'``.

    Returns:
        A regex source string whose alternatives together describe the
        decimal integers from start to end inclusive, e.g. ``'1[0-9]|20'``
        for ``'[10-20]'``.

    Raises:
        ValueError: If start is greater than end.
    """
    st, ed = _parse_numrange(s)
    low, high = int(st), int(ed)
    if low > high:
        # An empty range would otherwise fail with an IndexError deep inside
        # the merging helpers.
        raise ValueError('range start %d is greater than end %d' % (low, high))
    # Each number -> its decimal string -> list of single-digit strings.
    lst = [list(str(x)) for x in range(low, high + 1)]
    # Merge digits into character classes, starting from the rightmost
    # position.  The widest number is ``high``, so its digit count bounds
    # the number of passes needed.
    for p in range(1, len(str(high)) + 1):
        lst = _join_pos_digit(lst, p)
    # Look for leading items that can be expressed as an optional "x?".
    lst = _check_head(lst)
    # Entries -> regex source string.
    return _lst2re(lst)
# Demo: print the generated pattern for a few sample ranges.
# Expected output, in order:
#   '[10-20]' => '1[0-9]|20'
#   '[0-100]' => '[1-9]?[0-9]|100'
#   '[23-94]' => '2[3-9]|[3-8][0-9]|9[0-4]'
for _sample in ('[10-20]', '[0-100]', '[23-94]'):
    print(numrange2re(_sample))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment