Changes to regexp_parser Unicode property token names
| old pattern | new pattern | old example | new example |
|---|---|---|---|
| `*_any` | `*` | `number_any` | `number` |
| `*_cp` | `*_code_point` | `non_character_cp` | `noncharacter_code_point` |
| `age_*_*` | `age=*.*` | `age_6_0` | `age=6.0` |
| `ascii_hex` | `ascii_hex_digit` | `ascii_hex` | `ascii_hex_digit` |
| `block_in*` | `in_*` | `block_inadlam` | `in_adlam` |
| `ids_*_op` | `ids_*_operator` | `ids_binary_op` | `ids_binary_operator` |
| `letter_*` | `*_letter` | `letter_other` | `other_letter` |
| `mark_*` | `*_mark` | `mark_other` | `other_mark` |
| `number_*` | `*_number` | `number_other` | `other_number` |
| `punct_*` | `*_punctuation` | `punct_dash` | `dash_punctuation` |
| `script_*` | `*` | `script_thai` | `thai` |
| `separator_*` | `*_separator` | `separator_other` | `other_separator` |
| `symbol_*` | `*_symbol` | `symbol_other` | `other_symbol` |
| `*para*` | `*paragraph*` | `separator_para` | `paragraph_separator` |
| `*whitespace` | `*white_space` | `pattern_whitespace` | `pattern_white_space` |
# Translates an old-style regexp_parser Unicode property token name into the
# corresponding new-style name.
#
# The substitutions are applied in sequence, each one operating on the result
# of the previous one, so their order matters (for example, `number_any` is
# first flipped to `any_number` and only then loses its `any_` prefix). A name
# that matches none of the patterns is returned unchanged, as a Symbol.
#
# @param token [#to_s] old token name, e.g. :age_6_0 or 'punct_dash'
# @return [Symbol] normalized token name, e.g. :'age=6.0' or :dash_punctuation
def normalize_old_rp_property_token(token)
  token
    .to_s
    # age_6_0 -> age=6.0 (`\d+` must be written without inner whitespace;
    # `\d +` would require literal spaces after a single digit).
    .sub(/^age_(\d+)_(\d+)$/, 'age=\1.\2')
    .sub(/^ascii_hex$/, 'ascii_hex_digit')
    # block_inadlam -> in_adlam
    .sub(/^block_in/, 'in_')
    # non_character_cp -> noncharacter_cp (the `_cp` suffix is expanded next)
    .sub(/^non_character/, 'noncharacter')
    .sub(/_cp$/, '_code_point')
    # ids_binary_op -> ids_binary_operator
    .sub(/^ids_(.+)_op$/, 'ids_\1_operator')
    .sub(/^other_grapheme_extended$/, 'other_grapheme_extend')
    # punct_dash -> dash_punctuation
    .sub(/^punct_(.+)$/, '\1_punctuation')
    # script_thai -> thai
    .sub(/^script_/, '')
    # Must run before the generic category flip below, which would otherwise
    # turn separator_para into para_separator.
    .sub(/^separator_para$/, 'paragraph_separator')
    # letter_other -> other_letter, number_any -> any_number, ...
    .sub(/^(letter|number|mark|separator|symbol)_(.+)$/, '\2_\1')
    # any_number -> number (completes the `*_any` -> `*` rename)
    .sub(/^any_/, '')
    # pattern_whitespace -> pattern_white_space
    .sub(/whitespace$/, 'white_space')
    .to_sym
end