# =============================
# Regex Validity Patterns
# =============================
# These are for all String, Misc, and Numeric Properties
# The Catalog/Enum/Binary Properties are validated using the values in PropertyValueAliases.txt,
# and thus have null regular expression validity patterns.
#
# This file also indicates where any property is multivalued.
# =============================
# Format
# Field 1: property name
# Field 2: cardinality
#    SINGLE_VALUED: has only one value
#    MULTI_VALUED: has multiple values, arbitrary order
#    ORDERED: has multiple values, significant order
#    EXTENSIBLE: currently single valued, but could have multiple values in future versions
# Field 3: regex validity pattern
# If the line starts with $, then it is a variable definition of the form $variable = <regex>.
# Each regular expression validity pattern has all variables replaced before use.
# =============================
# Regex patterns from UAX #44 (fixed, plus missing)
#
# Variables
# Each variable is defined before the first definition or record that uses it.
#
# Numbers: positiveDecimal requires both an integer part and a fractional part;
# optionalDecimal makes the fractional part optional. rational is a decimal
# optionally followed by "/" and a positiveDecimal.
$positiveDecimal = [0-9]+\.[0-9]+
$decimal = -?$positiveDecimal
$rational = $decimal(/$positiveDecimal)?
$optionalDecimal = -?[0-9]+(\.[0-9]+)?
#
# Names: runs of ASCII letters/digits joined by a single hyphen, underscore, or space.
# nameHack additionally accepts " -" and "- " as joiners; name1 accepts an optional
# trailing " (XX)" or " (XXX)" suffix of uppercase letters.
$name = [a-zA-Z0-9]+([-_ ][a-zA-Z0-9]+)*
$nameHack = [a-zA-Z0-9]+(( -|- |[-_ ])[a-zA-Z0-9]+)*
$name1 = $name( \([A-Z]{2,3}\))?
#
# Code points: four uppercase hex digits, optionally preceded by one more hex digit or "10".
# codePoints is one or more code points separated by a single whitespace character;
# codePoint0 also accepts the empty string.
$codePoint = (10|[A-F0-9])?[A-F0-9]{4}
$codePoints = $codePoint(\s$codePoint)*
$codePoint0 = ($codePoints)?
# Main data
# One record per line: property name ; cardinality ; regex validity pattern.
#
# Single code point values
Bidi_Mirroring_Glyph ; SINGLE_VALUED ; $codePoint
Simple_Lowercase_Mapping ; SINGLE_VALUED ; $codePoint
Simple_Titlecase_Mapping ; SINGLE_VALUED ; $codePoint
Simple_Uppercase_Mapping ; SINGLE_VALUED ; $codePoint
#
# Code point sequence that may be empty
NFKC_Casefold ; SINGLE_VALUED ; $codePoint0
#
# Non-empty code point sequences
Case_Folding ; SINGLE_VALUED ; $codePoints
Decomposition_Mapping ; SINGLE_VALUED ; $codePoints
FC_NFKC_Closure ; SINGLE_VALUED ; $codePoints
Lowercase_Mapping ; SINGLE_VALUED ; $codePoints
Simple_Case_Folding ; SINGLE_VALUED ; $codePoints
Titlecase_Mapping ; SINGLE_VALUED ; $codePoints
Uppercase_Mapping ; SINGLE_VALUED ; $codePoints
#
# Name-like values
ISO_Comment ; SINGLE_VALUED ; $name
Unicode_1_Name ; SINGLE_VALUED ; $name1
Name ; SINGLE_VALUED ; $nameHack
Named_Sequences ; SINGLE_VALUED ; $nameHack
Named_Sequences_Prov ; SINGLE_VALUED ; $name
Name_Alias ; MULTI_VALUED ; $nameHack
Name_Alias_Prov ; MULTI_VALUED ; $nameHack
#
# Numeric values
Numeric_Value ; SINGLE_VALUED ; $rational
#
# The empty alternative between "SS?" and "JJ?" lets this pattern accept an empty value.
# NOTE(review): presumably for a jamo whose short name is empty - confirm against Jamo.txt.
Jamo_Short_Name ; SINGLE_VALUED ; GG?|N|DD?|R|M|BB?|SS?||JJ?|C|K|T|P|H|AE?|YAE?|EO?|YEO?|O|WAE?|OE|YO|U|WEO?|WI|YU|EU|YI|I|GS|NJ|NH|L|LG|LM|LB|LS|LT|LP|LH|BS|NG
#
# Four uppercase hex digits
Emoji_DCM ; SINGLE_VALUED ; [0-9A-F]{4}
Emoji_KDDI ; SINGLE_VALUED ; [0-9A-F]{4}
Emoji_SB ; SINGLE_VALUED ; [0-9A-F]{4}
#
# Plain radical numbers 1-214, or one of the listed numbers followed by an apostrophe.
# The trailing apostrophe binds only to the last (parenthesised) alternative.
# NOTE(review): the apostrophe presumably marks a simplified radical form - confirm.
CJK_Radical ; SINGLE_VALUED ; [1-9][0-9]?|1[0-9][0-9]|2(0[0-9]|1[0-4])|(90|120|147|149|154|159|162|16[7-9]|178|18[1-4]|187|19[5-7]|199|201|205|21[0-3])'
#
# Regex patterns from UAX #38
# Unlike the records above, these patterns carry explicit ^ and $ anchors.
kHanyuPinyin ; MULTI_VALUED ; ^(\d{5}\.\d{2}0,)*\d{5}\.\d{2}0:([a-z\x{300}-\x{302}\x{304}\x{308}\x{30C}]+,)*[a-z\x{300}-\x{302}\x{304}\x{308}\x{30C}]+$
kIICore ; SINGLE_VALUED ; ^2\.1$
kIBMJapan ; SINGLE_VALUED ; ^F[ABC][0-9A-F]{2}$
kIRG_GSource ; SINGLE_VALUED ; ^G(4K|BK|CH|CY|FZ|HC|HZ|((BK|CH|GH|HC|XC|ZH)-[0-9]{4}\.[0-9]{2})|HZ-[0-9]{5}\.[0-9]{2}|(KX-[01][0-9]{3}\.[0-9]{2})|((CYY|FZ|JZ|ZFY|ZJW)-[0-9]{5})|([0135789ES]-[0-9A-F]{4})|IDC-[0-9]{3})$
kIRG_HSource ; SINGLE_VALUED ; ^H-[0-9A-F]{4}$
kIRG_JSource ; SINGLE_VALUED ; ^J((([0134AK]|3A|ARIB)-[0-9A-F]{4,5})|(H-(((IB|JT|[0-9]{2})[0-9A-F]{4}S?))))$
kIRG_KPSource ; SINGLE_VALUED ; ^KP[01]-[0-9A-F]{4}$ kIRG_KSource ; SINGLE_VALUED ; ^K[0-57]-[0-9A-F]{4}$ kIRG_MSource ; SINGLE_VALUED ; ^MAC-[0-9]{5}$ kIRG_TSource ; SINGLE_VALUED ; ^T[1-7B-F]-[0-9A-F]{4}$ kIRG_USource ; SINGLE_VALUED ; ^U(TC|CI)-[0-9]{5}$ kCompatibilityVariant ; SINGLE_VALUED ; ^U\+2?[0-9A-F]{4}$ kSimplifiedVariant ; MULTI_VALUED ; ^U\+2?[0-9A-F]{4}$ kTraditionalVariant ; MULTI_VALUED ; ^U\+2?[0-9A-F]{4}$ kSemanticVariant ; MULTI_VALUED ; ^U\+2?[0-9A-F]{4}(