|
|
# This set of tests checks the API, internals, and non-Perl stuff for UTF
# support, including Unicode properties. However, tests that give different
# results in 8-bit, 16-bit, and 32-bit modes are excluded (see tests 10 and
# 12).
#newline_default lf any anycrlf
# PCRE2 and Perl disagree about the characteristics of certain Unicode
# characters. For example, 061C was considered by Perl to be Arabic, though
# it was not listed as such in the Unicode Scripts.txt file for Unicode 8.
# However, it *is* in that file for Unicode 10, but when I came to re-check,
# Perl had changed in the meantime, with 5.026 not recognizing it as Arabic.
# 2066-2069 are graphic and printable according to Perl, though they are
# actually "isolate" control characters. That is why the following tests are
# here rather than in test 4.
/^[\p{Arabic}]/utf \x{061c} 0: \x{61c}
/^[[:graph:]]+$/utf,ucp \= Expect no match \x{61c} No match \x{2066} No match \x{2067} No match \x{2068} No match \x{2069} No match
/^[[:print:]]+$/utf,ucp \= Expect no match \x{61c} No match \x{2066} No match \x{2067} No match \x{2068} No match \x{2069} No match
/^[[:^graph:]]+$/utf,ucp \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680} 0: \x{09}\x{0a}\x{1d} \x{85}\x{a0}\x{61c}\x{1680} \x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069} 0: \x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069}
/^[[:^print:]]+$/utf,ucp \x{09}\x{1D}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067} 0: \x{09}\x{1d}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067} \x{2068}\x{2069} 0: \x{2068}\x{2069}
# Perl does not consider U+180e to be a space character. It is true that it
# does not appear in the Unicode PropList.txt file as such, but in many other
# sources it is listed as a space, and has been treated as such in PCRE for
# a long time.
/^>[[:blank:]]*/utf,ucp >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028} 0: > \x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{09}
/^A\s+Z/utf,ucp A\x{85}\x{180e}\x{2005}Z 0: A\x{85}\x{180e}\x{2005}Z
/^A[\s]+Z/utf,ucp A\x{2005}Z 0: A\x{2005}Z A\x{85}\x{2005}Z 0: A\x{85}\x{2005}Z
/^[[:graph:]]+$/utf,ucp \= Expect no match \x{180e} No match
/^[[:print:]]+$/utf,ucp \x{180e} 0: \x{180e}
/^[[:^graph:]]+$/utf,ucp \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e} 0: \x{09}\x{0a}\x{1d} \x{85}\x{a0}\x{61c}\x{1680}\x{180e}
/^[[:^print:]]+$/utf,ucp \= Expect no match \x{180e} No match
# End of U+180E tests.
# ---------------------------------------------------------------------
/\x{110000}/IB,utf Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large
/\o{4200000}/IB,utf Failed: error 134 at offset 10: character code point value in \x{} or \o{} is too large
/\x{ffffffff}/utf Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
/\o{37777777777}/utf Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
/\x{100000000}/utf Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
/\o{77777777777}/utf Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
/\x{d800}/utf Failed: error 173 at offset 7: disallowed Unicode code point (>= 0xd800 && <= 0xdfff)
/\o{154000}/utf Failed: error 173 at offset 9: disallowed Unicode code point (>= 0xd800 && <= 0xdfff)
/\x{dfff}/utf Failed: error 173 at offset 7: disallowed Unicode code point (>= 0xd800 && <= 0xdfff)
/\o{157777}/utf Failed: error 173 at offset 9: disallowed Unicode code point (>= 0xd800 && <= 0xdfff)
/\x{d7ff}/utf
/\o{153777}/utf
/\x{e000}/utf
/\o{170000}/utf
/^\x{100}a\x{1234}/utf \x{100}a\x{1234}bcd 0: \x{100}a\x{1234}
/\x{0041}\x{2262}\x{0391}\x{002e}/IB,utf ------------------------------------------------------------------ Bra A\x{2262}\x{391}. Ket End ------------------------------------------------------------------ Capture group count = 0 Options: utf First code unit = 'A' Last code unit = '.' Subject length lower bound = 4 \x{0041}\x{2262}\x{0391}\x{002e} 0: A\x{2262}\x{391}.
/.{3,5}X/IB,utf ------------------------------------------------------------------ Bra Any{3} Any{0,2} X Ket End ------------------------------------------------------------------ Capture group count = 0 Options: utf Last code unit = 'X' Subject length lower bound = 4 \x{212ab}\x{212ab}\x{212ab}\x{861}X 0: \x{212ab}\x{212ab}\x{212ab}\x{861}X
/.{3,5}?/IB,utf ------------------------------------------------------------------ Bra Any{3} Any{0,2}? Ket End ------------------------------------------------------------------ Capture group count = 0 Options: utf Subject length lower bound = 3 \x{212ab}\x{212ab}\x{212ab}\x{861} 0: \x{212ab}\x{212ab}\x{212ab}
/^[ab]/IB,utf ------------------------------------------------------------------ Bra ^ [ab] Ket End ------------------------------------------------------------------ Capture group count = 0 Compile options: utf Overall options: anchored utf Starting code units: a b Subject length lower bound = 1 bar 0: b \= Expect no match c No match \x{ff} No match \x{100} No match
/\x{100}*(\d+|"(?1)")/utf 1234 0: 1234 1: 1234 "1234" 0: "1234" 1: "1234" \x{100}1234 0: \x{100}1234 1: 1234 "\x{100}1234" 0: \x{100}1234 1: 1234 \x{100}\x{100}12ab 0: \x{100}\x{100}12 1: 12 \x{100}\x{100}"12" 0: \x{100}\x{100}"12" 1: "12" \= Expect no match \x{100}\x{100}abcd No match
/\x{100}*/IB,utf ------------------------------------------------------------------ Bra \x{100}*+ Ket End ------------------------------------------------------------------ Capture group count = 0 May match empty string Options: utf Subject length lower bound = 0
/a\x{100}*/IB,utf ------------------------------------------------------------------ Bra a \x{100}*+ Ket End ------------------------------------------------------------------ Capture group count = 0 Options: utf First code unit = 'a' Subject length lower bound = 1
/ab\x{100}*/IB,utf ------------------------------------------------------------------ Bra ab \x{100}*+ Ket End ------------------------------------------------------------------ Capture group count = 0 Options: utf First code unit = 'a' Last code unit = 'b' Subject length lower bound = 2
/[\x{200}-\x{100}]/utf Failed: error 108 at offset 15: range out of order in character class
/[Ā-Ą]/utf \x{100} 0: \x{100} \x{104} 0: \x{104} \= Expect no match \x{105} No match \x{ff} No match
/[\xFF]/IB ------------------------------------------------------------------ Bra \x{ff} Ket End ------------------------------------------------------------------ Capture group count = 0 First code unit = \xff Subject length lower bound = 1 >\xff< 0: \xff
/[^\xFF]/IB ------------------------------------------------------------------ Bra [^\x{ff}] Ket End ------------------------------------------------------------------ Capture group count = 0 Subject length lower bound = 1
/[Ä-Ü]/utf Ö # Matches without Study 0: \x{d6} \x{d6} 0: \x{d6}
/[Ä-Ü]/utf Ö <-- Same with Study 0: \x{d6} \x{d6} 0: \x{d6}
/[\x{c4}-\x{dc}]/utf Ö # Matches without Study 0: \x{d6} \x{d6} 0: \x{d6}
/[\x{c4}-\x{dc}]/utf Ö <-- Same with Study 0: \x{d6} \x{d6} 0: \x{d6}
/[^\x{100}]abc(xyz(?1))/IB,utf ------------------------------------------------------------------ Bra [^\x{100}] abc CBra 1 xyz Recurse Ket Ket End ------------------------------------------------------------------ Capture group count = 1 Options: utf Last code unit = 'z' Subject length lower bound = 7
/(\x{100}(b(?2)c))?/IB,utf ------------------------------------------------------------------ Bra Brazero CBra 1 \x{100} CBra 2 b Recurse c Ket Ket Ket End ------------------------------------------------------------------ Capture group count = 2 May match empty string Options: utf Subject length lower bound = 0
/(\x{100}(b(?2)c)){0,2}/IB,utf ------------------------------------------------------------------ Bra Brazero Bra CBra 1 \x{100} CBra 2 b Recurse c Ket Ket Brazero CBra 1 \x{100} CBra 2 b Recurse c Ket Ket Ket Ket End ------------------------------------------------------------------ Capture group count = 2 May match empty string Options: utf Subject length lower bound = 0
/(\x{100}(b(?1)c))?/IB,utf ------------------------------------------------------------------ Bra Brazero CBra 1 \x{100} CBra 2 b Recurse c Ket Ket Ket End ------------------------------------------------------------------ Capture group count = 2 May match empty string Options: utf Subject length lower bound = 0
/(\x{100}(b(?1)c)){0,2}/IB,utf ------------------------------------------------------------------ Bra Brazero Bra CBra 1 \x{100} CBra 2 b Recurse c Ket Ket Brazero CBra 1 \x{100} CBra 2 b Recurse c Ket Ket Ket Ket End ------------------------------------------------------------------ Capture group count = 2 May match empty string Options: utf Subject length lower bound = 0
/\W/utf A.B 0: . A\x{100}B 0: \x{100}
/\w/utf \x{100}X 0: X
# Use no_start_optimize because the first code unit is different in 8-bit from
# the wider modes.
/^\ሴ/IB,utf,no_start_optimize ------------------------------------------------------------------ Bra ^ \x{1234} Ket End ------------------------------------------------------------------ Capture group count = 0 Compile options: no_start_optimize utf Overall options: anchored no_start_optimize utf
/()()()()()()()()()() ()()()()()()()()()() ()()()()()()()()()() ()()()()()()()()()() A (x) (?41) B/x,utf AxxB Matched, but too many substrings 0: AxxB 1: 2: 3: 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14:
/^[\x{100}\E-\Q\E\x{150}]/B,utf ------------------------------------------------------------------ Bra ^ [\x{100}-\x{150}] Ket End ------------------------------------------------------------------
/^[\QĀ\E-\QŐ\E]/B,utf ------------------------------------------------------------------ Bra ^ [\x{100}-\x{150}] Ket End ------------------------------------------------------------------
/^abc./gmx,newline=any,utf abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK 0: abc1 0: abc2 0: abc3 0: abc4 0: abc5 0: abc6 0: abc7 0: abc8 0: abc9
/abc.$/gmx,newline=any,utf abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x{0085} abc7\x{2028} abc8\x{2029} abc9 0: abc1 0: abc2 0: abc3 0: abc4 0: abc5 0: abc6 0: abc7 0: abc8 0: abc9
/^a\Rb/bsr=unicode,utf a\nb 0: a\x{0a}b a\rb 0: a\x{0d}b a\r\nb 0: a\x{0d}\x{0a}b a\x0bb 0: a\x{0b}b a\x0cb 0: a\x{0c}b a\x{85}b 0: a\x{85}b a\x{2028}b 0: a\x{2028}b a\x{2029}b 0: a\x{2029}b \= Expect no match a\n\rb No match
/^a\R*b/bsr=unicode,utf ab 0: ab a\nb 0: a\x{0a}b a\rb 0: a\x{0d}b a\r\nb 0: a\x{0d}\x{0a}b a\x0bb 0: a\x{0b}b a\x0c\x{2028}\x{2029}b 0: a\x{0c}\x{2028}\x{2029}b a\x{85}b 0: a\x{85}b a\n\rb 0: a\x{0a}\x{0d}b a\n\r\x{85}\x0cb 0: a\x{0a}\x{0d}\x{85}\x{0c}b
/^a\R+b/bsr=unicode,utf a\nb 0: a\x{0a}b a\rb 0: a\x{0d}b a\r\nb 0: a\x{0d}\x{0a}b a\x0bb 0: a\x{0b}b a\x0c\x{2028}\x{2029}b 0: a\x{0c}\x{2028}\x{2029}b a\x{85}b 0: a\x{85}b a\n\rb 0: a\x{0a}\x{0d}b a\n\r\x{85}\x0cb 0: a\x{0a}\x{0d}\x{85}\x{0c}b \= Expect no match ab No match
/^a\R{1,3}b/bsr=unicode,utf a\nb 0: a\x{0a}b a\n\rb 0: a\x{0a}\x{0d}b a\n\r\x{85}b 0: a\x{0a}\x{0d}\x{85}b a\r\n\r\nb 0: a\x{0d}\x{0a}\x{0d}\x{0a}b a\r\n\r\n\r\nb 0: a\x{0d}\x{0a}\x{0d}\x{0a}\x{0d}\x{0a}b a\n\r\n\rb 0: a\x{0a}\x{0d}\x{0a}\x{0d}b a\n\n\r\nb 0: a\x{0a}\x{0a}\x{0d}\x{0a}b \= Expect no match a\n\n\n\rb No match a\r No match
/\H\h\V\v/utf X X\x0a 0: X X\x{0a} X\x09X\x0b 0: X\x{09}X\x{0b} \= Expect no match \x{a0} X\x0a No match
/\H*\h+\V?\v{3,4}/utf \x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a 0: \x{09} \x{a0}X\x{0a}\x{0b}\x{0c}\x{0d} \x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a 0: \x{09} \x{a0}\x{0a}\x{0b}\x{0c}\x{0d} \x09\x20\x{a0}\x0a\x0b\x0c 0: \x{09} \x{a0}\x{0a}\x{0b}\x{0c} \= Expect no match \x09\x20\x{a0}\x0a\x0b No match
/\H\h\V\v/utf \x{3001}\x{3000}\x{2030}\x{2028} 0: \x{3001}\x{3000}\x{2030}\x{2028} X\x{180e}X\x{85} 0: X\x{180e}X\x{85} \= Expect no match \x{2009} X\x0a No match
/\H*\h+\V?\v{3,4}/utf \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a 0: \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x{0c}\x{0d} \x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a 0: \x{09}\x{205f}\x{a0}\x{0a}\x{2029}\x{0c}\x{2028} \x09\x20\x{202f}\x0a\x0b\x0c 0: \x{09} \x{202f}\x{0a}\x{0b}\x{0c} \= Expect no match \x09\x{200a}\x{a0}\x{2028}\x0b No match
/[\h]/B,utf ------------------------------------------------------------------ Bra [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}] Ket End ------------------------------------------------------------------ >\x{1680} 0: \x{1680}
/[\h]{3,}/B,utf ------------------------------------------------------------------ Bra [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]{3,}+ Ket End ------------------------------------------------------------------ >\x{1680}\x{180e}\x{2000}\x{2003}\x{200a}\x{202f}\x{205f}\x{3000}< 0: \x{1680}\x{180e}\x{2000}\x{2003}\x{200a}\x{202f}\x{205f}\x{3000}
/[\v]/B,utf ------------------------------------------------------------------ Bra [\x0a-\x0d\x85\x{2028}-\x{2029}] Ket End ------------------------------------------------------------------
/[\H]/B,utf ------------------------------------------------------------------ Bra [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}] Ket End ------------------------------------------------------------------
/[\V]/B,utf ------------------------------------------------------------------ Bra [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{10ffff}] Ket End ------------------------------------------------------------------
/.*$/newline=any,utf \x{1ec5} 0: \x{1ec5}
/a\Rb/I,bsr=anycrlf,utf Capture group count = 0 Options: utf \R matches CR, LF, or CRLF First code unit = 'a' Last code unit = 'b' Subject length lower bound = 3 a\rb 0: a\x{0d}b a\nb 0: a\x{0a}b a\r\nb 0: a\x{0d}\x{0a}b \= Expect no match a\x{85}b No match a\x0bb No match
/a\Rb/I,bsr=unicode,utf Capture group count = 0 Options: utf \R matches any Unicode newline First code unit = 'a' Last code unit = 'b' Subject length lower bound = 3 a\rb 0: a\x{0d}b a\nb 0: a\x{0a}b a\r\nb 0: a\x{0d}\x{0a}b a\x{85}b 0: a\x{85}b a\x0bb 0: a\x{0b}b
/a\R?b/I,bsr=anycrlf,utf Capture group count = 0 Options: utf \R matches CR, LF, or CRLF First code unit = 'a' Last code unit = 'b' Subject length lower bound = 2 a\rb 0: a\x{0d}b a\nb 0: a\x{0a}b a\r\nb 0: a\x{0d}\x{0a}b \= Expect no match a\x{85}b No match a\x0bb No match
/a\R?b/I,bsr=unicode,utf Capture group count = 0 Options: utf \R matches any Unicode newline First code unit = 'a' Last code unit = 'b' Subject length lower bound = 2 a\rb 0: a\x{0d}b a\nb 0: a\x{0a}b a\r\nb 0: a\x{0d}\x{0a}b a\x{85}b 0: a\x{85}b a\x0bb 0: a\x{0b}b
/.*a.*=.b.*/utf,newline=any QQQ\x{2029}ABCaXYZ=!bPQR 0: ABCaXYZ=!bPQR \= Expect no match a\x{2029}b No match \x61\xe2\x80\xa9\x62 No match
/[[:a\x{100}b:]]/utf Failed: error 130 at offset 3: unknown POSIX class name
/a[^]b/utf,allow_empty_class,match_unset_backref a\x{1234}b 0: a\x{1234}b a\nb 0: a\x{0a}b \= Expect no match ab No match
/a[^]+b/utf,allow_empty_class,match_unset_backref aXb 0: aXb a\nX\nX\x{1234}b 0: a\x{0a}X\x{0a}X\x{1234}b \= Expect no match ab No match
/(\x{de})\1/ \x{de}\x{de} 0: \xde\xde 1: \xde
/X/newline=any,utf,firstline A\x{1ec5}ABCXYZ 0: X
/Xa{2,4}b/utf X\=ps Partial match: X Xa\=ps Partial match: Xa Xaa\=ps Partial match: Xaa Xaaa\=ps Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa
/Xa{2,4}?b/utf X\=ps Partial match: X Xa\=ps Partial match: Xa Xaa\=ps Partial match: Xaa Xaaa\=ps Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa
/Xa{2,4}+b/utf X\=ps Partial match: X Xa\=ps Partial match: Xa Xaa\=ps Partial match: Xaa Xaaa\=ps Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa
/X\x{123}{2,4}b/utf X\=ps Partial match: X X\x{123}\=ps Partial match: X\x{123} X\x{123}\x{123}\=ps Partial match: X\x{123}\x{123} X\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123}
/X\x{123}{2,4}?b/utf X\=ps Partial match: X X\x{123}\=ps Partial match: X\x{123} X\x{123}\x{123}\=ps Partial match: X\x{123}\x{123} X\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123}
/X\x{123}{2,4}+b/utf X\=ps Partial match: X X\x{123}\=ps Partial match: X\x{123} X\x{123}\x{123}\=ps Partial match: X\x{123}\x{123} X\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123}
/X\x{123}{2,4}b/utf \= Expect no match Xx\=ps No match X\x{123}x\=ps No match X\x{123}\x{123}x\=ps No match X\x{123}\x{123}\x{123}x\=ps No match X\x{123}\x{123}\x{123}\x{123}x\=ps No match
/X\x{123}{2,4}?b/utf \= Expect no match Xx\=ps No match X\x{123}x\=ps No match X\x{123}\x{123}x\=ps No match X\x{123}\x{123}\x{123}x\=ps No match X\x{123}\x{123}\x{123}\x{123}x\=ps No match
/X\x{123}{2,4}+b/utf \= Expect no match Xx\=ps No match X\x{123}x\=ps No match X\x{123}\x{123}x\=ps No match X\x{123}\x{123}\x{123}x\=ps No match X\x{123}\x{123}\x{123}\x{123}x\=ps No match
/X\d{2,4}b/utf X\=ps Partial match: X X3\=ps Partial match: X3 X33\=ps Partial match: X33 X333\=ps Partial match: X333 X3333\=ps Partial match: X3333
/X\d{2,4}?b/utf X\=ps Partial match: X X3\=ps Partial match: X3 X33\=ps Partial match: X33 X333\=ps Partial match: X333 X3333\=ps Partial match: X3333
/X\d{2,4}+b/utf X\=ps Partial match: X X3\=ps Partial match: X3 X33\=ps Partial match: X33 X333\=ps Partial match: X333 X3333\=ps Partial match: X3333
/X\D{2,4}b/utf X\=ps Partial match: X Xa\=ps Partial match: Xa Xaa\=ps Partial match: Xaa Xaaa\=ps Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa
/X\D{2,4}?b/utf X\=ps Partial match: X Xa\=ps Partial match: Xa Xaa\=ps Partial match: Xaa Xaaa\=ps Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa
/X\D{2,4}+b/utf X\=ps Partial match: X Xa\=ps Partial match: Xa Xaa\=ps Partial match: Xaa Xaaa\=ps Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa
/X\D{2,4}b/utf X\=ps Partial match: X X\x{123}\=ps Partial match: X\x{123} X\x{123}\x{123}\=ps Partial match: X\x{123}\x{123} X\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123}
/X\D{2,4}?b/utf X\=ps Partial match: X X\x{123}\=ps Partial match: X\x{123} X\x{123}\x{123}\=ps Partial match: X\x{123}\x{123} X\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123}
/X\D{2,4}+b/utf X\=ps Partial match: X X\x{123}\=ps Partial match: X\x{123} X\x{123}\x{123}\=ps Partial match: X\x{123}\x{123} X\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123}
/X[abc]{2,4}b/utf X\=ps Partial match: X Xa\=ps Partial match: Xa Xaa\=ps Partial match: Xaa Xaaa\=ps Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa
/X[abc]{2,4}?b/utf X\=ps Partial match: X Xa\=ps Partial match: Xa Xaa\=ps Partial match: Xaa Xaaa\=ps Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa
/X[abc]{2,4}+b/utf X\=ps Partial match: X Xa\=ps Partial match: Xa Xaa\=ps Partial match: Xaa Xaaa\=ps Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa
/X[abc\x{123}]{2,4}b/utf X\=ps Partial match: X X\x{123}\=ps Partial match: X\x{123} X\x{123}\x{123}\=ps Partial match: X\x{123}\x{123} X\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123}
/X[abc\x{123}]{2,4}?b/utf X\=ps Partial match: X X\x{123}\=ps Partial match: X\x{123} X\x{123}\x{123}\=ps Partial match: X\x{123}\x{123} X\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123}
/X[abc\x{123}]{2,4}+b/utf X\=ps Partial match: X X\x{123}\=ps Partial match: X\x{123} X\x{123}\x{123}\=ps Partial match: X\x{123}\x{123} X\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123}
/X[^a]{2,4}b/utf X\=ps Partial match: X Xz\=ps Partial match: Xz Xzz\=ps Partial match: Xzz Xzzz\=ps Partial match: Xzzz Xzzzz\=ps Partial match: Xzzzz
/X[^a]{2,4}?b/utf X\=ps Partial match: X Xz\=ps Partial match: Xz Xzz\=ps Partial match: Xzz Xzzz\=ps Partial match: Xzzz Xzzzz\=ps Partial match: Xzzzz
/X[^a]{2,4}+b/utf X\=ps Partial match: X Xz\=ps Partial match: Xz Xzz\=ps Partial match: Xzz Xzzz\=ps Partial match: Xzzz Xzzzz\=ps Partial match: Xzzzz
/X[^a]{2,4}b/utf X\=ps Partial match: X X\x{123}\=ps Partial match: X\x{123} X\x{123}\x{123}\=ps Partial match: X\x{123}\x{123} X\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123}
/X[^a]{2,4}?b/utf X\=ps Partial match: X X\x{123}\=ps Partial match: X\x{123} X\x{123}\x{123}\=ps Partial match: X\x{123}\x{123} X\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123}
/X[^a]{2,4}+b/utf X\=ps Partial match: X X\x{123}\=ps Partial match: X\x{123} X\x{123}\x{123}\=ps Partial match: X\x{123}\x{123} X\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123}
/(Y)X\1{2,4}b/utf YX\=ps Partial match: YX YXY\=ps Partial match: YXY YXYY\=ps Partial match: YXYY YXYYY\=ps Partial match: YXYYY YXYYYY\=ps Partial match: YXYYYY
/(Y)X\1{2,4}?b/utf YX\=ps Partial match: YX YXY\=ps Partial match: YXY YXYY\=ps Partial match: YXYY YXYYY\=ps Partial match: YXYYY YXYYYY\=ps Partial match: YXYYYY
/(Y)X\1{2,4}+b/utf YX\=ps Partial match: YX YXY\=ps Partial match: YXY YXYY\=ps Partial match: YXYY YXYYY\=ps Partial match: YXYYY YXYYYY\=ps Partial match: YXYYYY
/(\x{123})X\1{2,4}b/utf \x{123}X\=ps Partial match: \x{123}X \x{123}X\x{123}\=ps Partial match: \x{123}X\x{123} \x{123}X\x{123}\x{123}\=ps Partial match: \x{123}X\x{123}\x{123} \x{123}X\x{123}\x{123}\x{123}\=ps Partial match: \x{123}X\x{123}\x{123}\x{123} \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: \x{123}X\x{123}\x{123}\x{123}\x{123}
/(\x{123})X\1{2,4}?b/utf \x{123}X\=ps Partial match: \x{123}X \x{123}X\x{123}\=ps Partial match: \x{123}X\x{123} \x{123}X\x{123}\x{123}\=ps Partial match: \x{123}X\x{123}\x{123} \x{123}X\x{123}\x{123}\x{123}\=ps Partial match: \x{123}X\x{123}\x{123}\x{123} \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: \x{123}X\x{123}\x{123}\x{123}\x{123}
/(\x{123})X\1{2,4}+b/utf \x{123}X\=ps Partial match: \x{123}X \x{123}X\x{123}\=ps Partial match: \x{123}X\x{123} \x{123}X\x{123}\x{123}\=ps Partial match: \x{123}X\x{123}\x{123} \x{123}X\x{123}\x{123}\x{123}\=ps Partial match: \x{123}X\x{123}\x{123}\x{123} \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: \x{123}X\x{123}\x{123}\x{123}\x{123}
/\bthe cat\b/utf the cat\=ps 0: the cat the cat\=ph Partial match: the cat
/abcd*/utf xxxxabcd\=ps 0: abcd xxxxabcd\=ph Partial match: abcd
/abcd*/i,utf xxxxabcd\=ps 0: abcd xxxxabcd\=ph Partial match: abcd XXXXABCD\=ps 0: ABCD XXXXABCD\=ph Partial match: ABCD
/abc\d*/utf xxxxabc1\=ps 0: abc1 xxxxabc1\=ph Partial match: abc1
/(a)bc\1*/utf xxxxabca\=ps 0: abca 1: a xxxxabca\=ph Partial match: abca
/abc[de]*/utf xxxxabcde\=ps 0: abcde xxxxabcde\=ph Partial match: abcde
/X\W{3}X/utf X\=ps Partial match: X
/\sxxx\s/utf,tables=2 AB\x{85}xxx\x{a0}XYZ 0: \x{85}xxx\x{a0} AB\x{a0}xxx\x{85}XYZ 0: \x{a0}xxx\x{85}
/\S \S/utf,tables=2 \x{a2} \x{84} 0: \x{a2} \x{84}
'A#хц'Bx,newline=any,utf ------------------------------------------------------------------ Bra A Ket End ------------------------------------------------------------------
'A#хц PQ'Bx,newline=any,utf ------------------------------------------------------------------ Bra APQ Ket End ------------------------------------------------------------------
/a+#хaa z#XX?/Bx,newline=any,utf ------------------------------------------------------------------ Bra a++ z Ket End ------------------------------------------------------------------
/a+#хaa z#х?/Bx,newline=any,utf ------------------------------------------------------------------ Bra a++ z Ket End ------------------------------------------------------------------
/\g{A}xxx#bXX(?'A'123)
(?'A'456)/Bx,newline=any,utf ------------------------------------------------------------------ Bra \1 xxx CBra 1 456 Ket Ket End ------------------------------------------------------------------
/\g{A}xxx#bх(?'A'123)
(?'A'456)/Bx,newline=any,utf ------------------------------------------------------------------ Bra \1 xxx CBra 1 456 Ket Ket End ------------------------------------------------------------------
/^\cģ/utf Failed: error 168 at offset 3: \c must be followed by a printable ASCII character
/(\R*)(.)/s,utf \r\n 0: \x{0d} 1: 2: \x{0d} \r\r\n\n\r 0: \x{0d}\x{0d}\x{0a}\x{0a}\x{0d} 1: \x{0d}\x{0d}\x{0a}\x{0a} 2: \x{0d} \r\r\n\n\r\n 0: \x{0d}\x{0d}\x{0a}\x{0a}\x{0d} 1: \x{0d}\x{0d}\x{0a}\x{0a} 2: \x{0d}
/(\R)*(.)/s,utf \r\n 0: \x{0d} 1: <unset> 2: \x{0d} \r\r\n\n\r 0: \x{0d}\x{0d}\x{0a}\x{0a}\x{0d} 1: \x{0a} 2: \x{0d} \r\r\n\n\r\n 0: \x{0d}\x{0d}\x{0a}\x{0a}\x{0d} 1: \x{0a} 2: \x{0d}
/[^\x{1234}]+/Ii,utf Capture group count = 0 Options: caseless utf Subject length lower bound = 1
/[^\x{1234}]+?/Ii,utf Capture group count = 0 Options: caseless utf Subject length lower bound = 1
/[^\x{1234}]++/Ii,utf Capture group count = 0 Options: caseless utf Subject length lower bound = 1
/[^\x{1234}]{2}/Ii,utf Capture group count = 0 Options: caseless utf Subject length lower bound = 2
/f.*/ for\=ph Partial match: for
/f.*/s for\=ph Partial match: for
/f.*/utf for\=ph Partial match: for
/f.*/s,utf for\=ph Partial match: for
/\x{d7ff}\x{e000}/utf
/\x{d800}/utf Failed: error 173 at offset 7: disallowed Unicode code point (>= 0xd800 && <= 0xdfff)
/\x{dfff}/utf Failed: error 173 at offset 7: disallowed Unicode code point (>= 0xd800 && <= 0xdfff)
/\h+/utf \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} 0: \x{1680}\x{2000}\x{202f}\x{3000} \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000} 0: \x{200a}\x{a0}\x{2000}
/[\h\x{e000}]+/B,utf ------------------------------------------------------------------ Bra [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}\x{e000}]++ Ket End ------------------------------------------------------------------ \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} 0: \x{1680}\x{2000}\x{202f}\x{3000} \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000} 0: \x{200a}\x{a0}\x{2000}
/\H+/utf \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} 0: \x{167f}\x{1681}\x{180d}\x{180f} \x{2000}\x{200a}\x{1fff}\x{200b} 0: \x{1fff}\x{200b} \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} 0: \x{202e}\x{2030}\x{205e}\x{2060} \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001} 0: \x{9f}\x{a1}\x{2fff}\x{3001}
/[\H\x{d7ff}]+/B,utf ------------------------------------------------------------------ Bra [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}\x{d7ff}]++ Ket End ------------------------------------------------------------------ \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} 0: \x{167f}\x{1681}\x{180d}\x{180f} \x{2000}\x{200a}\x{1fff}\x{200b} 0: \x{1fff}\x{200b} \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} 0: \x{202e}\x{2030}\x{205e}\x{2060} \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001} 0: \x{9f}\x{a1}\x{2fff}\x{3001}
/\v+/utf \x{2027}\x{2030}\x{2028}\x{2029} 0: \x{2028}\x{2029} \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d 0: \x{85}\x{0a}\x{0b}\x{0c}\x{0d}
/[\v\x{e000}]+/B,utf ------------------------------------------------------------------ Bra [\x0a-\x0d\x85\x{2028}-\x{2029}\x{e000}]++ Ket End ------------------------------------------------------------------ \x{2027}\x{2030}\x{2028}\x{2029} 0: \x{2028}\x{2029} \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d 0: \x{85}\x{0a}\x{0b}\x{0c}\x{0d}
/\V+/utf \x{2028}\x{2029}\x{2027}\x{2030} 0: \x{2027}\x{2030} \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86} 0: \x{09}\x{0e}\x{84}\x{86}
/[\V\x{d7ff}]+/B,utf ------------------------------------------------------------------ Bra [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{10ffff}\x{d7ff}]++ Ket End ------------------------------------------------------------------ \x{2028}\x{2029}\x{2027}\x{2030} 0: \x{2027}\x{2030} \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86} 0: \x{09}\x{0e}\x{84}\x{86}
/\R+/bsr=unicode,utf \x{2027}\x{2030}\x{2028}\x{2029} 0: \x{2028}\x{2029} \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d 0: \x{85}\x{0a}\x{0b}\x{0c}\x{0d}
/(..)\1/utf ab\=ps Partial match: ab aba\=ps Partial match: aba abab\=ps 0: abab 1: ab
/(..)\1/i,utf ab\=ps Partial match: ab abA\=ps Partial match: abA aBAb\=ps 0: aBAb 1: aB
/(..)\1{2,}/utf ab\=ps Partial match: ab aba\=ps Partial match: aba abab\=ps Partial match: abab ababa\=ps Partial match: ababa ababab\=ps 0: ababab 1: ab ababab\=ph Partial match: ababab abababa\=ps 0: ababab 1: ab abababa\=ph Partial match: abababa
/(..)\1{2,}/i,utf ab\=ps Partial match: ab aBa\=ps Partial match: aBa aBAb\=ps Partial match: aBAb AbaBA\=ps Partial match: AbaBA abABAb\=ps 0: abABAb 1: ab aBAbaB\=ph Partial match: aBAbaB abABabA\=ps 0: abABab 1: ab abaBABa\=ph Partial match: abaBABa
/(..)\1{2,}?x/i,utf ab\=ps Partial match: ab abA\=ps Partial match: abA aBAb\=ps Partial match: aBAb abaBA\=ps Partial match: abaBA abAbaB\=ps Partial match: abAbaB abaBabA\=ps Partial match: abaBabA abAbABaBx\=ps 0: abAbABaBx 1: ab
/./utf,newline=crlf \r\=ps 0: \x{0d} \r\=ph Partial match: \x{0d}
/.{2,3}/utf,newline=crlf \r\=ps Partial match: \x{0d} \r\=ph Partial match: \x{0d} \r\r\=ps 0: \x{0d}\x{0d} \r\r\=ph Partial match: \x{0d}\x{0d} \r\r\r\=ps 0: \x{0d}\x{0d}\x{0d} \r\r\r\=ph Partial match: \x{0d}\x{0d}\x{0d}
/.{2,3}?/utf,newline=crlf \r\=ps Partial match: \x{0d} \r\=ph Partial match: \x{0d} \r\r\=ps 0: \x{0d}\x{0d} \r\r\=ph Partial match: \x{0d}\x{0d} \r\r\r\=ps 0: \x{0d}\x{0d} \r\r\r\=ph 0: \x{0d}\x{0d}
/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/B,utf ------------------------------------------------------------------ Bra [^\x{100}] [^\x{1234}] [^\x{ffff}] [^\x{10000}] [^\x{10ffff}] Ket End ------------------------------------------------------------------
/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/Bi,utf ------------------------------------------------------------------ Bra /i [^\x{100}] /i [^\x{1234}] /i [^\x{ffff}] /i [^\x{10000}] /i [^\x{10ffff}] Ket End ------------------------------------------------------------------
/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/B,utf ------------------------------------------------------------------ Bra [^\x{100}]* [^\x{10000}]+ [^\x{10ffff}]?? [^\x{8000}]{4} [^\x{8000}]* [^\x{7fff}]{2} [^\x{7fff}]{0,7}? [^\x{fffff}]{5} [^\x{fffff}]?+ Ket End ------------------------------------------------------------------
/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/Bi,utf ------------------------------------------------------------------ Bra /i [^\x{100}]* /i [^\x{10000}]+ /i [^\x{10ffff}]?? /i [^\x{8000}]{4} /i [^\x{8000}]* /i [^\x{7fff}]{2} /i [^\x{7fff}]{0,7}? /i [^\x{fffff}]{5} /i [^\x{fffff}]?+ Ket End ------------------------------------------------------------------
/(?<=\x{1234}\x{1234})\bxy/I,utf Capture group count = 0 Max lookbehind = 2 Options: utf First code unit = 'x' Last code unit = 'y' Subject length lower bound = 2
/(?<!^)ETA/utf \= Expect no match ETA No match
/\u0100/B,utf,alt_bsux,allow_empty_class,match_unset_backref ------------------------------------------------------------------ Bra \x{100} Ket End ------------------------------------------------------------------
/[\u0100-\u0200]/B,utf,alt_bsux,allow_empty_class,match_unset_backref ------------------------------------------------------------------ Bra [\x{100}-\x{200}] Ket End ------------------------------------------------------------------
/\ud800/utf,alt_bsux,allow_empty_class,match_unset_backref Failed: error 173 at offset 6: disallowed Unicode code point (>= 0xd800 && <= 0xdfff)
/^\u{0000000000010ffff}/utf,extra_alt_bsux \x{10ffff} 0: \x{10ffff}
/\u{ 1bb1}/utf,extra_alt_bsux u{ 1bb1} 0: u{ 1bb1} \= Expect no match \x{1bb1} No match
/\u/utf,alt_bsux \\u 0: u
/^a+[a\x{200}]/B,utf ------------------------------------------------------------------ Bra ^ a+ [a\x{200}] Ket End ------------------------------------------------------------------ aa 0: aa
/[b-d\x{200}-\x{250}]*[ae-h]?#[\x{200}-\x{250}]{0,8}[\x00-\xff]*#[\x{200}-\x{250}]+[a-z]/B,utf ------------------------------------------------------------------ Bra [b-d\x{200}-\x{250}]*+ [ae-h]?+ # [\x{200}-\x{250}]{0,8}+ [\x00-\xff]* # [\x{200}-\x{250}]++ [a-z] Ket End ------------------------------------------------------------------
/[\p{L}]/IB ------------------------------------------------------------------ Bra [\p{L}] Ket End ------------------------------------------------------------------ Capture group count = 0 Subject length lower bound = 1
/[\p{^L}]/IB ------------------------------------------------------------------ Bra [\P{L}] Ket End ------------------------------------------------------------------ Capture group count = 0 Subject length lower bound = 1
/[\P{L}]/IB ------------------------------------------------------------------ Bra [\P{L}] Ket End ------------------------------------------------------------------ Capture group count = 0 Subject length lower bound = 1
/[\P{^L}]/IB ------------------------------------------------------------------ Bra [\p{L}] Ket End ------------------------------------------------------------------ Capture group count = 0 Subject length lower bound = 1
/[abc\p{L}\x{0660}]/IB,utf ------------------------------------------------------------------ Bra [a-c\p{L}\x{660}] Ket End ------------------------------------------------------------------ Capture group count = 0 Options: utf Subject length lower bound = 1
/[\p{Nd}]/IB,utf ------------------------------------------------------------------ Bra [\p{Nd}] Ket End ------------------------------------------------------------------ Capture group count = 0 Options: utf Subject length lower bound = 1 1234 0: 1
/[\p{Nd}+-]+/IB,utf ------------------------------------------------------------------ Bra [+\-\p{Nd}]++ Ket End ------------------------------------------------------------------ Capture group count = 0 Options: utf Subject length lower bound = 1 1234 0: 1234 12-34 0: 12-34 12+\x{661}-34 0: 12+\x{661}-34 \= Expect no match abcd No match
/(?:[\PPa*]*){8,}/
/[\P{Any}]/B ------------------------------------------------------------------ Bra [\P{Any}] Ket End ------------------------------------------------------------------
/[\P{Any}\E]/B ------------------------------------------------------------------ Bra [\P{Any}] Ket End ------------------------------------------------------------------
/(\P{Yi}+\277)/
/(\P{Yi}+\277)?/
/(?<=\P{Yi}{3}A)X/
/\p{Yi}+(\P{Yi}+)(?1)/
/(\P{Yi}{2}\277)?/
/[\P{Yi}A]/
/[\P{Yi}\P{Yi}\P{Yi}A]/
/[^\P{Yi}A]/
/[^\P{Yi}\P{Yi}\P{Yi}A]/
/(\P{Yi}*\277)*/
/(\P{Yi}*?\277)*/
/(\p{Yi}*+\277)*/
/(\P{Yi}?\277)*/
/(\P{Yi}??\277)*/
/(\p{Yi}?+\277)*/
/(\P{Yi}{0,3}\277)*/
/(\P{Yi}{0,3}?\277)*/
/(\p{Yi}{0,3}+\277)*/
/\p{Zl}{2,3}+/B,utf ------------------------------------------------------------------ Bra prop Zl {2} prop Zl ?+ Ket End ------------------------------------------------------------------
0: \x{2028}\x{2028} \x{2028}\x{2028}\x{2028} 0: \x{2028}\x{2028}\x{2028}
/\p{Zl}/B,utf ------------------------------------------------------------------ Bra prop Zl Ket End ------------------------------------------------------------------
/\p{Lu}{3}+/B,utf ------------------------------------------------------------------ Bra prop Lu {3} Ket End ------------------------------------------------------------------
/\pL{2}+/B,utf ------------------------------------------------------------------ Bra prop L {2} Ket End ------------------------------------------------------------------
/\p{Cc}{2}+/B,utf ------------------------------------------------------------------ Bra prop Cc {2} Ket End ------------------------------------------------------------------
/^\p{Cf}/utf \x{180e} 0: \x{180e} \x{061c} 0: \x{61c} \x{2066} 0: \x{2066} \x{2067} 0: \x{2067} \x{2068} 0: \x{2068} \x{2069} 0: \x{2069}
/^\p{Cs}/utf \x{dfff}\=no_utf_check 0: \x{dfff} \= Expect no match \x{09f} No match
/^\p{Mn}/utf \x{1a1b} 0: \x{1a1b}
/^\p{Pe}/utf \x{2309} 0: \x{2309} \x{230b} 0: \x{230b}
/^\p{Ps}/utf \x{2308} 0: \x{2308} \x{230a} 0: \x{230a}
/^\p{Sc}+/utf $\x{a2}\x{a3}\x{a4}\x{a5}\x{a6} 0: $\x{a2}\x{a3}\x{a4}\x{a5} \x{9f2} 0: \x{9f2} \= Expect no match X No match \x{2c2} No match
/^\p{Zs}/utf \ \ 0: \x{a0} 0: \x{a0} \x{1680} 0: \x{1680} \x{2000} 0: \x{2000} \x{2001} 0: \x{2001} \= Expect no match \x{2028} No match \x{200d} No match
# These are here because Perl has problems with the negative versions of the # properties and has changed how it behaves for caseless matching.
/\p{^Lu}/i,utf 1234 0: 1 \= Expect no match ABC No match
/\P{Lu}/i,utf 1234 0: 1 \= Expect no match ABC No match
/\p{Ll}/i,utf a 0: a Az 0: z \= Expect no match ABC No match
/\p{Lu}/i,utf A 0: A a\x{10a0}B 0: \x{10a0} \= Expect no match a No match \x{1d00} No match
/\p{Lu}/i,utf A 0: A aZ 0: Z \= Expect no match abc No match
/[\x{c0}\x{391}]/i,utf \x{c0} 0: \x{c0} \x{e0} 0: \x{e0}
# The next two are special cases where the lengths of the different cases of # the same character differ. The first went wrong with heap frame storage; the # second was broken in all cases.
/^\x{023a}+?(\x{0130}+)/i,utf \x{023a}\x{2c65}\x{0130} 0: \x{23a}\x{2c65}\x{130} 1: \x{130}
/^\x{023a}+([^X])/i,utf \x{023a}\x{2c65}X 0: \x{23a}\x{2c65} 1: \x{2c65}
/\x{c0}+\x{116}+/i,utf \x{c0}\x{e0}\x{116}\x{117} 0: \x{c0}\x{e0}\x{116}\x{117}
/[\x{c0}\x{116}]+/i,utf \x{c0}\x{e0}\x{116}\x{117} 0: \x{c0}\x{e0}\x{116}\x{117}
/(\x{de})\1/i,utf \x{de}\x{de} 0: \x{de}\x{de} 1: \x{de} \x{de}\x{fe} 0: \x{de}\x{fe} 1: \x{de} \x{fe}\x{fe} 0: \x{fe}\x{fe} 1: \x{fe} \x{fe}\x{de} 0: \x{fe}\x{de} 1: \x{fe}
/^\x{c0}$/i,utf \x{c0} 0: \x{c0} \x{e0} 0: \x{e0}
/^\x{e0}$/i,utf \x{c0} 0: \x{c0} \x{e0} 0: \x{e0}
# The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE # will match it only with UCP support, because without that it has no notion # of case for anything other than the ASCII letters.
/((?i)[\x{c0}])/utf \x{c0} 0: \x{c0} 1: \x{c0} \x{e0} 0: \x{e0} 1: \x{e0}
/(?i:[\x{c0}])/utf \x{c0} 0: \x{c0} \x{e0} 0: \x{e0}
# These are PCRE's extra properties to help with Unicodizing \d etc.
/^\p{Xan}/utf ABCD 0: A 1234 0: 1 \x{6ca} 0: \x{6ca} \x{a6c} 0: \x{a6c} \x{10a7} 0: \x{10a7} \= Expect no match _ABC No match
/^\p{Xan}+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 0: ABCD1234\x{6ca}\x{a6c}\x{10a7} \= Expect no match _ABC No match
/^\p{Xan}+?/utf \x{6ca}\x{a6c}\x{10a7}_ 0: \x{6ca}
/^\p{Xan}*/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}
/^\p{Xan}{2,9}/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 0: ABCD1234\x{6ca}
/^\p{Xan}{2,9}?/utf \x{6ca}\x{a6c}\x{10a7}_ 0: \x{6ca}\x{a6c}
/^[\p{Xan}]/utf ABCD1234_ 0: A 1234abcd_ 0: 1 \x{6ca} 0: \x{6ca} \x{a6c} 0: \x{a6c} \x{10a7} 0: \x{10a7} \= Expect no match _ABC No match
/^[\p{Xan}]+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 0: ABCD1234\x{6ca}\x{a6c}\x{10a7} \= Expect no match _ABC No match
/^>\p{Xsp}/utf >\x{1680}\x{2028}\x{0b} 0: >\x{1680} >\x{a0} 0: >\x{a0} \= Expect no match \x{0b} No match
/^>\p{Xsp}+/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^>\p{Xsp}+?/utf >\x{1680}\x{2028}\x{0b} 0: >\x{1680}
/^>\p{Xsp}*/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^>\p{Xsp}{2,9}/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^>\p{Xsp}{2,9}?/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 0: > \x{09}
/^>[\p{Xsp}]/utf >\x{2028}\x{0b} 0: >\x{2028}
/^>[\p{Xsp}]+/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^>\p{Xps}/utf >\x{1680}\x{2028}\x{0b} 0: >\x{1680} >\x{a0} 0: >\x{a0} \= Expect no match \x{0b} No match
/^>\p{Xps}+/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^>\p{Xps}+?/utf >\x{1680}\x{2028}\x{0b} 0: >\x{1680}
/^>\p{Xps}*/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^>\p{Xps}{2,9}/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^>\p{Xps}{2,9}?/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 0: > \x{09}
/^>[\p{Xps}]/utf >\x{2028}\x{0b} 0: >\x{2028}
/^>[\p{Xps}]+/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^\p{Xwd}/utf ABCD 0: A 1234 0: 1 \x{6ca} 0: \x{6ca} \x{a6c} 0: \x{a6c} \x{10a7} 0: \x{10a7} _ABC 0: _ \= Expect no match [] No match
/^\p{Xwd}+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
/^\p{Xwd}+?/utf \x{6ca}\x{a6c}\x{10a7}_ 0: \x{6ca}
/^\p{Xwd}*/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
/^\p{Xwd}{2,9}/utf A_B12\x{6ca}\x{a6c}\x{10a7} 0: A_B12\x{6ca}\x{a6c}\x{10a7}
/^\p{Xwd}{2,9}?/utf \x{6ca}\x{a6c}\x{10a7}_ 0: \x{6ca}\x{a6c}
/^[\p{Xwd}]/utf ABCD1234_ 0: A 1234abcd_ 0: 1 \x{6ca} 0: \x{6ca} \x{a6c} 0: \x{a6c} \x{10a7} 0: \x{10a7} _ABC 0: _ \= Expect no match [] No match
/^[\p{Xwd}]+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
# A check not in UTF-8 mode
/^[\p{Xwd}]+/ ABCD1234_ 0: ABCD1234_
# Some negative checks
/^[\P{Xwd}]+/utf !.+\x{019}\x{482}AB 0: !.+\x{19}\x{482}
/^[\p{^Xwd}]+/utf !.+\x{019}\x{589}AB 0: !.+\x{19}\x{589}
/[\D]/B,utf,ucp ------------------------------------------------------------------ Bra [\P{Nd}] Ket End ------------------------------------------------------------------ 1\x{3c8}2 0: \x{3c8}
/[\d]/B,utf,ucp ------------------------------------------------------------------ Bra [\p{Nd}] Ket End ------------------------------------------------------------------ >\x{6f4}< 0: \x{6f4}
/[\S]/B,utf,ucp ------------------------------------------------------------------ Bra [\P{Xsp}] Ket End ------------------------------------------------------------------ \x{1680}\x{6f4}\x{1680} 0: \x{6f4}
/[\s]/B,utf,ucp ------------------------------------------------------------------ Bra [\p{Xsp}] Ket End ------------------------------------------------------------------ >\x{1680}< 0: \x{1680}
/[\W]/B,utf,ucp ------------------------------------------------------------------ Bra [\P{Xwd}] Ket End ------------------------------------------------------------------ A\x{1735}B 0: \x{1735}
/[\w]/B,utf,ucp ------------------------------------------------------------------ Bra [\p{Xwd}] Ket End ------------------------------------------------------------------ >\x{1723}< 0: \x{1723}
/\D/B,utf,ucp ------------------------------------------------------------------ Bra notprop Nd Ket End ------------------------------------------------------------------ 1\x{3c8}2 0: \x{3c8}
/\d/B,utf,ucp ------------------------------------------------------------------ Bra prop Nd Ket End ------------------------------------------------------------------ >\x{6f4}< 0: \x{6f4}
/\S/B,utf,ucp ------------------------------------------------------------------ Bra notprop Xsp Ket End ------------------------------------------------------------------ \x{1680}\x{6f4}\x{1680} 0: \x{6f4}
/\s/B,utf,ucp ------------------------------------------------------------------ Bra prop Xsp Ket End ------------------------------------------------------------------ >\x{1680}> 0: \x{1680}
/\W/B,utf,ucp ------------------------------------------------------------------ Bra notprop Xwd Ket End ------------------------------------------------------------------ A\x{1735}B 0: \x{1735}
/\w/B,utf,ucp ------------------------------------------------------------------ Bra prop Xwd Ket End ------------------------------------------------------------------ >\x{1723}< 0: \x{1723}
/[[:alpha:]]/B,ucp ------------------------------------------------------------------ Bra [\p{L}] Ket End ------------------------------------------------------------------
/[[:lower:]]/B,ucp ------------------------------------------------------------------ Bra [\p{Ll}] Ket End ------------------------------------------------------------------
/[[:upper:]]/B,ucp ------------------------------------------------------------------ Bra [\p{Lu}] Ket End ------------------------------------------------------------------
/[[:alnum:]]/B,ucp ------------------------------------------------------------------ Bra [\p{Xan}] Ket End ------------------------------------------------------------------
/[[:ascii:]]/B,ucp ------------------------------------------------------------------ Bra [\x00-\x7f] Ket End ------------------------------------------------------------------
/[[:cntrl:]]/B,ucp ------------------------------------------------------------------ Bra [\p{Cc}] Ket End ------------------------------------------------------------------
/[[:digit:]]/B,ucp ------------------------------------------------------------------ Bra [\p{Nd}] Ket End ------------------------------------------------------------------
/[[:digit:]]/B,ucp,ascii_digit ------------------------------------------------------------------ Bra [0-9] Ket End ------------------------------------------------------------------
/[[:graph:]]/B,ucp ------------------------------------------------------------------ Bra [[:graph:]] Ket End ------------------------------------------------------------------
/[[:print:]]/B,ucp ------------------------------------------------------------------ Bra [[:print:]] Ket End ------------------------------------------------------------------
/[[:punct:]]/B,ucp ------------------------------------------------------------------ Bra [[:punct:]] Ket End ------------------------------------------------------------------
/[[:space:]]/B,ucp ------------------------------------------------------------------ Bra [\p{Xps}] Ket End ------------------------------------------------------------------
/[[:word:]]/B,ucp ------------------------------------------------------------------ Bra [\p{Xwd}] Ket End ------------------------------------------------------------------
/[[:xdigit:]]/B,ucp ------------------------------------------------------------------ Bra [[:xdigit:]] Ket End ------------------------------------------------------------------
/[[:xdigit:]]/B,ucp,ascii_digit ------------------------------------------------------------------ Bra [0-9A-Fa-f] Ket End ------------------------------------------------------------------
# Unicode properties for \b and \B
/\b...\B/utf,ucp abc_ 0: abc \x{37e}abc\x{376} 0: abc \x{37e}\x{376}\x{371}\x{393}\x{394} 0: \x{376}\x{371}\x{393} !\x{c0}++\x{c1}\x{c2} 0: ++\x{c1} !\x{c0}+++++ 0: \x{c0}++
# Without PCRE_UCP, non-ASCII always fail, even if < 256
/\b...\B/utf abc_ 0: abc \= Expect no match \x{37e}abc\x{376} No match \x{37e}\x{376}\x{371}\x{393}\x{394} No match !\x{c0}++\x{c1}\x{c2} No match !\x{c0}+++++ No match
# With PCRE_UCP, non-UTF8 chars that are < 256 still check properties
/\b...\B/ucp abc_ 0: abc !\x{c0}++\x{c1}\x{c2} 0: ++\xc1 !\x{c0}+++++ 0: \xc0++
# Some of these are silly, but they check various combinations
/[[:^alpha:][:^cntrl:]]+/B,utf,ucp ------------------------------------------------------------------ Bra [\P{L}\P{Cc}]++ Ket End ------------------------------------------------------------------ 123 0: 123 abc 0: abc
/[[:^cntrl:][:^alpha:]]+/B,utf,ucp ------------------------------------------------------------------ Bra [\P{Cc}\P{L}]++ Ket End ------------------------------------------------------------------ 123 0: 123 abc 0: abc
/[[:alpha:]]+/B,utf,ucp ------------------------------------------------------------------ Bra [\p{L}]++ Ket End ------------------------------------------------------------------ abc 0: abc
/[[:^alpha:]\S]+/B,utf,ucp ------------------------------------------------------------------ Bra [\P{L}\P{Xsp}]++ Ket End ------------------------------------------------------------------ 123 0: 123 abc 0: abc
/[^\d]+/B,utf,ucp ------------------------------------------------------------------ Bra [^\p{Nd}]++ Ket End ------------------------------------------------------------------ abc123 0: abc abc\x{123} 0: abc\x{123} \x{660}abc 0: abc
/\p{Lu}+9\p{Lu}+B\p{Lu}+b/B ------------------------------------------------------------------ Bra prop Lu ++ 9 prop Lu + B prop Lu ++ b Ket End ------------------------------------------------------------------
/\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/B ------------------------------------------------------------------ Bra notprop Lu + 9 notprop Lu ++ B notprop Lu + b Ket End ------------------------------------------------------------------
/\P{Lu}+9\P{Lu}+B\P{Lu}+b/B ------------------------------------------------------------------ Bra notprop Lu + 9 notprop Lu ++ B notprop Lu + b Ket End ------------------------------------------------------------------
/\p{Han}+X\p{Greek}+\x{370}/B,utf ------------------------------------------------------------------ Bra prop Han ++ X prop Greek + \x{370} Ket End ------------------------------------------------------------------
/\p{Xan}+!\p{Xan}+A/B ------------------------------------------------------------------ Bra prop Xan ++ ! prop Xan + A Ket End ------------------------------------------------------------------
/\p{Xsp}+!\p{Xsp}\t/B ------------------------------------------------------------------ Bra prop Xsp ++ ! prop Xsp \x09 Ket End ------------------------------------------------------------------
/\p{Xps}+!\p{Xps}\t/B ------------------------------------------------------------------ Bra prop Xps ++ ! prop Xps \x09 Ket End ------------------------------------------------------------------
/\p{Xwd}+!\p{Xwd}_/B ------------------------------------------------------------------ Bra prop Xwd ++ ! prop Xwd _ Ket End ------------------------------------------------------------------
/A+\p{N}A+\dB+\p{N}*B+\d*/B,ucp ------------------------------------------------------------------ Bra A++ prop N A++ prop Nd B+ prop N *+ B++ prop Nd *+ Ket End ------------------------------------------------------------------
# These behaved oddly in Perl, so they are kept in this test
/(\x{23a}\x{23a}\x{23a})?\1/i,utf \= Expect no match \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} No match
/(ȺȺȺ)?\1/i,utf \= Expect no match ȺȺȺⱥⱥ No match
/(\x{23a}\x{23a}\x{23a})?\1/i,utf \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} 1: \x{23a}\x{23a}\x{23a}
/(ȺȺȺ)?\1/i,utf ȺȺȺⱥⱥⱥ 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} 1: \x{23a}\x{23a}\x{23a}
/(\x{23a}\x{23a}\x{23a})\1/i,utf \= Expect no match \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} No match
/(ȺȺȺ)\1/i,utf \= Expect no match ȺȺȺⱥⱥ No match
/(\x{23a}\x{23a}\x{23a})\1/i,utf \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} 1: \x{23a}\x{23a}\x{23a}
/(ȺȺȺ)\1/i,utf ȺȺȺⱥⱥⱥ 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} 1: \x{23a}\x{23a}\x{23a}
/(\x{2c65}\x{2c65})\1/i,utf \x{2c65}\x{2c65}\x{23a}\x{23a} 0: \x{2c65}\x{2c65}\x{23a}\x{23a} 1: \x{2c65}\x{2c65}
/(ⱥⱥ)\1/i,utf ⱥⱥȺȺ 0: \x{2c65}\x{2c65}\x{23a}\x{23a} 1: \x{2c65}\x{2c65}
/(\x{23a}\x{23a}\x{23a})\1Y/i,utf X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}Y 1: \x{23a}\x{23a}\x{23a}
/(\x{2c65}\x{2c65})\1Y/i,utf X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ 0: \x{2c65}\x{2c65}\x{23a}\x{23a}Y 1: \x{2c65}\x{2c65}
# These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE
/^[\p{Batak}]/utf \x{1bc0} 0: \x{1bc0} \x{1bff} 0: \x{1bff} \= Expect no match \x{1bf4} No match
/^[\p{Brahmi}]/utf \x{11000} 0: \x{11000} \x{1106f} 0: \x{1106f} \= Expect no match \x{1104e} No match
/^[\p{Mandaic}]/utf \x{840} 0: \x{840} \x{85e} 0: \x{85e} \= Expect no match \x{85c} No match \x{85d} No match
/(\X*)(.)/s,utf A\x{300} 0: A 1: 2: A
/^S(\X*)e(\X*)$/utf Stéréo 0: Ste\x{301}re\x{301}o 1: te\x{301}r 2: \x{301}o
/^\X/utf ́réo 0: \x{301}
/^a\X41z/alt_bsux,allow_empty_class,match_unset_backref,dupnames aX41z 0: aX41z \= Expect no match aAz No match
/\X/ a\=ps 0: a a\=ph Partial match: a
/\Xa/ aa\=ps 0: aa aa\=ph 0: aa
/\X{2}/ aa\=ps 0: aa aa\=ph Partial match: aa
/\X+a/ a\=ps Partial match: a aa\=ps 0: aa aa\=ph Partial match: aa
/\X+?a/ a\=ps Partial match: a ab\=ps Partial match: ab aa\=ps 0: aa aa\=ph 0: aa aba\=ps 0: aba
# These Unicode 6.1.0 scripts are not known to Perl.
/\p{Chakma}\d/utf,ucp \x{11100}\x{1113c} 0: \x{11100}\x{1113c}
/\p{Takri}\d/utf,ucp \x{11680}\x{116c0} 0: \x{11680}\x{116c0}
/^\X/utf A\=ps 0: A A\=ph Partial match: A A\x{300}\x{301}\=ps 0: A\x{300}\x{301} A\x{300}\x{301}\=ph Partial match: A\x{300}\x{301} A\x{301}\=ps 0: A\x{301} A\x{301}\=ph Partial match: A\x{301}
/^\X{2,3}/utf A\=ps Partial match: A A\=ph Partial match: A AA\=ps 0: AA AA\=ph Partial match: AA A\x{300}\x{301}\=ps Partial match: A\x{300}\x{301} A\x{300}\x{301}\=ph Partial match: A\x{300}\x{301} A\x{300}\x{301}A\x{300}\x{301}\=ps 0: A\x{300}\x{301}A\x{300}\x{301} A\x{300}\x{301}A\x{300}\x{301}\=ph Partial match: A\x{300}\x{301}A\x{300}\x{301}
/^\X{2}/utf AA\=ps 0: AA AA\=ph Partial match: AA A\x{300}\x{301}A\x{300}\x{301}\=ps 0: A\x{300}\x{301}A\x{300}\x{301} A\x{300}\x{301}A\x{300}\x{301}\=ph Partial match: A\x{300}\x{301}A\x{300}\x{301}
/^\X+/utf AA\=ps 0: AA AA\=ph Partial match: AA
/^\X+?Z/utf AA\=ps Partial match: AA AA\=ph Partial match: AA
/A\x{3a3}B/IBi,utf ------------------------------------------------------------------ Bra /i A clist 03a3 03c2 03c3 /i B Ket End ------------------------------------------------------------------ Capture group count = 0 Options: caseless utf First code unit = 'A' (caseless) Last code unit = 'B' (caseless) Subject length lower bound = 3
/[\x{3a3}]/Bi,utf ------------------------------------------------------------------ Bra clist 03a3 03c2 03c3 Ket End ------------------------------------------------------------------
/[^\x{3a3}]/Bi,utf ------------------------------------------------------------------ Bra not clist 03a3 03c2 03c3 Ket End ------------------------------------------------------------------
/[\x{3a3}]+/Bi,utf ------------------------------------------------------------------ Bra clist 03a3 03c2 03c3 ++ Ket End ------------------------------------------------------------------
/[^\x{3a3}]+/Bi,utf ------------------------------------------------------------------ Bra not clist 03a3 03c2 03c3 ++ Ket End ------------------------------------------------------------------
/a*\x{3a3}/Bi,utf ------------------------------------------------------------------ Bra /i a*+ clist 03a3 03c2 03c3 Ket End ------------------------------------------------------------------
/\x{3a3}+a/Bi,utf ------------------------------------------------------------------ Bra clist 03a3 03c2 03c3 ++ /i a Ket End ------------------------------------------------------------------
/\x{3a3}*\x{3c2}/Bi,utf ------------------------------------------------------------------ Bra clist 03a3 03c2 03c3 * clist 03a3 03c2 03c3 Ket End ------------------------------------------------------------------
/\x{3a3}{3}/i,utf,aftertext \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} 0: \x{3a3}\x{3c3}\x{3c2} 0+ \x{3a3}\x{3c3}\x{3c2}
/\x{3a3}{2,4}/i,utf,aftertext \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} 0: \x{3a3}\x{3c3}\x{3c2}\x{3a3} 0+ \x{3c3}\x{3c2}
/\x{3a3}{2,4}?/i,utf,aftertext \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} 0: \x{3a3}\x{3c3} 0+ \x{3c2}\x{3a3}\x{3c3}\x{3c2}
/\x{3a3}+./i,utf,aftertext \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} 0: \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} 0+
/\x{3a3}++./i,utf,aftertext \= Expect no match \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} No match
/\x{3a3}*\x{3c2}/Bi,utf ------------------------------------------------------------------ Bra clist 03a3 03c2 03c3 * clist 03a3 03c2 03c3 Ket End ------------------------------------------------------------------
/[^\x{3a3}]*\x{3c2}/Bi,utf ------------------------------------------------------------------ Bra not clist 03a3 03c2 03c3 *+ clist 03a3 03c2 03c3 Ket End ------------------------------------------------------------------
/[^a]*\x{3c2}/Bi,utf ------------------------------------------------------------------ Bra /i [^a]* clist 03a3 03c2 03c3 Ket End ------------------------------------------------------------------
/ist/Bi,utf ------------------------------------------------------------------ Bra /i i clist 0053 0073 017f /i t Ket End ------------------------------------------------------------------ \= Expect no match ikt No match
/is+t/i,utf iSs\x{17f}t 0: iSs\x{17f}t \= Expect no match ikt No match
/is+?t/i,utf \= Expect no match ikt No match
/is?t/i,utf \= Expect no match ikt No match
/is{2}t/i,utf \= Expect no match iskt No match
# This property is a PCRE special
/^\p{Xuc}/utf $abc 0: $ @abc 0: @ `abc 0: ` \x{1234}abc 0: \x{1234} \= Expect no match abc No match
/^\p{Xuc}+/utf $@`\x{a0}\x{1234}\x{e000}** 0: $@`\x{a0}\x{1234}\x{e000} \= Expect no match \x{9f} No match
/^\p{Xuc}+?/utf $@`\x{a0}\x{1234}\x{e000}** 0: $ \= Expect no match \x{9f} No match
/^\p{Xuc}+?\*/utf $@`\x{a0}\x{1234}\x{e000}** 0: $@`\x{a0}\x{1234}\x{e000}* \= Expect no match \x{9f} No match
/^\p{Xuc}++/utf $@`\x{a0}\x{1234}\x{e000}** 0: $@`\x{a0}\x{1234}\x{e000} \= Expect no match \x{9f} No match
/^\p{Xuc}{3,5}/utf $@`\x{a0}\x{1234}\x{e000}** 0: $@`\x{a0}\x{1234} \= Expect no match \x{9f} No match
/^\p{Xuc}{3,5}?/utf $@`\x{a0}\x{1234}\x{e000}** 0: $@` \= Expect no match \x{9f} No match
/^[\p{Xuc}]/utf $@`\x{a0}\x{1234}\x{e000}** 0: $ \= Expect no match \x{9f} No match
/^[\p{Xuc}]+/utf $@`\x{a0}\x{1234}\x{e000}** 0: $@`\x{a0}\x{1234}\x{e000} \= Expect no match \x{9f} No match
/^\P{Xuc}/utf abc 0: a \= Expect no match $abc No match @abc No match `abc No match \x{1234}abc No match
/^[\P{Xuc}]/utf abc 0: a \= Expect no match $abc No match @abc No match `abc No match \x{1234}abc No match
# Some auto-possessification tests
/\pN+\z/B ------------------------------------------------------------------ Bra prop N ++ \z Ket End ------------------------------------------------------------------
/\PN+\z/B ------------------------------------------------------------------ Bra notprop N ++ \z Ket End ------------------------------------------------------------------
/\pN+/B ------------------------------------------------------------------ Bra prop N ++ Ket End ------------------------------------------------------------------
/\PN+/B ------------------------------------------------------------------ Bra notprop N ++ Ket End ------------------------------------------------------------------
/\p{Any}+\p{Any} \p{Any}+\P{Any} \p{Any}+\p{L&} \p{Any}+\p{L} \p{Any}+\p{Lu} \p{Any}+\p{Han} \p{Any}+\p{Xan} \p{Any}+\p{Xsp} \p{Any}+\p{Xps} \p{Xwd}+\p{Any} \p{Any}+\p{Xuc}/Bx,ucp ------------------------------------------------------------------ Bra AllAny+ AllAny AllAny+ notprop Any AllAny+ prop Lc AllAny+ prop L AllAny+ prop Lu AllAny+ prop Han AllAny+ prop Xan AllAny+ prop Xsp AllAny+ prop Xps prop Xwd + AllAny AllAny+ prop Xuc Ket End ------------------------------------------------------------------
/\p{L&}+\p{Any} \p{L&}+\p{L&} \P{L&}+\p{L&} \p{L&}+\p{L} \p{L&}+\p{Lu} \p{L&}+\p{Han} \p{L&}+\p{Xan} \p{L&}+\P{Xan} \p{L&}+\p{Xsp} \p{L&}+\p{Xps} \p{Xwd}+\p{L&} \p{L&}+\p{Xuc}/Bx,ucp ------------------------------------------------------------------ Bra prop Lc + AllAny prop Lc + prop Lc notprop Lc ++ prop Lc prop Lc + prop L prop Lc + prop Lu prop Lc + prop Han prop Lc + prop Xan prop Lc ++ notprop Xan prop Lc ++ prop Xsp prop Lc ++ prop Xps prop Xwd + prop Lc prop Lc + prop Xuc Ket End ------------------------------------------------------------------
/\p{N}+\p{Any} \p{N}+\p{L&} \p{N}+\p{L} \p{N}+\P{L} \p{N}+\P{N} \p{N}+\p{Lu} \p{N}+\p{Han} \p{N}+\p{Xan} \p{N}+\p{Xsp} \p{N}+\p{Xps} \p{Xwd}+\p{N} \p{N}+\p{Xuc}/Bx,ucp ------------------------------------------------------------------ Bra prop N + AllAny prop N + prop Lc prop N ++ prop L prop N + notprop L prop N ++ notprop N prop N ++ prop Lu prop N + prop Han prop N + prop Xan prop N ++ prop Xsp prop N ++ prop Xps prop Xwd + prop N prop N + prop Xuc Ket End ------------------------------------------------------------------
/\p{Lu}+\p{Any} \p{Lu}+\p{L&} \p{Lu}+\p{L} \p{Lu}+\p{Lu} \P{Lu}+\p{Lu} \p{Lu}+\p{Nd} \p{Lu}+\P{Nd} \p{Lu}+\p{Han} \p{Lu}+\p{Xan} \p{Lu}+\p{Xsp} \p{Lu}+\p{Xps} \p{Xwd}+\p{Lu} \p{Lu}+\p{Xuc}/Bx,ucp ------------------------------------------------------------------ Bra prop Lu + AllAny prop Lu + prop Lc prop Lu + prop L prop Lu + prop Lu notprop Lu ++ prop Lu prop Lu ++ prop Nd prop Lu + notprop Nd prop Lu + prop Han prop Lu + prop Xan prop Lu ++ prop Xsp prop Lu ++ prop Xps prop Xwd + prop Lu prop Lu + prop Xuc Ket End ------------------------------------------------------------------
/\p{Han}+\p{Lu} \p{Han}+\p{L&} \p{Han}+\p{L} \p{Han}+\p{Lu} \p{Han}+\p{Arabic} \p{Arabic}+\p{Arabic} \p{Han}+\p{Xan} \p{Han}+\p{Xsp} \p{Han}+\p{Xps} \p{Xwd}+\p{Han} \p{Han}+\p{Xuc}/Bx,ucp ------------------------------------------------------------------ Bra prop Han + prop Lu prop Han + prop Lc prop Han + prop L prop Han + prop Lu prop Han ++ prop Arabic prop Arabic + prop Arabic prop Han + prop Xan prop Han + prop Xsp prop Han + prop Xps prop Xwd + prop Han prop Han + prop Xuc Ket End ------------------------------------------------------------------
/\p{Xan}+\p{Any} \p{Xan}+\p{L&} \P{Xan}+\p{L&} \p{Xan}+\p{L} \p{Xan}+\p{Lu} \p{Xan}+\p{Han} \p{Xan}+\p{Xan} \p{Xan}+\P{Xan} \p{Xan}+\p{Xsp} \p{Xan}+\p{Xps} \p{Xwd}+\p{Xan} \p{Xan}+\p{Xuc}/Bx,ucp ------------------------------------------------------------------ Bra prop Xan + AllAny prop Xan + prop Lc notprop Xan ++ prop Lc prop Xan + prop L prop Xan + prop Lu prop Xan + prop Han prop Xan + prop Xan prop Xan ++ notprop Xan prop Xan ++ prop Xsp prop Xan ++ prop Xps prop Xwd + prop Xan prop Xan + prop Xuc Ket End ------------------------------------------------------------------
/\p{Xsp}+\p{Any} \p{Xsp}+\p{L&} \p{Xsp}+\p{L} \p{Xsp}+\p{Lu} \p{Xsp}+\p{Han} \p{Xsp}+\p{Xan} \p{Xsp}+\p{Xsp} \P{Xsp}+\p{Xsp} \p{Xsp}+\p{Xps} \p{Xwd}+\p{Xsp} \p{Xsp}+\p{Xuc}/Bx,ucp ------------------------------------------------------------------ Bra prop Xsp + AllAny prop Xsp ++ prop Lc prop Xsp ++ prop L prop Xsp ++ prop Lu prop Xsp + prop Han prop Xsp ++ prop Xan prop Xsp + prop Xsp notprop Xsp ++ prop Xsp prop Xsp + prop Xps prop Xwd ++ prop Xsp prop Xsp + prop Xuc Ket End ------------------------------------------------------------------
/\p{Xwd}+\p{Any} \p{Xwd}+\p{L&} \p{Xwd}+\p{L} \p{Xwd}+\p{Lu} \p{Xwd}+\p{Han} \p{Xwd}+\p{Xan} \p{Xwd}+\p{Xsp} \p{Xwd}+\p{Xps} \p{Xwd}+\p{Xwd} \p{Xwd}+\P{Xwd} \p{Xwd}+\p{Xuc}/Bx,ucp ------------------------------------------------------------------ Bra prop Xwd + AllAny prop Xwd + prop Lc prop Xwd + prop L prop Xwd + prop Lu prop Xwd + prop Han prop Xwd + prop Xan prop Xwd ++ prop Xsp prop Xwd ++ prop Xps prop Xwd + prop Xwd prop Xwd ++ notprop Xwd prop Xwd + prop Xuc Ket End ------------------------------------------------------------------
/\p{Xuc}+\p{Any} \p{Xuc}+\p{L&} \p{Xuc}+\p{L} \p{Xuc}+\p{Lu} \p{Xuc}+\p{Han} \p{Xuc}+\p{Xan} \p{Xuc}+\p{Xsp} \p{Xuc}+\p{Xps} \p{Xwd}+\p{Xuc} \p{Xuc}+\p{Xuc} \p{Xuc}+\P{Xuc}/Bx,ucp ------------------------------------------------------------------ Bra prop Xuc + AllAny prop Xuc + prop Lc prop Xuc + prop L prop Xuc + prop Lu prop Xuc + prop Han prop Xuc + prop Xan prop Xuc + prop Xsp prop Xuc + prop Xps prop Xwd + prop Xuc prop Xuc + prop Xuc prop Xuc ++ notprop Xuc Ket End ------------------------------------------------------------------
/\p{N}+\p{Ll} \p{N}+\p{Nd} \p{N}+\P{Nd}/Bx,ucp ------------------------------------------------------------------ Bra prop N ++ prop Ll prop N + prop Nd prop N + notprop Nd Ket End ------------------------------------------------------------------
/\p{Xan}+\p{L} \p{Xan}+\p{N} \p{Xan}+\p{C} \p{Xan}+\P{L} \P{Xan}+\p{N} \p{Xan}+\P{C}/Bx,ucp ------------------------------------------------------------------ Bra prop Xan + prop L prop Xan + prop N prop Xan ++ prop C prop Xan + notprop L notprop Xan ++ prop N prop Xan + notprop C Ket End ------------------------------------------------------------------
/\p{L}+\p{Xan} \p{N}+\p{Xan} \p{C}+\p{Xan} \P{L}+\p{Xan} \p{N}+\p{Xan} \P{C}+\p{Xan} \p{L}+\P{Xan}/Bx,ucp ------------------------------------------------------------------ Bra prop L + prop Xan prop N + prop Xan prop C ++ prop Xan notprop L + prop Xan prop N + prop Xan notprop C + prop Xan prop L ++ notprop Xan Ket End ------------------------------------------------------------------
/\p{Xan}+\p{Lu} \p{Xan}+\p{Nd} \p{Xan}+\p{Cc} \p{Xan}+\P{Ll} \P{Xan}+\p{No} \p{Xan}+\P{Cf}/Bx,ucp ------------------------------------------------------------------ Bra prop Xan + prop Lu prop Xan + prop Nd prop Xan ++ prop Cc prop Xan + notprop Ll notprop Xan ++ prop No prop Xan + notprop Cf Ket End ------------------------------------------------------------------
/\p{Lu}+\p{Xan} \p{Nd}+\p{Xan} \p{Cs}+\p{Xan} \P{Lt}+\p{Xan} \p{Nl}+\p{Xan} \P{Cc}+\p{Xan} \p{Lt}+\P{Xan}/Bx,ucp ------------------------------------------------------------------ Bra prop Lu + prop Xan prop Nd + prop Xan prop Cs ++ prop Xan notprop Lt + prop Xan prop Nl + prop Xan notprop Cc + prop Xan prop Lt ++ notprop Xan Ket End ------------------------------------------------------------------
/\w+\p{P} \w+\p{Po} \w+\s \p{Xan}+\s \s+\p{Xan} \s+\w/Bx,ucp ------------------------------------------------------------------ Bra prop Xwd + prop P prop Xwd + prop Po prop Xwd ++ prop Xsp prop Xan ++ prop Xsp prop Xsp ++ prop Xan prop Xsp ++ prop Xwd Ket End ------------------------------------------------------------------
/\w+\P{P} \W+\p{Po} \w+\S \P{Xan}+\s \s+\P{Xan} \s+\W/Bx,ucp ------------------------------------------------------------------ Bra prop Xwd + notprop P notprop Xwd + prop Po prop Xwd + notprop Xsp notprop Xan + prop Xsp prop Xsp + notprop Xan prop Xsp + notprop Xwd Ket End ------------------------------------------------------------------
/\w+\p{Po} \w+\p{Pc} \W+\p{Po} \W+\p{Pc} \w+\P{Po} \w+\P{Pc}/Bx,ucp ------------------------------------------------------------------ Bra prop Xwd + prop Po prop Xwd ++ prop Pc notprop Xwd + prop Po notprop Xwd + prop Pc prop Xwd + notprop Po prop Xwd + notprop Pc Ket End ------------------------------------------------------------------
/\p{Nl}+\p{Xan} \P{Nl}+\p{Xan} \p{Nl}+\P{Xan} \P{Nl}+\P{Xan}/Bx,ucp ------------------------------------------------------------------ Bra prop Nl + prop Xan notprop Nl + prop Xan prop Nl ++ notprop Xan notprop Nl + notprop Xan Ket End ------------------------------------------------------------------
/\p{Xan}+\p{Nl} \P{Xan}+\p{Nl} \p{Xan}+\P{Nl} \P{Xan}+\P{Nl}/Bx,ucp ------------------------------------------------------------------ Bra prop Xan + prop Nl notprop Xan ++ prop Nl prop Xan + notprop Nl notprop Xan + notprop Nl Ket End ------------------------------------------------------------------
/\p{Xan}+\p{Nd} \P{Xan}+\p{Nd} \p{Xan}+\P{Nd} \P{Xan}+\P{Nd}/Bx,ucp ------------------------------------------------------------------ Bra prop Xan + prop Nd notprop Xan ++ prop Nd prop Xan + notprop Nd notprop Xan + notprop Nd Ket End ------------------------------------------------------------------
# End auto-possessification tests
/\w+/B,utf,ucp,auto_callout ------------------------------------------------------------------ Bra Callout 255 0 3 prop Xwd ++ Callout 255 3 0 Ket End ------------------------------------------------------------------ abcd --->abcd +0 ^ \w+ +3 ^ ^ End of pattern 0: abcd
/[\p{N}]?+/B,no_auto_possess ------------------------------------------------------------------ Bra [\p{N}]?+ Ket End ------------------------------------------------------------------
/[\p{L}ab]{2,3}+/B,no_auto_possess ------------------------------------------------------------------ Bra [ab\p{L}]{2,3}+ Ket End ------------------------------------------------------------------
/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx ------------------------------------------------------------------ Bra \D+ extuni \d+ extuni \S+ extuni \s+ extuni \W+ extuni \w+ extuni \R+ extuni \H+ extuni \h+ extuni \V+ extuni \v+ extuni a+ extuni \x0a+ extuni Any+ extuni Ket End ------------------------------------------------------------------
/.+\X/Bsx ------------------------------------------------------------------ Bra AllAny+ extuni Ket End ------------------------------------------------------------------
/\X+$/Bmx ------------------------------------------------------------------ Bra extuni+ /m $ Ket End ------------------------------------------------------------------
/\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx ------------------------------------------------------------------ Bra extuni+ \D extuni+ \d extuni+ \S extuni+ \s extuni+ \W extuni+ \w extuni+ Any extuni+ \R extuni+ \H extuni+ \h extuni+ \V extuni+ \v extuni+ extuni extuni+ \Z extuni++ \z extuni+ $ Ket End ------------------------------------------------------------------
/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/B,utf,ucp ------------------------------------------------------------------ Bra prop Nd ++ prop Xsp {0,5}+ = prop Xsp *+ notprop Xsp ? = prop Xwd {0,4}+ notprop Xwd *+ Ket End ------------------------------------------------------------------
/[RST]+/Bi,utf,ucp ------------------------------------------------------------------ Bra [R-Tr-t\x{17f}]++ Ket End ------------------------------------------------------------------
/[R-T]+/Bi,utf,ucp ------------------------------------------------------------------ Bra [R-Tr-t\x{17f}]++ Ket End ------------------------------------------------------------------
/[Q-U]+/Bi,utf,ucp ------------------------------------------------------------------ Bra [Q-Uq-u\x{17f}]++ Ket End ------------------------------------------------------------------
/^s?c/Iim,utf Capture group count = 0 Options: caseless multiline utf First code unit at start or follows newline Last code unit = 'c' (caseless) Subject length lower bound = 1 scat 0: sc
/\X?abc/utf,no_start_optimize \xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06 0: A\x{300}abc
/\x{100}\x{200}\K\x{300}/utf,startchar \x{100}\x{200}\x{300} 0: \x{100}\x{200}\x{300} ^^^^^^^^^^^^^^
# Test UTF characters in a substitution
/ábc/utf,replace=XሴZ 123ábc123 1: 123X\x{1234}Z123
/(?<=abc)(|def)/g,utf,replace=<$0> 123abcáyzabcdef789abcሴqr 4: 123abc<>\x{e1}yzabc<><def>789abc<>\x{1234}qr
/[A-`]/iB,utf ------------------------------------------------------------------ Bra [A-z\x{212a}\x{17f}] Ket End ------------------------------------------------------------------ abcdefghijklmno 0: a
/(?<=\K\x{17f})/g,utf,aftertext,allow_lookaround_bsk \x{17f}\x{17f}\x{17f}\x{17f}\x{17f} 0: \x{17f} 0+ \x{17f}\x{17f}\x{17f}\x{17f} 0: \x{17f} 0+ \x{17f}\x{17f}\x{17f} 0: \x{17f} 0+ \x{17f}\x{17f} 0: \x{17f} 0+ \x{17f} 0: \x{17f} 0+
/(?<=\K\x{17f})/altglobal,utf,aftertext,allow_lookaround_bsk \x{17f}\x{17f}\x{17f}\x{17f}\x{17f} 0: \x{17f} 0+ \x{17f}\x{17f}\x{17f}\x{17f} 0: \x{17f} 0+ \x{17f}\x{17f}\x{17f} 0: \x{17f} 0+ \x{17f}\x{17f} 0: \x{17f} 0+ \x{17f} 0: \x{17f} 0+
"\xa\xf<(.\pZ*\P{Xwd}+^\xa8\3'3yq.::?(?J:()\xd1+!~:3'(8?:)':(?'d'(?'d'^u]!.+.+\\A\Ah(n+?9){7}+\K;(?'X'u'(?'c'(?'z'(?<y>\xb::\xf0'|\xd3(\xae?'w(z\x8?P>l)\x8?P>a)'\H\R\xd1+!!~:3'(?:h$N{26875}\W+?\\=D{2}\x89(?i:Uy0\N({2\xa(\v\x85*){y*\A(()\p{L}+?\P{^Xan}'+?\xff\+pS\?|).{;y*\A(()\p{L}+?\8}\d?1(|)(/1){7}.+[Lp{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(\xbf(R))\x8a\X*?\x8a\xb\xd1^9\3*+(\xc1,\k'R'\xb4)\xcc(z\z(?J)(?'X'\x1b(\xb\xd1^9\?'3*+P{^Xan}+?\xff\+(\xc1.]k+\xb'Pm'\xb4)\xcc4f\xa7'\xd1V(?i:U,{2,2})'(?'X'))?-%--\x95$9*\4'|\xd1(\x9c''%\x94$9)#(?'R')3\x7?('P\xed7'\xa8\xb1^u\xeaw\1\0\0\(|(?1){7}.+[\p{Me}].\s\xdcC*^\x14?(?(<y>))(?<!^)$C((;*?(R*?))+(?(R)\x8a\X*?\x8a\xb\xd1^9\3*+|(\xc1,\k'R'\xb4)\xcc! z)\z(?JJ)(?'X';(\xb\xd1^9\?'3*+(\xc1.]k+\xb'Pm'\xb4))':(?'d')(?'RD'(d')|)|$)'|(?<x>\g{d});\g{x}\x11\g{d}\x81\|$((?'X'\'X'(?'W''\x92()'9'\x83*))\xba*\!?^ <){)':;\xcc4'\xd1'(?'X'28))?-%--\x95$9*\4'|\xd1((''e\x94*$9:)*#(?'R')3)\x7?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+0!~:(?)'d'E:yD!\s(?'R'\x1e;\x10:U))|'\x9g!\xb0*){)\\x16:;()\x1e\x10\x87*:(?<y>)\xd1+!~:(?)'}'\d'E:yD!\s(?'R'\x1e;\x10:U))|'))|)g!\xb0*R+9{29+)#(?'P'})*?pS\{3,}\x85,{0,}l{*UTF)(\xe{7}){3722,{9,}d{2,?|))|{)\(A?&d}}{\xa,}2}){3,}7,l{)22}(,}l:7{2,4}}29\x19+)#?'P'})*v?))\x5" Failed: error 122 at offset 1227: unmatched closing parenthesis
/$(&.+[\p{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(?(R)){0,6}?|){12\x8a\X*?\x8a\x0b\xd1^9\3*+(\xc1,\k'P'\xb4)\xcc(z\z(?JJ)(?'X'8};(\x0b\xd1^9\?'3*+(\xc1.]k+\x0b'Pm'\xb4\xcc4'\xd1'(?'X'))?-%--\x95$9*\4'|\xd1(''%\x95*$9)#(?'R')3\x07?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+!~:(?)''(d'E:yD!\s(?'R'\x1e;\x10:U))|')g!\xb0*){29+))#(?'P'})*?/
"(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'" Failed: error 162 at offset 113: subpattern name expected
/[\pS#moq]/ = 0: =
/(*:a\x{12345}b\t(d\)c)xxx/utf,alt_verbnames,mark cxxxz 0: xxx MK: a\x{12345}b\x{09}(d)c
/abcd/utf,replace=x\x{824}y\o{3333}z(\Q12\$34$$\x34\E5$$),substitute_extended abcd 1: x\x{824}y\x{6db}z(12\$34$$\x345$)
/a(\x{e0}\x{101})(\x{c0}\x{102})/utf,replace=a\u$1\U$1\E$1\l$2\L$2\Eab\U\x{e0}\x{101}\L\x{d0}\x{160}\EDone,substitute_extended a\x{e0}\x{101}\x{c0}\x{102} 1: a\x{c0}\x{101}\x{c0}\x{100}\x{e0}\x{101}\x{e0}\x{102}\x{e0}\x{103}ab\x{c0}\x{100}\x{f0}\x{161}Done
/((?<digit>\d)|(?<letter>\p{L}))/g,substitute_extended,replace=<${digit:+digit; :not digit; }${letter:+letter:not a letter}> ab12cde 7: <not digit; letter><not digit; letter><digit; not a letter><digit; not a letter><not digit; letter><not digit; letter><not digit; letter>
/(*UCP)(*UTF)[[:>:]]X/B ------------------------------------------------------------------ Bra \b (ucp) Assert back Reverse prop Xwd Ket X Ket End ------------------------------------------------------------------
/abc/utf,replace=xyz abc\=zero_terminate 1: xyz
/a[[:punct:]b]/ucp,bincode ------------------------------------------------------------------ Bra a [b[:punct:]] Ket End ------------------------------------------------------------------
/a[[:punct:]b]/utf,ucp,bincode ------------------------------------------------------------------ Bra a [b[:punct:]] Ket End ------------------------------------------------------------------
/a[b[:punct:]]/utf,ucp,bincode ------------------------------------------------------------------ Bra a [b[:punct:]] Ket End ------------------------------------------------------------------
/[[:^ascii:]]/utf,ucp,bincode ------------------------------------------------------------------ Bra [\x80-\xff] (neg) Ket End ------------------------------------------------------------------
/[[:^ascii:]\w]/utf,ucp,bincode ------------------------------------------------------------------ Bra [\x80-\xff\p{Xwd}\x{100}-\x{10ffff}] Ket End ------------------------------------------------------------------
/[\w[:^ascii:]]/utf,ucp,bincode ------------------------------------------------------------------ Bra [\x80-\xff\p{Xwd}\x{100}-\x{10ffff}] Ket End ------------------------------------------------------------------
/[^[:ascii:]\W]/utf,ucp,bincode ------------------------------------------------------------------ Bra [^\x00-\x7f\P{Xwd}] Ket End ------------------------------------------------------------------ \x{de} 0: \x{de} \x{200} 0: \x{200} \= Expect no match \x{589} No match \x{37e} No match
/[[:^ascii:]a]/utf,ucp,bincode ------------------------------------------------------------------ Bra [a\x80-\xff] (neg) Ket End ------------------------------------------------------------------
/L(?#(|++<!(2)?/B,utf,no_auto_possess,auto_callout ------------------------------------------------------------------ Bra Callout 255 0 14 L? Callout 255 14 0 Ket End ------------------------------------------------------------------
/L(?#(|++<!(2)?/B,utf,ucp,auto_callout ------------------------------------------------------------------ Bra Callout 255 0 14 L?+ Callout 255 14 0 Ket End ------------------------------------------------------------------
/(*UTF)C\x09((?<!'(?x)!*H? #\xcc\x9a[^$]/ Failed: error 114 at offset 39: missing closing parenthesis
/[\D]/utf \x{1d7cf} 0: \x{1d7cf}
/[\D\P{Nd}]/utf \x{1d7cf} 0: \x{1d7cf}
/[^\D]/utf a9b 0: 9 \= Expect no match \x{1d7cf} No match
/[^\D\P{Nd}]/utf a9b 0: 9 \x{1d7cf} 0: \x{1d7cf} \= Expect no match \x{10000} No match
# Hex uses pattern length, not zero-terminated. This tests for overrunning # the given length of a pattern.
/'(*UTF)'/hex
/'#('/hex,extended,utf
/a(?<=A\XB)/utf Failed: error 125 at offset 1: length of lookbehind assertion is not limited
/../utf,auto_callout \n\x{123}\x{123}\x{123}\x{123} --->\x{0a}\x{123}\x{123}\x{123}\x{123} +0 ^ . +0 ^ . +1 ^ ^ . +2 ^ ^ End of pattern 0: \x{123}\x{123}
# This tests processing wide characters in extended mode.
/XȀ/x,utf
# These three test a bug fix that was not clearing up after a locale setting # when the test or a subsequent one matched a wide character.
//locale=C
/[\P{Yi}]/utf \x{2f000} 0: \x{2f000}
/[\P{Yi}]/utf,locale=C \x{2f000} 0: \x{2f000}
/^(?<!(?=))/B,utf ------------------------------------------------------------------ Bra ^ Assert back not Assert \x{10385c} Ket Ket Ket End ------------------------------------------------------------------
# Horizontal and vertical space lists ignore caseless
/[\HH]/Bi,utf ------------------------------------------------------------------ Bra [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}] Ket End ------------------------------------------------------------------
/[^\HH]/Bi,utf ------------------------------------------------------------------ Bra [^\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}] Ket End ------------------------------------------------------------------
//g,utf \=zero_terminate 0:
/^(?1)\p{Nd}{3}(a)/ a123a 0: a123a 1: a
/\p{Nd}{0,3}[\pL](*:abc)(?C1)xxx/callout_info Callout 1 x
# ---------------------------------------------------------------------------
# A bunch of tests that hit lines of code that others do not (at least when # these were created).
/^[^a]{3,}?x/i,utf,no_start_optimize,no_auto_possess \= Expect no match bbb No match cc No match
/^[ac]{3,}?x/i,utf,no_start_optimize,no_auto_possess \= Expect no match aaa\x{100} No match
/^X\X/no_start_optimize,no_auto_possess \= Expect no match X No match
/^X\p{L&}+?/no_start_optimize,no_auto_possess \= Expect no match X No match
/^X\p{L}+?/no_start_optimize,no_auto_possess \= Expect no match X No match
/^X\p{Lu}+?/no_start_optimize,no_auto_possess \= Expect no match X No match
/^X\p{Arabic}+?/no_start_optimize,no_auto_possess \= Expect no match X No match
/^X\p{Xan}+?/ucp,no_start_optimize,no_auto_possess \= Expect no match X No match
/^X\s+?/ucp,no_start_optimize,no_auto_possess \= Expect no match X No match XX No match
/^X\S+?/ucp,no_start_optimize,no_auto_possess XX 0: XX \= Expect no match X No match
/^X\w+?/ucp,no_start_optimize,no_auto_possess \= Expect no match X No match
/^X[^\x{b5}]+?/i,utf,no_start_optimize,no_auto_possess \= Expect no match X No match
/^X[\x{b5}]+?/i,utf,no_start_optimize,no_auto_possess \= Expect no match X No match
/^X\p{Xuc}+?/utf,no_start_optimize,no_auto_possess \= Expect no match X No match
/^X.+?Z/s,utf,no_start_optimize,no_auto_possess \= Expect no match X No match
/^X\R+?/utf,no_start_optimize,no_auto_possess \= Expect no match X No match
/^X\H+?/utf,no_start_optimize,no_auto_possess \= Expect no match X No match
/^X\V+?/utf,no_start_optimize,no_auto_possess \= Expect no match X No match
/^X\s+?/utf,no_start_optimize,no_auto_possess \= Expect no match X No match XX No match
/^X\S+?/utf,no_start_optimize,no_auto_possess \= Expect no match X No match
/^X\p{Any}{1,3}?Z/s,no_start_optimize,no_auto_possess XYYYZ 0: XYYYZ \= Expect no match XY No match XYY No match XYYY No match XYYYYZ No match
/^X\p{L&}{1,3}?Z/s,no_start_optimize,no_auto_possess \= Expect no match XY No match XY! No match
/^X\p{L}{1,3}?Z/s,no_start_optimize,no_auto_possess \= Expect no match XY No match XY! No match
/^X\p{Lu}{1,3}?Z/s,no_start_optimize,no_auto_possess \= Expect no match XY No match XY! No match
/^X\P{Han}{1,3}?Z/s,utf,no_start_optimize,no_auto_possess \= Expect no match XY No match XY! No match XY\x{2f00}! No match
/^X\p{Xan}{1,3}?Z/s,no_start_optimize,no_auto_possess \= Expect no match XY No match XY! No match
/^X\p{Xsp}{1,3}?Z/s,no_start_optimize,no_auto_possess \= Expect no match X\n No match X\n! No match X\n\n! No match
/^X\P{Xsp}{1,3}?Z/s,no_start_optimize,no_auto_possess \= Expect no match XYY\n No match
/^X\p{Xwd}{1,3}?Z/s,no_start_optimize,no_auto_possess \= Expect no match XY No match XY! No match XYY! No match
/^X\x{b5}+?Z/i,utf,no_start_optimize,no_auto_possess \= Expect no match X No match X\x{b5} No match X\x{b5}\x{b5}Y No match
/^X\p{Xuc}+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match X No match X$ No match X@@Y No match
/(*CRLF)^X.+?Z/utf,no_start_optimize,no_auto_possess \= Expect partial match XYY\r\=ph Partial match: XYY\x{0d} \= Expect no match X No match
/^X.+?Z/s,utf,no_start_optimize,no_auto_possess \= Expect no match X No match XYY No match
/^X\R+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match X\nX No match X\n\rX No match X\n\r\nX No match X\n\n No match X\n\x{0c} No match
/(*BSR_ANYCRLF)^X\R+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match X\nX No match X\n\rX No match X\n\r\nX No match X\n\n No match X\n\x{0c} No match
/^X\H+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match XY\t No match XYY No match
/^X\h+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match X\t\t No match X\tY No match
/^X\V+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match XY\n No match XYY No match
/^X\v+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match X\n\n No match X\nY No match
/^X\D+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match XY9 No match XYY No match
/^X\d+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match X99 No match X9Y No match
/^X\S+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match XY\n No match XYY No match
/^X\s+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match X\n\n No match X\nY No match
/^X\W+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match X.A No match X++ No match
/^X\p{L&}{1,3}Z/no_start_optimize,no_auto_possess \= Expect no match XY No match XY! No match
/^X\p{L}{1,3}Z/no_start_optimize,no_auto_possess \= Expect no match XY No match
/^X\p{Xan}{1,3}Z/no_start_optimize,no_auto_possess \= Expect no match XY No match
/^X\P{Xsp}{1,3}Z/no_start_optimize,no_auto_possess \= Expect no match XYY No match
/^X\p{Xuc}+Z/utf,no_start_optimize,no_auto_possess \= Expect no match X$ No match
# ---------------------------------------------------------------------- # These test the dangerous PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL option.
/\x{d800}/B,utf,bad_escape_is_literal ------------------------------------------------------------------ Bra x{d800} Ket End ------------------------------------------------------------------
/\ud800/B,utf,alt_bsux,bad_escape_is_literal ------------------------------------------------------------------ Bra ud800 Ket End ------------------------------------------------------------------
# ----------------------------------------------------------------------
/Aሴ+B/literal,utf,no_utf_check Aሴ+B 0: A\x{1234}+B # These are here because I upgraded to Unicode 10.0.0 before Perl did, so it # doesn't recognize all these scripts. In time these three tests can be moved # to test 4.
/^(\p{Adlam}+)(\p{Bhaiksuki}+)(\p{Marchen}+)(\p{Newa}+)(\p{Osage}+) (\p{Tangut}+)(\p{Masaram_Gondi}+)(\p{Nushu}+)(\p{Soyombo}+) (\p{Zanabazar_Square}+)/x,utf \x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47} 0: \x{1e900}\x{1e924}\x{1e953}\x{11c00}\x{11c2d}\x{11c3e}\x{11c70}\x{11c77}\x{11cab}\x{11400}\x{1142f}\x{11455}\x{104b0}\x{104d8}\x{104fb}\x{16fe0}\x{18800}\x{18af2}\x{11d00}\x{11d3a}\x{11d59}\x{16fe1}\x{1b170}\x{1b2fb}\x{11a50}\x{11a58}\x{11aa2}\x{11a00}\x{11a07}\x{11a47} 1: \x{1e900}\x{1e924}\x{1e953} 2: \x{11c00}\x{11c2d}\x{11c3e} 3: \x{11c70}\x{11c77}\x{11cab} 4: \x{11400}\x{1142f}\x{11455} 5: \x{104b0}\x{104d8}\x{104fb} 6: \x{16fe0}\x{18800}\x{18af2} 7: \x{11d00}\x{11d3a}\x{11d59} 8: \x{16fe1}\x{1b170}\x{1b2fb} 9: \x{11a50}\x{11a58}\x{11aa2} 10: \x{11a00}\x{11a07}\x{11a47}
/^\x{1E900}\x{104B0}/i,utf \x{1E900}\x{104B0} 0: \x{1e900}\x{104b0} \x{1E922}\x{104D8} 0: \x{1e922}\x{104d8} /^(?:(\X)(?C))+$/utf \x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47}\=callout_capture,callout_no_where Callout 0: last capture = 1 1: \x{1e900} Callout 0: last capture = 1 1: \x{1e924} Callout 0: last capture = 1 1: \x{1e953} Callout 0: last capture = 1 1: \x{11c00} Callout 0: last capture = 1 1: \x{11c2d}\x{11c3e} Callout 0: last capture = 1 1: \x{11c70} Callout 0: last capture = 1 1: \x{11c77}\x{11cab} Callout 0: last capture = 1 1: \x{11400} Callout 0: last capture = 1 1: \x{1142f} Callout 0: last capture = 1 1: \x{11455} Callout 0: last capture = 1 1: \x{104b0} Callout 0: last capture = 1 1: \x{104d8} Callout 0: last capture = 1 1: \x{104fb} Callout 0: last capture = 1 1: \x{16fe0} Callout 0: last capture = 1 1: \x{18800} Callout 0: last capture = 1 1: \x{18af2} Callout 0: last capture = 1 1: \x{11d00}\x{11d3a} Callout 0: last capture = 1 1: \x{11d59} Callout 0: last capture = 1 1: \x{16fe1} Callout 0: last capture = 1 1: \x{1b170} Callout 0: last capture = 1 1: \x{1b2fb} Callout 0: last capture = 1 1: \x{11a50}\x{11a58} Callout 0: last capture = 1 1: \x{11aa2} Callout 0: last capture = 1 1: \x{11a00}\x{11a07}\x{11a47} 0: \x{1e900}\x{1e924}\x{1e953}\x{11c00}\x{11c2d}\x{11c3e}\x{11c70}\x{11c77}\x{11cab}\x{11400}\x{1142f}\x{11455}\x{104b0}\x{104d8}\x{104fb}\x{16fe0}\x{18800}\x{18af2}\x{11d00}\x{11d3a}\x{11d59}\x{16fe1}\x{1b170}\x{1b2fb}\x{11a50}\x{11a58}\x{11aa2}\x{11a00}\x{11a07}\x{11a47} 1: \x{11a00}\x{11a07}\x{11a47}
# Similarly for Unicode 11.0.0
/^(\p{Dogra}+)(\p{Gunjala_Gondi}+)(\p{Hanifi_Rohingya}+)(\p{Makasar}+) (\p{Medefaidrin}+)(\p{Old_Sogdian}+)(\p{Sogdian}+)/x,utf \x{11800}\x{11da9}\x{10d27}\x{11ee0}\x{16e48}\x{10f27}\x{10f30} 0: \x{11800}\x{11da9}\x{10d27}\x{11ee0}\x{16e48}\x{10f27}\x{10f30} 1: \x{11800} 2: \x{11da9} 3: \x{10d27} 4: \x{11ee0} 5: \x{16e48} 6: \x{10f27} 7: \x{10f30}
# Regional indicators
/^(\X)(\X)/utf,aftertext \x{1F1E6}\x{1F1E7}\x{1F1E7}B 0: \x{1f1e6}\x{1f1e7}\x{1f1e7} 0+ B 1: \x{1f1e6}\x{1f1e7} 2: \x{1f1e7} \x{1F1E6}\x{1F1E7}\x{1F1E7}\x{1F1E6}B 0: \x{1f1e6}\x{1f1e7}\x{1f1e7}\x{1f1e6} 0+ B 1: \x{1f1e6}\x{1f1e7} 2: \x{1f1e7}\x{1f1e6} # More differences from Perl
/^\p{Common}/utf \x{60c} 0: \x{60c} \x{61f} 0: \x{61f} \x{964} 0: \x{964} \x{965} 0: \x{965}
/^\p{Inherited}/utf \x{64b} 0: \x{64b} \x{654} 0: \x{654} \x{655} 0: \x{655} \x{1D1AA} 0: \x{1d1aa}
/\N{U+}/ Failed: error 193 at offset 2: \N{U+dddd} is supported only in Unicode (UTF) mode
/\N{U+}/utf Failed: error 178 at offset 5: digits missing in \x{} or \o{} or \N{U+}
/\N{U}/ Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u
# This tests the non-UTF Unicode NEL pattern whitespace character, only # recognized by PCRE2 with /x when there is Unicode support.
/A
�B/x AB 0: AB # This tests Unicode Pattern White Space characters in verb names when they # are being processed with PCRE2_EXTENDED. Note: there are UTF-8 characters # with code points greater than 255 between A, B, and C in the pattern.
/(*: AB
C)abc/x,utf,mark,alt_verbnames abc 0: abc MK: ABC # Script run tests: auto-possessification
/^(*sr:.*)/B,utf ------------------------------------------------------------------ Bra ^ Script run Any* Ket Ket End ------------------------------------------------------------------ paypаl.com A classic example of why script run checks are a good thing 0: payp
/^(*sr:.*(*ACCEPT))/utf paypаl.com But *ACCEPT breaks things 0: payp\x{430}l.com But *ACCEPT breaks things
/^(*sr:\x{2e80}*)/B,utf ------------------------------------------------------------------ Bra ^ Script run \x{2e80}*+ Ket Ket End ------------------------------------------------------------------
/^(*sr:\x{2e80}*)\x{2e80}/B,utf ------------------------------------------------------------------ Bra ^ Script run \x{2e80}* Ket \x{2e80} Ket End ------------------------------------------------------------------
/(?<!)(*sr:)/B ------------------------------------------------------------------ Bra Assert back not Ket Script run Ket Ket End ------------------------------------------------------------------
/(?<=abc(?=X(*sr:BXY)CCC)XBXYCCC)./B ------------------------------------------------------------------ Bra Assert back Reverse abc Assert X Script run BXY Ket CCC Ket XBXYCCC Ket Any Ket End ------------------------------------------------------------------ abcXBXYCCC! 0: !
# Some script run patterns are broken in Perl 5.28.0. These can be moved into # test 4 when a mended version of Perl is released.
/^(*sr:.{4})/utf \x{0980}12\x{0993} Bengali Common-digits Bengali 0: \x{980}12\x{993} \x{0780}12\x{07b1} Thaana Common-digits Thaana 0: \x{780}12\x{7b1} \x{0e01}12\x{0e5b} Thai Common-digits Thai 0: \x{e01}12\x{e5b} \x{1780}12\x{19ff} Khmer Common-digits Khmer 0: \x{1780}12\x{19ff} \x{0904}12\x{0939} Devanagari Common-digits Devanagari 0: \x{904}12\x{939} A\x{ff10}\x{ff19}B Latin Common-notascii-digits Latin 0: A\x{ff10}\x{ff19}B A\x{1d7ce}\x{1d7cf}B Latin fancy-common-digits Latin 0: A\x{1d7ce}\x{1d7cf}B # These ones involve non-ASCII but nevertheless Common digits. As of October # 2018 even blead Perl wasn't handling all of these - but is going to.
/^(*sr:.{4})/utf A\x{ff10}\x{ff19}B Latin Common-notascii-digits Latin 0: A\x{ff10}\x{ff19}B \x{ff10}\x{ff19}.. Common-notascii-digits Common Common 0: \x{ff10}\x{ff19}.. A\x{ff10}BC Latin Common-notascii-digit Latin Latin 0: A\x{ff10}BC A\x{1d7ce}\x{1d7cf}B Latin fancy-common-digits Latin 0: A\x{1d7ce}\x{1d7cf}B \x{1d7ce}\x{1d7cf},, fancy-common-digits Common Common 0: \x{1d7ce}\x{1d7cf},, A\x{1d7ce}BC Latin fancy-common-digit Latin Latin 0: A\x{1d7ce}BC # Some Unicode 12.1.0 new script characters
/\p{Elymaic}\p{Nandinagari}\p{Nyiakeng_Puachue_Hmong}\p{Wancho}/utf \x{10fe5}\x{119AC}\x{1E10E}\x{1E2D1} 0: \x{10fe5}\x{119ac}\x{1e10e}\x{1e2d1}
# Some Unicode 13.0.0 new script characters
/\p{Chorasmian}\p{Dives_Akuru}\p{Khitan_Small_Script}\p{Yezidi}/utf \x{10FB0}\x{11900}\x{18B00}\x{10E80} 0: \x{10fb0}\x{11900}\x{18b00}\x{10e80}
# -------
# Test reference and errors in non-ASCII characters in group names
/(?'𑠅ABC'...)/I,utf Capture group count = 1 Named capture groups: 𑠅ABC 1 Options: utf Subject length lower bound = 3 abcde\=copy=𑠅ABC 0: abc 1: abc C abc (3) 𑠅ABC (group 1)
# Bad ones
/(?'AB၌C'...)\g{AB၌C}/utf Failed: error 142 at offset 5: syntax error in subpattern name (missing terminator?)
/(?'٠ABC'...)/utf Failed: error 144 at offset 3: subpattern name must start with a non-digit
/(?'²ABC'...)/utf Failed: error 162 at offset 3: subpattern name expected
/(?'X²ABC'...)/utf Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?)
# -------
/\p{Any}*xyz/I Capture group count = 0 Compile options: <none> Overall options: anchored Last code unit = 'z' Subject length lower bound = 3
/(|�)7/caseless,ucp
/(\xc1)\1/i,ucp \xc1\xe1\=no_jit 0: \xc1\xe1 1: \xc1 /\p{L&}+\p{bidi_control}/B ------------------------------------------------------------------ Bra prop Lc + prop Bidicontrol Ket End ------------------------------------------------------------------
/\p{bidi_control}+\p{L&}/B ------------------------------------------------------------------ Bra prop Bidicontrol + prop Lc Ket End ------------------------------------------------------------------
/\p{han}/B ------------------------------------------------------------------ Bra prop Han Ket End ------------------------------------------------------------------
/\p{script:han}/B ------------------------------------------------------------------ Bra prop script:Han Ket End ------------------------------------------------------------------
/\p{sc:han}/B ------------------------------------------------------------------ Bra prop script:Han Ket End ------------------------------------------------------------------
/\p{script extensions:han}/B ------------------------------------------------------------------ Bra prop Han Ket End ------------------------------------------------------------------
/\p{scx:han}/B ------------------------------------------------------------------ Bra prop Han Ket End ------------------------------------------------------------------
# Test error - invalid script name
/\p{sc:L}/ Failed: error 147 at offset 8: unknown property after \P or \p
# Some Boolean property tests that differ from Perl
/\p{emojimodifierbase}\p{ebase}/g,utf >AN<>\x{261d}\x{1faf6}<>yz< 0: \x{261d}\x{1faf6}
/\p{graphemelink}\p{grlink}/g,utf >AN<>\x{11d97}\x{94d}<>yz< 0: \x{11d97}\x{94d} /\p{soft dotted}\p{sd}/g,utf >AF23<>\x{1df1a}\x{69}<>yz< 0: \x{1df1a}i # ------------------------------------------------
/\p{\2b[:x�igi:t:_/ Failed: error 146 at offset 17: malformed \P or \p sequence
# Tests for PCRE2_EXTRA_CASELESS_RESTRICT. Compare each test with and without # the restriction.
/AskZ/i,utf,caseless_restrict AskZ 0: AskZ aSKz 0: aSKz \= Expect no match A\x{17f}kZ No match As\x{212a}Z No match
/AskZ/i,utf AskZ 0: AskZ aSKz 0: aSKz A\x{17f}kZ 0: A\x{17f}kZ As\x{212a}Z 0: As\x{212a}Z
/A\x{17f}\x{212a}Z/ir,utf \= Expect no match AskZ No match
/A\x{17f}\x{212a}Z/i,utf AskZ 0: AskZ
/[AskZ]+/i,utf,caseless_restrict AskZ 0: AskZ aSKz 0: aSKz A\x{17f}kZ 0: A As\x{212a}Z 0: As
/[AskZ]+/i,utf AskZ 0: AskZ aSKz 0: aSKz A\x{17f}kZ 0: A\x{17f}kZ As\x{212a}Z 0: As\x{212a}Z
/[\x{17f}\x{212a}]+/ir,utf \= Expect no match AskZ No match
/[\x{17f}\x{212a}]+/i,utf AskZ 0: sk
/[^s]+/ir,utf A\x{17f}Z 0: A\x{17f}Z
/[^s]+/i,utf A\x{17f}Z 0: A
/[^k]+/ir,utf A\x{212a}Z 0: A\x{212a}Z /[^k]+/i,utf A\x{212a}Z 0: A /[^sk]+/ir,utf A\x{17f}\x{212a}Z 0: A\x{17f}\x{212a}Z
/[^sk]+/i,utf A\x{17f}\x{212a}Z 0: A
/[^\x{17f}]+/ir,utf AsSZ 0: AsSZ
/[^\x{17f}]+/i,utf AsSZ 0: A
/[Ss]+/irB,utf ------------------------------------------------------------------ Bra /i S++ Ket End ------------------------------------------------------------------ Sss\x{17f}ss 0: Sss
/[Ss]+/iB,utf ------------------------------------------------------------------ Bra [Ss\x{17f}\x{17f}]++ Ket End ------------------------------------------------------------------ Sss\x{17f}ss 0: Sss\x{17f}ss
/[S\x{17f}]/irB,utf ------------------------------------------------------------------ Bra [Ss\x{17f}] Ket End ------------------------------------------------------------------
/[S\x{17f}]/iB,utf ------------------------------------------------------------------ Bra [Ss\x{17f}\x{17f}] Ket End ------------------------------------------------------------------
/[\x{17f}s]/irB,utf ------------------------------------------------------------------ Bra [Ss\x{17f}] Ket End ------------------------------------------------------------------
/[\x{17f}s]/iB,utf ------------------------------------------------------------------ Bra [Ss\x{17f}\x{17f}] Ket End ------------------------------------------------------------------
/[\x{4b}\x{6b}]/irB,utf ------------------------------------------------------------------ Bra /i K Ket End ------------------------------------------------------------------
/[\x{4b}\x{6b}]/iB,utf ------------------------------------------------------------------ Bra [Kk\x{212a}\x{212a}] Ket End ------------------------------------------------------------------
/s(?r)s(?-r)s(?r:s)s/i,utf \x{17f}S\x{17f}S\x{17f} 0: \x{17f}S\x{17f}S\x{17f} \= Expect no match \x{17f}\x{17f}\x{17f}S\x{17f} No match \x{17f}S\x{17f}\x{17f}\x{17f} No match
/k(?^i)k/ir,utf K\x{212a} 0: K\x{212a} \= Expect no match \x{212a}\x{212a} No match
# End caseless restrict tests
# TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without.
# DIGITS
/\d+/i,utf 123\x{660}456 0: 123
/\d+/i,utf,ucp 123\x{660}456 0: 123\x{660}456
/\d+/i,utf,ucp,ascii_bsd 123\x{660}456 0: 123
/[\d]+/i,utf 123\x{660}456 0: 123
/[\d]+/i,utf,ucp 123\x{660}456 0: 123\x{660}456
/[\d]+/i,utf,ucp,ascii_bsd 123\x{660}456 0: 123
/\d(?aD)\d(?-aD)\d/utf,ucp \x{660}9\x{660} 0: \x{660}9\x{660} \= Expect no match \x{660}\x{660}\x{660} No match
/\d(?-aD)\d(?aD)\d/utf,ucp,ascii_bsd 999 0: 999 9\x{660}9 0: 9\x{660}9
/\d(?a)\d(?-a)\d/utf,ucp \x{660}9\x{660} 0: \x{660}9\x{660} \= Expect no match \x{660}\x{660}\x{660} No match
/\d(?-aD)\d(?aD)\d/utf,ucp,ascii_bsd 999 0: 999 9\x{660}9 0: 9\x{660}9
# SPACES
/>\s+</i,utf > < 0: > < \= Expect no match >\x{a0} < No match
/>\s+</i,utf,ucp > < 0: > < >\x{a0} < 0: >\x{a0} <
/>\s+</i,utf,ucp,ascii_bss > < 0: > < \= Expect no match >\x{a0} < No match
/>[\s]+</i,utf > < 0: > < \= Expect no match >\x{a0} < No match
/>[\s]+</i,utf,ucp > < 0: > < >\x{a0} < 0: >\x{a0} <
/>[\s]+</i,utf,ucp,ascii_bss > < 0: > < \= Expect no match >\x{a0} < No match
/>\s(?aS)\s(?-aS)\s</utf,ucp >\x{a0} \x{a0}< 0: >\x{a0} \x{a0}< \= Expect no match >\x{a0}\x{a0}\x{a0}< No match
/>\s(?a)\s(?-a)\s</utf,ucp >\x{a0} \x{a0}< 0: >\x{a0} \x{a0}< \= Expect no match >\x{a0}\x{a0}\x{a0}< No match # WORDS
/\w+/i,utf 123\x{660}abc 0: 123
/\w+/i,utf,ucp 123\x{660}abc 0: 123\x{660}abc
/\w+/i,utf,ucp,ascii_bsw 123\x{660}abc 0: 123
/[\w]+/i,utf 123\x{660}abc 0: 123
/[\w]+/i,utf,ucp 123\x{660}abc 0: 123\x{660}abc
/[\w]+/i,utf,ucp,ascii_bsw 123\x{660}abc 0: 123
/\w(?aW)\w(?-aW)\w/utf,ucp \x{660}A\x{c0} 0: \x{660}A\x{c0} \= Expect no match \x{660}\x{c0}\x{c0} No match
/\w(?a)\w(?-a)\w/utf,ucp \x{660}A\x{c0} 0: \x{660}A\x{c0} \= Expect no match \x{660}\x{c0}\x{c0} No match # WORD BOUNDARY
/\bABC\b/utf \x{c0}ABC\x{d0} 0: ABC
/\bABC\b/utf,ucp \= Expect no match \x{c0}ABC\x{d0} No match
/\bABC\b/utf,ucp,ascii_bsw \x{c0}ABC\x{d0} 0: ABC
/\bABC\b/utf,ucp,ascii_all \x{c0}ABC\x{d0} 0: ABC # POSIX
/^[[:digit:]]+$/utf,ucp 123456 0: 123456 123\x{660}456 0: 123\x{660}456
/^[[:digit:]]+$/utf,ucp,ascii_digit 123456 0: 123456 \= Expect no match 123\x{660}456 No match
/[[:digit:]]+/g,utf,ucp,ascii_digit 123\x{660}456 0: 123 0: 456
/(?-aT)[[:digit:]](?aT)[[:digit:]]/utf,ucp,ascii_digit 11 0: 11 \x{ff11}1 0: \x{ff11}1 \= Expect no match 1\x{ff11} No match
/(?-aT:[[:digit:]])[[:digit:]]/utf,ucp,ascii_digit 11 0: 11 \x{ff11}1 0: \x{ff11}1 \= Expect no match 1\x{ff11} No match
/(?-aT:[[:digit:]])[[:digit:]]/utf,never_ucp,ascii_digit 11 0: 11 \= Expect no match \x{ff11}1 No match 1\x{ff11} No match
/[[:digit:]]+/utf,ucp,ascii_posix 123\x{660}456 0: 123
/(?-aP)[[:digit:]](?aP)[[:digit:]]/utf,ucp,ascii_posix 11 0: 11 \x{ff11}1 0: \x{ff11}1 \= Expect no match 1\x{ff11} No match
/(?-aP:[[:digit:]])[[:digit:]]/utf,ucp,ascii_posix 11 0: 11 \x{ff11}1 0: \x{ff11}1 \= Expect no match 1\x{ff11} No match
/(?-a:[[:digit:]])[[:digit:]]/a,utf,ucp 11 0: 11 \x{ff11}1 0: \x{ff11}1 \= Expect no match 1\x{ff11} No match
/^[[:xdigit:]]+$/utf,ucp f0 0: f0 1A 0: 1A d\x{ff10} 0: d\x{ff10} \x{ff26}8 0: \x{ff26}8 \= Expect no match 8g\=no_jit No match
/^[[:xdigit:]]+$/utf,ucp,ascii_digit f0 0: f0 1A 0: 1A \= Expect no match d\x{ff10} No match \x{ff26}8 No match 8g No match
/>[[:space:]]+</utf,ucp >\x{a0} \x{a0}< 0: >\x{a0} \x{a0}< >\x{a0}\x{a0}\x{a0}< 0: >\x{a0}\x{a0}\x{a0}<
/>[[:space:]]+</utf,ucp,ascii_posix \= Expect no match >\x{a0} \x{a0}< No match
/(?aP)[[:alnum:]]+/i,ucp,utf abcáxyz 0: abc abc\x{660}xyz 0: abc
/(?aP)[[:alnum:]\d]+/i,ucp,utf abc\x{660}xyz 0: abc\x{660}xyz /(*UCP)(*UTF)[[:alnum:]](?aP:[[:alnum:]])[[:alnum:]]/ \x{660}A\x{660} 0: \x{660}A\x{660} \= Expect no match \x{660}\x{660}\x{660} No match # VARIOUS
/[\d\s\w]+/a,ucp,utf 9 A\x{660}À 0: 9 A 9 AÀ\x{660} 0: 9 A
# End PCRE2_EXTRA_ASCII_xxx tests
/(?<!(|l ))/utf (?<!(|l )) No match
# End of testinput5
|