|
|
# This set of tests checks the API, internals, and non-Perl stuff for UTF
# support, including Unicode properties. However, tests that give different
# results in 8-bit, 16-bit, and 32-bit modes are excluded (see tests 10 and
# 12).
#newline_default lf any anycrlf
# PCRE2 and Perl disagree about the characteristics of certain Unicode
# characters. For example, 061C was considered by Perl to be Arabic, though
# it was not listed as such in the Unicode Scripts.txt file for Unicode 8.
# However, it *is* in that file for Unicode 10, but when I came to re-check,
# Perl had changed in the meantime, with 5.026 not recognizing it as Arabic.
# 2066-2069 are graphic and printable according to Perl, though they are
# actually "isolate" control characters. That is why the following tests are
# here rather than in test 4.
/^[\p{Arabic}]/utf \x{061c}
/^[[:graph:]]+$/utf,ucp \= Expect no match \x{61c} \x{2066} \x{2067} \x{2068} \x{2069}
/^[[:print:]]+$/utf,ucp \= Expect no match \x{61c} \x{2066} \x{2067} \x{2068} \x{2069}
/^[[:^graph:]]+$/utf,ucp \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680} \x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069}
/^[[:^print:]]+$/utf,ucp \x{09}\x{1D}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067} \x{2068}\x{2069}
# Perl does not consider U+180e to be a space character. It is true that it
# does not appear in the Unicode PropList.txt file as such, but in many other
# sources it is listed as a space, and has been treated as such in PCRE for
# a long time.
/^>[[:blank:]]*/utf,ucp >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028}
/^A\s+Z/utf,ucp A\x{85}\x{180e}\x{2005}Z
/^A[\s]+Z/utf,ucp A\x{2005}Z A\x{85}\x{2005}Z
/^[[:graph:]]+$/utf,ucp \= Expect no match \x{180e}
/^[[:print:]]+$/utf,ucp \x{180e}
/^[[:^graph:]]+$/utf,ucp \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e}
/^[[:^print:]]+$/utf,ucp \= Expect no match \x{180e}
# End of U+180E tests.
# ---------------------------------------------------------------------
/\x{110000}/IB,utf
/\o{4200000}/IB,utf
/\x{ffffffff}/utf
/\o{37777777777}/utf
/\x{100000000}/utf
/\o{77777777777}/utf
/\x{d800}/utf
/\o{154000}/utf
/\x{dfff}/utf
/\o{157777}/utf
/\x{d7ff}/utf
/\o{153777}/utf
/\x{e000}/utf
/\o{170000}/utf
/^\x{100}a\x{1234}/utf \x{100}a\x{1234}bcd
/\x{0041}\x{2262}\x{0391}\x{002e}/IB,utf \x{0041}\x{2262}\x{0391}\x{002e}
/.{3,5}X/IB,utf \x{212ab}\x{212ab}\x{212ab}\x{861}X
/.{3,5}?/IB,utf \x{212ab}\x{212ab}\x{212ab}\x{861}
/^[ab]/IB,utf bar \= Expect no match c \x{ff} \x{100}
/\x{100}*(\d+|"(?1)")/utf 1234 "1234" \x{100}1234 "\x{100}1234" \x{100}\x{100}12ab \x{100}\x{100}"12" \= Expect no match \x{100}\x{100}abcd
/\x{100}*/IB,utf
/a\x{100}*/IB,utf
/ab\x{100}*/IB,utf
/[\x{200}-\x{100}]/utf
/[Ā-Ą]/utf \x{100} \x{104} \= Expect no match \x{105} \x{ff}
/[\xFF]/IB >\xff<
/[^\xFF]/IB
/[Ä-Ü]/utf Ö # Matches without Study \x{d6}
/[Ä-Ü]/utf Ö <-- Same with Study \x{d6}
/[\x{c4}-\x{dc}]/utf Ö # Matches without Study \x{d6}
/[\x{c4}-\x{dc}]/utf Ö <-- Same with Study \x{d6}
/[^\x{100}]abc(xyz(?1))/IB,utf
/(\x{100}(b(?2)c))?/IB,utf
/(\x{100}(b(?2)c)){0,2}/IB,utf
/(\x{100}(b(?1)c))?/IB,utf
/(\x{100}(b(?1)c)){0,2}/IB,utf
/\W/utf A.B A\x{100}B
/\w/utf \x{100}X
# Use no_start_optimize because the first code unit is different in 8-bit from
# the wider modes.
/^\ሴ/IB,utf,no_start_optimize
/()()()()()()()()()() ()()()()()()()()()() ()()()()()()()()()() ()()()()()()()()()() A (x) (?41) B/x,utf AxxB
/^[\x{100}\E-\Q\E\x{150}]/B,utf
/^[\QĀ\E-\QŐ\E]/B,utf
/^abc./gmx,newline=any,utf abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK
/abc.$/gmx,newline=any,utf abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x{0085} abc7\x{2028} abc8\x{2029} abc9
/^a\Rb/bsr=unicode,utf a\nb a\rb a\r\nb a\x0bb a\x0cb a\x{85}b a\x{2028}b a\x{2029}b \= Expect no match a\n\rb
/^a\R*b/bsr=unicode,utf ab a\nb a\rb a\r\nb a\x0bb a\x0c\x{2028}\x{2029}b a\x{85}b a\n\rb a\n\r\x{85}\x0cb
/^a\R+b/bsr=unicode,utf a\nb a\rb a\r\nb a\x0bb a\x0c\x{2028}\x{2029}b a\x{85}b a\n\rb a\n\r\x{85}\x0cb \= Expect no match ab
/^a\R{1,3}b/bsr=unicode,utf a\nb a\n\rb a\n\r\x{85}b a\r\n\r\nb a\r\n\r\n\r\nb a\n\r\n\rb a\n\n\r\nb \= Expect no match a\n\n\n\rb a\r
/\H\h\V\v/utf X X\x0a X\x09X\x0b \= Expect no match \x{a0} X\x0a
/\H*\h+\V?\v{3,4}/utf \x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a \x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a \x09\x20\x{a0}\x0a\x0b\x0c \= Expect no match \x09\x20\x{a0}\x0a\x0b
/\H\h\V\v/utf \x{3001}\x{3000}\x{2030}\x{2028} X\x{180e}X\x{85} \= Expect no match \x{2009} X\x0a
/\H*\h+\V?\v{3,4}/utf \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a \x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a \x09\x20\x{202f}\x0a\x0b\x0c \= Expect no match \x09\x{200a}\x{a0}\x{2028}\x0b
/[\h]/B,utf >\x{1680}
/[\h]{3,}/B,utf >\x{1680}\x{180e}\x{2000}\x{2003}\x{200a}\x{202f}\x{205f}\x{3000}<
/[\v]/B,utf
/[\H]/B,utf
/[\V]/B,utf
/.*$/newline=any,utf \x{1ec5}
/a\Rb/I,bsr=anycrlf,utf a\rb a\nb a\r\nb \= Expect no match a\x{85}b a\x0bb
/a\Rb/I,bsr=unicode,utf a\rb a\nb a\r\nb a\x{85}b a\x0bb
/a\R?b/I,bsr=anycrlf,utf a\rb a\nb a\r\nb \= Expect no match a\x{85}b a\x0bb
/a\R?b/I,bsr=unicode,utf a\rb a\nb a\r\nb a\x{85}b a\x0bb
/.*a.*=.b.*/utf,newline=any QQQ\x{2029}ABCaXYZ=!bPQR \= Expect no match a\x{2029}b \x61\xe2\x80\xa9\x62
/[[:a\x{100}b:]]/utf
/a[^]b/utf,allow_empty_class,match_unset_backref a\x{1234}b a\nb \= Expect no match ab
/a[^]+b/utf,allow_empty_class,match_unset_backref aXb a\nX\nX\x{1234}b \= Expect no match ab
/(\x{de})\1/ \x{de}\x{de}
/X/newline=any,utf,firstline A\x{1ec5}ABCXYZ
/Xa{2,4}b/utf X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps
/Xa{2,4}?b/utf X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps
/Xa{2,4}+b/utf X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps
/X\x{123}{2,4}b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps
/X\x{123}{2,4}?b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps
/X\x{123}{2,4}+b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps
/X\x{123}{2,4}b/utf \= Expect no match Xx\=ps X\x{123}x\=ps X\x{123}\x{123}x\=ps X\x{123}\x{123}\x{123}x\=ps X\x{123}\x{123}\x{123}\x{123}x\=ps
/X\x{123}{2,4}?b/utf \= Expect no match Xx\=ps X\x{123}x\=ps X\x{123}\x{123}x\=ps X\x{123}\x{123}\x{123}x\=ps X\x{123}\x{123}\x{123}\x{123}x\=ps
/X\x{123}{2,4}+b/utf \= Expect no match Xx\=ps X\x{123}x\=ps X\x{123}\x{123}x\=ps X\x{123}\x{123}\x{123}x\=ps X\x{123}\x{123}\x{123}\x{123}x\=ps
/X\d{2,4}b/utf X\=ps X3\=ps X33\=ps X333\=ps X3333\=ps
/X\d{2,4}?b/utf X\=ps X3\=ps X33\=ps X333\=ps X3333\=ps
/X\d{2,4}+b/utf X\=ps X3\=ps X33\=ps X333\=ps X3333\=ps
/X\D{2,4}b/utf X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps
/X\D{2,4}?b/utf X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps
/X\D{2,4}+b/utf X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps
/X\D{2,4}b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps
/X\D{2,4}?b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps
/X\D{2,4}+b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps
/X[abc]{2,4}b/utf X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps
/X[abc]{2,4}?b/utf X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps
/X[abc]{2,4}+b/utf X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps
/X[abc\x{123}]{2,4}b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps
/X[abc\x{123}]{2,4}?b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps
/X[abc\x{123}]{2,4}+b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps
/X[^a]{2,4}b/utf X\=ps Xz\=ps Xzz\=ps Xzzz\=ps Xzzzz\=ps
/X[^a]{2,4}?b/utf X\=ps Xz\=ps Xzz\=ps Xzzz\=ps Xzzzz\=ps
/X[^a]{2,4}+b/utf X\=ps Xz\=ps Xzz\=ps Xzzz\=ps Xzzzz\=ps
/X[^a]{2,4}b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps
/X[^a]{2,4}?b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps
/X[^a]{2,4}+b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps
/(Y)X\1{2,4}b/utf YX\=ps YXY\=ps YXYY\=ps YXYYY\=ps YXYYYY\=ps
/(Y)X\1{2,4}?b/utf YX\=ps YXY\=ps YXYY\=ps YXYYY\=ps YXYYYY\=ps
/(Y)X\1{2,4}+b/utf YX\=ps YXY\=ps YXYY\=ps YXYYY\=ps YXYYYY\=ps
/(\x{123})X\1{2,4}b/utf \x{123}X\=ps \x{123}X\x{123}\=ps \x{123}X\x{123}\x{123}\=ps \x{123}X\x{123}\x{123}\x{123}\=ps \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
/(\x{123})X\1{2,4}?b/utf \x{123}X\=ps \x{123}X\x{123}\=ps \x{123}X\x{123}\x{123}\=ps \x{123}X\x{123}\x{123}\x{123}\=ps \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
/(\x{123})X\1{2,4}+b/utf \x{123}X\=ps \x{123}X\x{123}\=ps \x{123}X\x{123}\x{123}\=ps \x{123}X\x{123}\x{123}\x{123}\=ps \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
/\bthe cat\b/utf the cat\=ps the cat\=ph
/abcd*/utf xxxxabcd\=ps xxxxabcd\=ph
/abcd*/i,utf xxxxabcd\=ps xxxxabcd\=ph XXXXABCD\=ps XXXXABCD\=ph
/abc\d*/utf xxxxabc1\=ps xxxxabc1\=ph
/(a)bc\1*/utf xxxxabca\=ps xxxxabca\=ph
/abc[de]*/utf xxxxabcde\=ps xxxxabcde\=ph
/X\W{3}X/utf X\=ps
/\sxxx\s/utf,tables=2 AB\x{85}xxx\x{a0}XYZ AB\x{a0}xxx\x{85}XYZ
/\S \S/utf,tables=2 \x{a2} \x{84}
'A#хц'Bx,newline=any,utf
'A#хц PQ'Bx,newline=any,utf
/a+#хaa z#XX?/Bx,newline=any,utf
/a+#хaa z#х?/Bx,newline=any,utf
/\g{A}xxx#bXX(?'A'123)
(?'A'456)/Bx,newline=any,utf
/\g{A}xxx#bх(?'A'123)
(?'A'456)/Bx,newline=any,utf
/^\cģ/utf
/(\R*)(.)/s,utf \r\n \r\r\n\n\r \r\r\n\n\r\n
/(\R)*(.)/s,utf \r\n \r\r\n\n\r \r\r\n\n\r\n
/[^\x{1234}]+/Ii,utf
/[^\x{1234}]+?/Ii,utf
/[^\x{1234}]++/Ii,utf
/[^\x{1234}]{2}/Ii,utf
/f.*/ for\=ph
/f.*/s for\=ph
/f.*/utf for\=ph
/f.*/s,utf for\=ph
/\x{d7ff}\x{e000}/utf
/\x{d800}/utf
/\x{dfff}/utf
/\h+/utf \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000}
/[\h\x{e000}]+/B,utf \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000}
/\H+/utf \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} \x{2000}\x{200a}\x{1fff}\x{200b} \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001}
/[\H\x{d7ff}]+/B,utf \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} \x{2000}\x{200a}\x{1fff}\x{200b} \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001}
/\v+/utf \x{2027}\x{2030}\x{2028}\x{2029} \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
/[\v\x{e000}]+/B,utf \x{2027}\x{2030}\x{2028}\x{2029} \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
/\V+/utf \x{2028}\x{2029}\x{2027}\x{2030} \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86}
/[\V\x{d7ff}]+/B,utf \x{2028}\x{2029}\x{2027}\x{2030} \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86}
/\R+/bsr=unicode,utf \x{2027}\x{2030}\x{2028}\x{2029} \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
/(..)\1/utf ab\=ps aba\=ps abab\=ps
/(..)\1/i,utf ab\=ps abA\=ps aBAb\=ps
/(..)\1{2,}/utf ab\=ps aba\=ps abab\=ps ababa\=ps ababab\=ps ababab\=ph abababa\=ps abababa\=ph
/(..)\1{2,}/i,utf ab\=ps aBa\=ps aBAb\=ps AbaBA\=ps abABAb\=ps aBAbaB\=ph abABabA\=ps abaBABa\=ph
/(..)\1{2,}?x/i,utf ab\=ps abA\=ps aBAb\=ps abaBA\=ps abAbaB\=ps abaBabA\=ps abAbABaBx\=ps
/./utf,newline=crlf \r\=ps \r\=ph
/.{2,3}/utf,newline=crlf \r\=ps \r\=ph \r\r\=ps \r\r\=ph \r\r\r\=ps \r\r\r\=ph
/.{2,3}?/utf,newline=crlf \r\=ps \r\=ph \r\r\=ps \r\r\=ph \r\r\r\=ps \r\r\r\=ph
/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/B,utf
/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/Bi,utf
/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/B,utf
/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/Bi,utf
/(?<=\x{1234}\x{1234})\bxy/I,utf
/(?<!^)ETA/utf \= Expect no match ETA
/\u0100/B,utf,alt_bsux,allow_empty_class,match_unset_backref
/[\u0100-\u0200]/B,utf,alt_bsux,allow_empty_class,match_unset_backref
/\ud800/utf,alt_bsux,allow_empty_class,match_unset_backref
/^\u{0000000000010ffff}/utf,extra_alt_bsux \x{10ffff}
/\u{ 1bb1}/utf,extra_alt_bsux u{ 1bb1} \= Expect no match \x{1bb1}
/\u/utf,alt_bsux \\u
/^a+[a\x{200}]/B,utf aa
/[b-d\x{200}-\x{250}]*[ae-h]?#[\x{200}-\x{250}]{0,8}[\x00-\xff]*#[\x{200}-\x{250}]+[a-z]/B,utf
/[\p{L}]/IB
/[\p{^L}]/IB
/[\P{L}]/IB
/[\P{^L}]/IB
/[abc\p{L}\x{0660}]/IB,utf
/[\p{Nd}]/IB,utf 1234
/[\p{Nd}+-]+/IB,utf 1234 12-34 12+\x{661}-34 \= Expect no match abcd
/(?:[\PPa*]*){8,}/
/[\P{Any}]/B
/[\P{Any}\E]/B
/(\P{Yi}+\277)/
/(\P{Yi}+\277)?/
/(?<=\P{Yi}{3}A)X/
/\p{Yi}+(\P{Yi}+)(?1)/
/(\P{Yi}{2}\277)?/
/[\P{Yi}A]/
/[\P{Yi}\P{Yi}\P{Yi}A]/
/[^\P{Yi}A]/
/[^\P{Yi}\P{Yi}\P{Yi}A]/
/(\P{Yi}*\277)*/
/(\P{Yi}*?\277)*/
/(\p{Yi}*+\277)*/
/(\P{Yi}?\277)*/
/(\P{Yi}??\277)*/
/(\p{Yi}?+\277)*/
/(\P{Yi}{0,3}\277)*/
/(\P{Yi}{0,3}?\277)*/
/(\p{Yi}{0,3}+\277)*/
/\p{Zl}{2,3}+/B,utf
\x{2028}\x{2028}\x{2028}
/\p{Zl}/B,utf
/\p{Lu}{3}+/B,utf
/\pL{2}+/B,utf
/\p{Cc}{2}+/B,utf
/^\p{Cf}/utf \x{180e} \x{061c} \x{2066} \x{2067} \x{2068} \x{2069}
/^\p{Cs}/utf \x{dfff}\=no_utf_check \= Expect no match \x{09f}
/^\p{Mn}/utf \x{1a1b}
/^\p{Pe}/utf \x{2309} \x{230b}
/^\p{Ps}/utf \x{2308} \x{230a}
/^\p{Sc}+/utf $\x{a2}\x{a3}\x{a4}\x{a5}\x{a6} \x{9f2} \= Expect no match X \x{2c2}
/^\p{Zs}/utf \ \ \x{a0} \x{1680} \x{2000} \x{2001} \= Expect no match \x{2028} \x{200d}
# These are here because Perl has problems with the negative versions of the
# properties and has changed how it behaves for caseless matching.
/\p{^Lu}/i,utf 1234 \= Expect no match ABC
/\P{Lu}/i,utf 1234 \= Expect no match ABC
/\p{Ll}/i,utf a Az \= Expect no match ABC
/\p{Lu}/i,utf A a\x{10a0}B \= Expect no match a \x{1d00}
/\p{Lu}/i,utf A aZ \= Expect no match abc
/[\x{c0}\x{391}]/i,utf \x{c0} \x{e0}
# The next two are special cases where the lengths of the different cases of
# the same character differ. The first went wrong with heap frame storage; the
# second was broken in all cases.
/^\x{023a}+?(\x{0130}+)/i,utf \x{023a}\x{2c65}\x{0130}
/^\x{023a}+([^X])/i,utf \x{023a}\x{2c65}X
/\x{c0}+\x{116}+/i,utf \x{c0}\x{e0}\x{116}\x{117}
/[\x{c0}\x{116}]+/i,utf \x{c0}\x{e0}\x{116}\x{117}
/(\x{de})\1/i,utf \x{de}\x{de} \x{de}\x{fe} \x{fe}\x{fe} \x{fe}\x{de}
/^\x{c0}$/i,utf \x{c0} \x{e0}
/^\x{e0}$/i,utf \x{c0} \x{e0}
# The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE
# will match it only with UCP support, because without that it has no notion
# of case for anything other than the ASCII letters.
/((?i)[\x{c0}])/utf \x{c0} \x{e0}
/(?i:[\x{c0}])/utf \x{c0} \x{e0}
# These are PCRE's extra properties to help with Unicodizing \d etc.
/^\p{Xan}/utf ABCD 1234 \x{6ca} \x{a6c} \x{10a7} \= Expect no match _ABC
/^\p{Xan}+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ \= Expect no match _ABC
/^\p{Xan}+?/utf \x{6ca}\x{a6c}\x{10a7}_
/^\p{Xan}*/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_
/^\p{Xan}{2,9}/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_
/^\p{Xan}{2,9}?/utf \x{6ca}\x{a6c}\x{10a7}_
/^[\p{Xan}]/utf ABCD1234_ 1234abcd_ \x{6ca} \x{a6c} \x{10a7} \= Expect no match _ABC
/^[\p{Xan}]+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ \= Expect no match _ABC
/^>\p{Xsp}/utf >\x{1680}\x{2028}\x{0b} >\x{a0} \= Expect no match \x{0b}
/^>\p{Xsp}+/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^>\p{Xsp}+?/utf >\x{1680}\x{2028}\x{0b}
/^>\p{Xsp}*/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^>\p{Xsp}{2,9}/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^>\p{Xsp}{2,9}?/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^>[\p{Xsp}]/utf >\x{2028}\x{0b}
/^>[\p{Xsp}]+/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^>\p{Xps}/utf >\x{1680}\x{2028}\x{0b} >\x{a0} \= Expect no match \x{0b}
/^>\p{Xps}+/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^>\p{Xps}+?/utf >\x{1680}\x{2028}\x{0b}
/^>\p{Xps}*/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^>\p{Xps}{2,9}/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^>\p{Xps}{2,9}?/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^>[\p{Xps}]/utf >\x{2028}\x{0b}
/^>[\p{Xps}]+/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^\p{Xwd}/utf ABCD 1234 \x{6ca} \x{a6c} \x{10a7} _ABC \= Expect no match []
/^\p{Xwd}+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_
/^\p{Xwd}+?/utf \x{6ca}\x{a6c}\x{10a7}_
/^\p{Xwd}*/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_
/^\p{Xwd}{2,9}/utf A_B12\x{6ca}\x{a6c}\x{10a7}
/^\p{Xwd}{2,9}?/utf \x{6ca}\x{a6c}\x{10a7}_
/^[\p{Xwd}]/utf ABCD1234_ 1234abcd_ \x{6ca} \x{a6c} \x{10a7} _ABC \= Expect no match []
/^[\p{Xwd}]+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_
# A check not in UTF-8 mode
/^[\p{Xwd}]+/ ABCD1234_
# Some negative checks
/^[\P{Xwd}]+/utf !.+\x{019}\x{482}AB
/^[\p{^Xwd}]+/utf !.+\x{019}\x{589}AB
/[\D]/B,utf,ucp 1\x{3c8}2
/[\d]/B,utf,ucp >\x{6f4}<
/[\S]/B,utf,ucp \x{1680}\x{6f4}\x{1680}
/[\s]/B,utf,ucp >\x{1680}<
/[\W]/B,utf,ucp A\x{1735}B
/[\w]/B,utf,ucp >\x{1723}<
/\D/B,utf,ucp 1\x{3c8}2
/\d/B,utf,ucp >\x{6f4}<
/\S/B,utf,ucp \x{1680}\x{6f4}\x{1680}
/\s/B,utf,ucp >\x{1680}>
/\W/B,utf,ucp A\x{1735}B
/\w/B,utf,ucp >\x{1723}<
/[[:alpha:]]/B,ucp
/[[:lower:]]/B,ucp
/[[:upper:]]/B,ucp
/[[:alnum:]]/B,ucp
/[[:ascii:]]/B,ucp
/[[:cntrl:]]/B,ucp
/[[:digit:]]/B,ucp
/[[:digit:]]/B,ucp,ascii_digit
/[[:graph:]]/B,ucp
/[[:print:]]/B,ucp
/[[:punct:]]/B,ucp
/[[:space:]]/B,ucp
/[[:word:]]/B,ucp
/[[:xdigit:]]/B,ucp
/[[:xdigit:]]/B,ucp,ascii_digit
# Unicode properties for \b and \B
/\b...\B/utf,ucp abc_ \x{37e}abc\x{376} \x{37e}\x{376}\x{371}\x{393}\x{394} !\x{c0}++\x{c1}\x{c2} !\x{c0}+++++
# Without PCRE_UCP, non-ASCII always fail, even if < 256
/\b...\B/utf abc_ \= Expect no match \x{37e}abc\x{376} \x{37e}\x{376}\x{371}\x{393}\x{394} !\x{c0}++\x{c1}\x{c2} !\x{c0}+++++
# With PCRE_UCP, non-UTF8 chars that are < 256 still check properties
/\b...\B/ucp abc_ !\x{c0}++\x{c1}\x{c2} !\x{c0}+++++
# Some of these are silly, but they check various combinations
/[[:^alpha:][:^cntrl:]]+/B,utf,ucp 123 abc
/[[:^cntrl:][:^alpha:]]+/B,utf,ucp 123 abc
/[[:alpha:]]+/B,utf,ucp abc
/[[:^alpha:]\S]+/B,utf,ucp 123 abc
/[^\d]+/B,utf,ucp abc123 abc\x{123} \x{660}abc
/\p{Lu}+9\p{Lu}+B\p{Lu}+b/B
/\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/B
/\P{Lu}+9\P{Lu}+B\P{Lu}+b/B
/\p{Han}+X\p{Greek}+\x{370}/B,utf
/\p{Xan}+!\p{Xan}+A/B
/\p{Xsp}+!\p{Xsp}\t/B
/\p{Xps}+!\p{Xps}\t/B
/\p{Xwd}+!\p{Xwd}_/B
/A+\p{N}A+\dB+\p{N}*B+\d*/B,ucp
# These behaved oddly in Perl, so they are kept in this test
/(\x{23a}\x{23a}\x{23a})?\1/i,utf \= Expect no match \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
/(ȺȺȺ)?\1/i,utf \= Expect no match ȺȺȺⱥⱥ
/(\x{23a}\x{23a}\x{23a})?\1/i,utf \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
/(ȺȺȺ)?\1/i,utf ȺȺȺⱥⱥⱥ
/(\x{23a}\x{23a}\x{23a})\1/i,utf \= Expect no match \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
/(ȺȺȺ)\1/i,utf \= Expect no match ȺȺȺⱥⱥ
/(\x{23a}\x{23a}\x{23a})\1/i,utf \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
/(ȺȺȺ)\1/i,utf ȺȺȺⱥⱥⱥ
/(\x{2c65}\x{2c65})\1/i,utf \x{2c65}\x{2c65}\x{23a}\x{23a}
/(ⱥⱥ)\1/i,utf ⱥⱥȺȺ
/(\x{23a}\x{23a}\x{23a})\1Y/i,utf X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ
/(\x{2c65}\x{2c65})\1Y/i,utf X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ
# These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE
/^[\p{Batak}]/utf \x{1bc0} \x{1bff} \= Expect no match \x{1bf4}
/^[\p{Brahmi}]/utf \x{11000} \x{1106f} \= Expect no match \x{1104e}
/^[\p{Mandaic}]/utf \x{840} \x{85e} \= Expect no match \x{85c} \x{85d}
/(\X*)(.)/s,utf A\x{300}
/^S(\X*)e(\X*)$/utf Stéréo
/^\X/utf ́réo
/^a\X41z/alt_bsux,allow_empty_class,match_unset_backref,dupnames aX41z \= Expect no match aAz
/\X/ a\=ps a\=ph
/\Xa/ aa\=ps aa\=ph
/\X{2}/ aa\=ps aa\=ph
/\X+a/ a\=ps aa\=ps aa\=ph
/\X+?a/ a\=ps ab\=ps aa\=ps aa\=ph aba\=ps
# These Unicode 6.1.0 scripts are not known to Perl.
/\p{Chakma}\d/utf,ucp \x{11100}\x{1113c}
/\p{Takri}\d/utf,ucp \x{11680}\x{116c0}
/^\X/utf A\=ps A\=ph A\x{300}\x{301}\=ps A\x{300}\x{301}\=ph A\x{301}\=ps A\x{301}\=ph
/^\X{2,3}/utf A\=ps A\=ph AA\=ps AA\=ph A\x{300}\x{301}\=ps A\x{300}\x{301}\=ph A\x{300}\x{301}A\x{300}\x{301}\=ps A\x{300}\x{301}A\x{300}\x{301}\=ph
/^\X{2}/utf AA\=ps AA\=ph A\x{300}\x{301}A\x{300}\x{301}\=ps A\x{300}\x{301}A\x{300}\x{301}\=ph
/^\X+/utf AA\=ps AA\=ph
/^\X+?Z/utf AA\=ps AA\=ph
/A\x{3a3}B/IBi,utf
/[\x{3a3}]/Bi,utf
/[^\x{3a3}]/Bi,utf
/[\x{3a3}]+/Bi,utf
/[^\x{3a3}]+/Bi,utf
/a*\x{3a3}/Bi,utf
/\x{3a3}+a/Bi,utf
/\x{3a3}*\x{3c2}/Bi,utf
/\x{3a3}{3}/i,utf,aftertext \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
/\x{3a3}{2,4}/i,utf,aftertext \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
/\x{3a3}{2,4}?/i,utf,aftertext \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
/\x{3a3}+./i,utf,aftertext \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
/\x{3a3}++./i,utf,aftertext \= Expect no match \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
/\x{3a3}*\x{3c2}/Bi,utf
/[^\x{3a3}]*\x{3c2}/Bi,utf
/[^a]*\x{3c2}/Bi,utf
/ist/Bi,utf \= Expect no match ikt
/is+t/i,utf iSs\x{17f}t \= Expect no match ikt
/is+?t/i,utf \= Expect no match ikt
/is?t/i,utf \= Expect no match ikt
/is{2}t/i,utf \= Expect no match iskt
# This property is a PCRE special
/^\p{Xuc}/utf $abc @abc `abc \x{1234}abc \= Expect no match abc
/^\p{Xuc}+/utf $@`\x{a0}\x{1234}\x{e000}** \= Expect no match \x{9f}
/^\p{Xuc}+?/utf $@`\x{a0}\x{1234}\x{e000}** \= Expect no match \x{9f}
/^\p{Xuc}+?\*/utf $@`\x{a0}\x{1234}\x{e000}** \= Expect no match \x{9f}
/^\p{Xuc}++/utf $@`\x{a0}\x{1234}\x{e000}** \= Expect no match \x{9f}
/^\p{Xuc}{3,5}/utf $@`\x{a0}\x{1234}\x{e000}** \= Expect no match \x{9f}
/^\p{Xuc}{3,5}?/utf $@`\x{a0}\x{1234}\x{e000}** \= Expect no match \x{9f}
/^[\p{Xuc}]/utf $@`\x{a0}\x{1234}\x{e000}** \= Expect no match \x{9f}
/^[\p{Xuc}]+/utf $@`\x{a0}\x{1234}\x{e000}** \= Expect no match \x{9f}
/^\P{Xuc}/utf abc \= Expect no match $abc @abc `abc \x{1234}abc
/^[\P{Xuc}]/utf abc \= Expect no match $abc @abc `abc \x{1234}abc
# Some auto-possessification tests
/\pN+\z/B
/\PN+\z/B
/\pN+/B
/\PN+/B
/\p{Any}+\p{Any} \p{Any}+\P{Any} \p{Any}+\p{L&} \p{Any}+\p{L} \p{Any}+\p{Lu} \p{Any}+\p{Han} \p{Any}+\p{Xan} \p{Any}+\p{Xsp} \p{Any}+\p{Xps} \p{Xwd}+\p{Any} \p{Any}+\p{Xuc}/Bx,ucp
/\p{L&}+\p{Any} \p{L&}+\p{L&} \P{L&}+\p{L&} \p{L&}+\p{L} \p{L&}+\p{Lu} \p{L&}+\p{Han} \p{L&}+\p{Xan} \p{L&}+\P{Xan} \p{L&}+\p{Xsp} \p{L&}+\p{Xps} \p{Xwd}+\p{L&} \p{L&}+\p{Xuc}/Bx,ucp
/\p{N}+\p{Any} \p{N}+\p{L&} \p{N}+\p{L} \p{N}+\P{L} \p{N}+\P{N} \p{N}+\p{Lu} \p{N}+\p{Han} \p{N}+\p{Xan} \p{N}+\p{Xsp} \p{N}+\p{Xps} \p{Xwd}+\p{N} \p{N}+\p{Xuc}/Bx,ucp
/\p{Lu}+\p{Any} \p{Lu}+\p{L&} \p{Lu}+\p{L} \p{Lu}+\p{Lu} \P{Lu}+\p{Lu} \p{Lu}+\p{Nd} \p{Lu}+\P{Nd} \p{Lu}+\p{Han} \p{Lu}+\p{Xan} \p{Lu}+\p{Xsp} \p{Lu}+\p{Xps} \p{Xwd}+\p{Lu} \p{Lu}+\p{Xuc}/Bx,ucp
/\p{Han}+\p{Lu} \p{Han}+\p{L&} \p{Han}+\p{L} \p{Han}+\p{Lu} \p{Han}+\p{Arabic} \p{Arabic}+\p{Arabic} \p{Han}+\p{Xan} \p{Han}+\p{Xsp} \p{Han}+\p{Xps} \p{Xwd}+\p{Han} \p{Han}+\p{Xuc}/Bx,ucp
/\p{Xan}+\p{Any} \p{Xan}+\p{L&} \P{Xan}+\p{L&} \p{Xan}+\p{L} \p{Xan}+\p{Lu} \p{Xan}+\p{Han} \p{Xan}+\p{Xan} \p{Xan}+\P{Xan} \p{Xan}+\p{Xsp} \p{Xan}+\p{Xps} \p{Xwd}+\p{Xan} \p{Xan}+\p{Xuc}/Bx,ucp
/\p{Xsp}+\p{Any} \p{Xsp}+\p{L&} \p{Xsp}+\p{L} \p{Xsp}+\p{Lu} \p{Xsp}+\p{Han} \p{Xsp}+\p{Xan} \p{Xsp}+\p{Xsp} \P{Xsp}+\p{Xsp} \p{Xsp}+\p{Xps} \p{Xwd}+\p{Xsp} \p{Xsp}+\p{Xuc}/Bx,ucp
/\p{Xwd}+\p{Any} \p{Xwd}+\p{L&} \p{Xwd}+\p{L} \p{Xwd}+\p{Lu} \p{Xwd}+\p{Han} \p{Xwd}+\p{Xan} \p{Xwd}+\p{Xsp} \p{Xwd}+\p{Xps} \p{Xwd}+\p{Xwd} \p{Xwd}+\P{Xwd} \p{Xwd}+\p{Xuc}/Bx,ucp
/\p{Xuc}+\p{Any} \p{Xuc}+\p{L&} \p{Xuc}+\p{L} \p{Xuc}+\p{Lu} \p{Xuc}+\p{Han} \p{Xuc}+\p{Xan} \p{Xuc}+\p{Xsp} \p{Xuc}+\p{Xps} \p{Xwd}+\p{Xuc} \p{Xuc}+\p{Xuc} \p{Xuc}+\P{Xuc}/Bx,ucp
/\p{N}+\p{Ll} \p{N}+\p{Nd} \p{N}+\P{Nd}/Bx,ucp
/\p{Xan}+\p{L} \p{Xan}+\p{N} \p{Xan}+\p{C} \p{Xan}+\P{L} \P{Xan}+\p{N} \p{Xan}+\P{C}/Bx,ucp
/\p{L}+\p{Xan} \p{N}+\p{Xan} \p{C}+\p{Xan} \P{L}+\p{Xan} \p{N}+\p{Xan} \P{C}+\p{Xan} \p{L}+\P{Xan}/Bx,ucp
/\p{Xan}+\p{Lu} \p{Xan}+\p{Nd} \p{Xan}+\p{Cc} \p{Xan}+\P{Ll} \P{Xan}+\p{No} \p{Xan}+\P{Cf}/Bx,ucp
/\p{Lu}+\p{Xan} \p{Nd}+\p{Xan} \p{Cs}+\p{Xan} \P{Lt}+\p{Xan} \p{Nl}+\p{Xan} \P{Cc}+\p{Xan} \p{Lt}+\P{Xan}/Bx,ucp
/\w+\p{P} \w+\p{Po} \w+\s \p{Xan}+\s \s+\p{Xan} \s+\w/Bx,ucp
/\w+\P{P} \W+\p{Po} \w+\S \P{Xan}+\s \s+\P{Xan} \s+\W/Bx,ucp
/\w+\p{Po} \w+\p{Pc} \W+\p{Po} \W+\p{Pc} \w+\P{Po} \w+\P{Pc}/Bx,ucp
/\p{Nl}+\p{Xan} \P{Nl}+\p{Xan} \p{Nl}+\P{Xan} \P{Nl}+\P{Xan}/Bx,ucp
/\p{Xan}+\p{Nl} \P{Xan}+\p{Nl} \p{Xan}+\P{Nl} \P{Xan}+\P{Nl}/Bx,ucp
/\p{Xan}+\p{Nd} \P{Xan}+\p{Nd} \p{Xan}+\P{Nd} \P{Xan}+\P{Nd}/Bx,ucp
# End auto-possessification tests
/\w+/B,utf,ucp,auto_callout abcd
/[\p{N}]?+/B,no_auto_possess
/[\p{L}ab]{2,3}+/B,no_auto_possess
/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx
/.+\X/Bsx
/\X+$/Bmx
/\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx
/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/B,utf,ucp
/[RST]+/Bi,utf,ucp
/[R-T]+/Bi,utf,ucp
/[Q-U]+/Bi,utf,ucp
/^s?c/Iim,utf scat
/\X?abc/utf,no_start_optimize \xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06
/\x{100}\x{200}\K\x{300}/utf,startchar \x{100}\x{200}\x{300}
# Test UTF characters in a substitution
/ábc/utf,replace=XሴZ 123ábc123
/(?<=abc)(|def)/g,utf,replace=<$0> 123abcáyzabcdef789abcሴqr
/[A-`]/iB,utf abcdefghijklmno
/(?<=\K\x{17f})/g,utf,aftertext,allow_lookaround_bsk \x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
/(?<=\K\x{17f})/altglobal,utf,aftertext,allow_lookaround_bsk \x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
"\xa\xf<(.\pZ*\P{Xwd}+^\xa8\3'3yq.::?(?J:()\xd1+!~:3'(8?:)':(?'d'(?'d'^u]!.+.+\\A\Ah(n+?9){7}+\K;(?'X'u'(?'c'(?'z'(?<y>\xb::\xf0'|\xd3(\xae?'w(z\x8?P>l)\x8?P>a)'\H\R\xd1+!!~:3'(?:h$N{26875}\W+?\\=D{2}\x89(?i:Uy0\N({2\xa(\v\x85*){y*\A(()\p{L}+?\P{^Xan}'+?\xff\+pS\?|).{;y*\A(()\p{L}+?\8}\d?1(|)(/1){7}.+[Lp{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(\xbf(R))\x8a\X*?\x8a\xb\xd1^9\3*+(\xc1,\k'R'\xb4)\xcc(z\z(?J)(?'X'\x1b(\xb\xd1^9\?'3*+P{^Xan}+?\xff\+(\xc1.]k+\xb'Pm'\xb4)\xcc4f\xa7'\xd1V(?i:U,{2,2})'(?'X'))?-%--\x95$9*\4'|\xd1(\x9c''%\x94$9)#(?'R')3\x7?('P\xed7'\xa8\xb1^u\xeaw\1\0\0\(|(?1){7}.+[\p{Me}].\s\xdcC*^\x14?(?(<y>))(?<!^)$C((;*?(R*?))+(?(R)\x8a\X*?\x8a\xb\xd1^9\3*+|(\xc1,\k'R'\xb4)\xcc! z)\z(?JJ)(?'X';(\xb\xd1^9\?'3*+(\xc1.]k+\xb'Pm'\xb4))':(?'d')(?'RD'(d')|)|$)'|(?<x>\g{d});\g{x}\x11\g{d}\x81\|$((?'X'\'X'(?'W''\x92()'9'\x83*))\xba*\!?^ <){)':;\xcc4'\xd1'(?'X'28))?-%--\x95$9*\4'|\xd1((''e\x94*$9:)*#(?'R')3)\x7?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+0!~:(?)'d'E:yD!\s(?'R'\x1e;\x10:U))|'\x9g!\xb0*){)\\x16:;()\x1e\x10\x87*:(?<y>)\xd1+!~:(?)'}'\d'E:yD!\s(?'R'\x1e;\x10:U))|'))|)g!\xb0*R+9{29+)#(?'P'})*?pS\{3,}\x85,{0,}l{*UTF)(\xe{7}){3722,{9,}d{2,?|))|{)\(A?&d}}{\xa,}2}){3,}7,l{)22}(,}l:7{2,4}}29\x19+)#?'P'})*v?))\x5"
/$(&.+[\p{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(?(R)){0,6}?|){12\x8a\X*?\x8a\x0b\xd1^9\3*+(\xc1,\k'P'\xb4)\xcc(z\z(?JJ)(?'X'8};(\x0b\xd1^9\?'3*+(\xc1.]k+\x0b'Pm'\xb4\xcc4'\xd1'(?'X'))?-%--\x95$9*\4'|\xd1(''%\x95*$9)#(?'R')3\x07?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+!~:(?)''(d'E:yD!\s(?'R'\x1e;\x10:U))|')g!\xb0*){29+))#(?'P'})*?/
"(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'"
/[\pS#moq]/ =
/(*:a\x{12345}b\t(d\)c)xxx/utf,alt_verbnames,mark cxxxz
/abcd/utf,replace=x\x{824}y\o{3333}z(\Q12\$34$$\x34\E5$$),substitute_extended abcd
/a(\x{e0}\x{101})(\x{c0}\x{102})/utf,replace=a\u$1\U$1\E$1\l$2\L$2\Eab\U\x{e0}\x{101}\L\x{d0}\x{160}\EDone,substitute_extended a\x{e0}\x{101}\x{c0}\x{102}
/((?<digit>\d)|(?<letter>\p{L}))/g,substitute_extended,replace=<${digit:+digit; :not digit; }${letter:+letter:not a letter}> ab12cde
/(*UCP)(*UTF)[[:>:]]X/B
/abc/utf,replace=xyz abc\=zero_terminate
/a[[:punct:]b]/ucp,bincode
/a[[:punct:]b]/utf,ucp,bincode
/a[b[:punct:]]/utf,ucp,bincode
/[[:^ascii:]]/utf,ucp,bincode
/[[:^ascii:]\w]/utf,ucp,bincode
/[\w[:^ascii:]]/utf,ucp,bincode
/[^[:ascii:]\W]/utf,ucp,bincode \x{de} \x{200} \= Expect no match \x{589} \x{37e}
/[[:^ascii:]a]/utf,ucp,bincode
/L(?#(|++<!(2)?/B,utf,no_auto_possess,auto_callout
/L(?#(|++<!(2)?/B,utf,ucp,auto_callout
/(*UTF)C\x09((?<!'(?x)!*H? #\xcc\x9a[^$]/
/[\D]/utf \x{1d7cf}
/[\D\P{Nd}]/utf \x{1d7cf}
/[^\D]/utf a9b \= Expect no match \x{1d7cf}
/[^\D\P{Nd}]/utf a9b \x{1d7cf} \= Expect no match \x{10000}
# Hex uses pattern length, not zero-terminated. This tests for overrunning
# the given length of a pattern.
/'(*UTF)'/hex
/'#('/hex,extended,utf
/a(?<=A\XB)/utf
/../utf,auto_callout \n\x{123}\x{123}\x{123}\x{123}
# This tests processing wide characters in extended mode.
/XȀ/x,utf
# These three test a bug fix that was not clearing up after a locale setting
# when the test or a subsequent one matched a wide character.
//locale=C
/[\P{Yi}]/utf \x{2f000}
/[\P{Yi}]/utf,locale=C \x{2f000}
/^(?<!(?=))/B,utf
# Horizontal and vertical space lists ignore caseless
/[\HH]/Bi,utf
/[^\HH]/Bi,utf
//g,utf \=zero_terminate
/^(?1)\p{Nd}{3}(a)/ a123a
/\p{Nd}{0,3}[\pL](*:abc)(?C1)xxx/callout_info
# ---------------------------------------------------------------------------
# A bunch of tests that hit lines of code that others do not (at least when # these were created).
/^[^a]{3,}?x/i,utf,no_start_optimize,no_auto_possess \= Expect no match bbb cc
/^[ac]{3,}?x/i,utf,no_start_optimize,no_auto_possess \= Expect no match aaa\x{100}
/^X\X/no_start_optimize,no_auto_possess \= Expect no match X
/^X\p{L&}+?/no_start_optimize,no_auto_possess \= Expect no match X
/^X\p{L}+?/no_start_optimize,no_auto_possess \= Expect no match X
/^X\p{Lu}+?/no_start_optimize,no_auto_possess \= Expect no match X
/^X\p{Arabic}+?/no_start_optimize,no_auto_possess \= Expect no match X
/^X\p{Xan}+?/ucp,no_start_optimize,no_auto_possess \= Expect no match X
/^X\s+?/ucp,no_start_optimize,no_auto_possess \= Expect no match X XX
/^X\S+?/ucp,no_start_optimize,no_auto_possess XX \= Expect no match X
/^X\w+?/ucp,no_start_optimize,no_auto_possess \= Expect no match X
/^X[^\x{b5}]+?/i,utf,no_start_optimize,no_auto_possess \= Expect no match X
/^X[\x{b5}]+?/i,utf,no_start_optimize,no_auto_possess \= Expect no match X
/^X\p{Xuc}+?/utf,no_start_optimize,no_auto_possess \= Expect no match X
/^X.+?Z/s,utf,no_start_optimize,no_auto_possess \= Expect no match X
/^X\R+?/utf,no_start_optimize,no_auto_possess \= Expect no match X
/^X\H+?/utf,no_start_optimize,no_auto_possess \= Expect no match X
/^X\V+?/utf,no_start_optimize,no_auto_possess \= Expect no match X
/^X\s+?/utf,no_start_optimize,no_auto_possess \= Expect no match X XX
/^X\S+?/utf,no_start_optimize,no_auto_possess \= Expect no match X
/^X\p{Any}{1,3}?Z/s,no_start_optimize,no_auto_possess XYYYZ \= Expect no match XY XYY XYYY XYYYYZ
/^X\p{L&}{1,3}?Z/s,no_start_optimize,no_auto_possess \= Expect no match XY XY!
/^X\p{L}{1,3}?Z/s,no_start_optimize,no_auto_possess \= Expect no match XY XY!
/^X\p{Lu}{1,3}?Z/s,no_start_optimize,no_auto_possess \= Expect no match XY XY!
/^X\P{Han}{1,3}?Z/s,utf,no_start_optimize,no_auto_possess \= Expect no match XY XY! XY\x{2f00}!
/^X\p{Xan}{1,3}?Z/s,no_start_optimize,no_auto_possess \= Expect no match XY XY!
/^X\p{Xsp}{1,3}?Z/s,no_start_optimize,no_auto_possess \= Expect no match X\n X\n! X\n\n!
/^X\P{Xsp}{1,3}?Z/s,no_start_optimize,no_auto_possess \= Expect no match XYY\n
/^X\p{Xwd}{1,3}?Z/s,no_start_optimize,no_auto_possess \= Expect no match XY XY! XYY!
/^X\x{b5}+?Z/i,utf,no_start_optimize,no_auto_possess \= Expect no match X X\x{b5} X\x{b5}\x{b5}Y
/^X\p{Xuc}+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match X X$ X@@Y
/(*CRLF)^X.+?Z/utf,no_start_optimize,no_auto_possess \= Expect partial match XYY\r\=ph \= Expect no match X
/^X.+?Z/s,utf,no_start_optimize,no_auto_possess \= Expect no match X XYY
/^X\R+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match X\nX X\n\rX X\n\r\nX X\n\n X\n\x{0c}
/(*BSR_ANYCRLF)^X\R+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match X\nX X\n\rX X\n\r\nX X\n\n X\n\x{0c}
/^X\H+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match XY\t XYY
/^X\h+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match X\t\t X\tY
/^X\V+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match XY\n XYY
/^X\v+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match X\n\n X\nY
/^X\D+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match XY9 XYY
/^X\d+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match X99 X9Y
/^X\S+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match XY\n XYY
/^X\s+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match X\n\n X\nY
/^X\W+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match X.A X++
/^X\p{L&}{1,3}Z/no_start_optimize,no_auto_possess \= Expect no match XY XY!
/^X\p{L}{1,3}Z/no_start_optimize,no_auto_possess \= Expect no match XY
/^X\p{Xan}{1,3}Z/no_start_optimize,no_auto_possess \= Expect no match XY
/^X\P{Xsp}{1,3}Z/no_start_optimize,no_auto_possess \= Expect no match XYY
/^X\p{Xuc}+Z/utf,no_start_optimize,no_auto_possess \= Expect no match X$
# ---------------------------------------------------------------------- # These test the dangerous PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL option.
/\x{d800}/B,utf,bad_escape_is_literal
/\ud800/B,utf,alt_bsux,bad_escape_is_literal
# ----------------------------------------------------------------------
/Aሴ+B/literal,utf,no_utf_check Aሴ+B # These are here because I upgraded to Unicode 10.0.0 before Perl did, so it # doesn't recognize all these scripts. In time these three tests can be moved # to test 4.
/^(\p{Adlam}+)(\p{Bhaiksuki}+)(\p{Marchen}+)(\p{Newa}+)(\p{Osage}+) (\p{Tangut}+)(\p{Masaram_Gondi}+)(\p{Nushu}+)(\p{Soyombo}+) (\p{Zanabazar_Square}+)/x,utf \x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47}
/^\x{1E900}\x{104B0}/i,utf \x{1E900}\x{104B0} \x{1E922}\x{104D8} /^(?:(\X)(?C))+$/utf \x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47}\=callout_capture,callout_no_where
# Similarly for Unicode 11.0.0
/^(\p{Dogra}+)(\p{Gunjala_Gondi}+)(\p{Hanifi_Rohingya}+)(\p{Makasar}+) (\p{Medefaidrin}+)(\p{Old_Sogdian}+)(\p{Sogdian}+)/x,utf \x{11800}\x{11da9}\x{10d27}\x{11ee0}\x{16e48}\x{10f27}\x{10f30}
# Regional indicators
/^(\X)(\X)/utf,aftertext \x{1F1E6}\x{1F1E7}\x{1F1E7}B \x{1F1E6}\x{1F1E7}\x{1F1E7}\x{1F1E6}B # More differences from Perl
/^\p{Common}/utf \x{60c} \x{61f} \x{964} \x{965}
/^\p{Inherited}/utf \x{64b} \x{654} \x{655} \x{1D1AA}
/\N{U+}/
/\N{U+}/utf
/\N{U}/
# This tests the non-UTF Unicode NEL pattern whitespace character, only # recognized by PCRE2 with /x when there is Unicode support.
/A
�B/x AB # This tests Unicode Pattern White Space characters in verb names when they # are being processed with PCRE2_EXTENDED. Note: there are UTF-8 characters # with code points greater than 255 between A, B, and C in the pattern.
/(*: AB
C)abc/x,utf,mark,alt_verbnames abc # Script run tests: auto-possessification
/^(*sr:.*)/B,utf paypаl.com A classic example of why script run checks are a good thing
/^(*sr:.*(*ACCEPT))/utf paypаl.com But *ACCEPT breaks things
/^(*sr:\x{2e80}*)/B,utf
/^(*sr:\x{2e80}*)\x{2e80}/B,utf
/(?<!)(*sr:)/B
/(?<=abc(?=X(*sr:BXY)CCC)XBXYCCC)./B abcXBXYCCC!
# Some script run patterns are broken in Perl 5.28.0. These can be moved into # test 4 when a mended version of Perl is released.
/^(*sr:.{4})/utf \x{0980}12\x{0993} Bengali Common-digits Bengali \x{0780}12\x{07b1} Thaana Common-digits Thaana \x{0e01}12\x{0e5b} Thai Common-digits Thai \x{1780}12\x{19ff} Khmer Common-digits Khmer \x{0904}12\x{0939} Devanagari Common-digits Devanagari A\x{ff10}\x{ff19}B Latin Common-notascii-digits Latin A\x{1d7ce}\x{1d7cf}B Latin fancy-common-digits Latin # These ones involve non-ASCII but nevertheless Common digits. As of October # 2018 even blead Perl wasn't handling all of these - but is going to.
/^(*sr:.{4})/utf A\x{ff10}\x{ff19}B Latin Common-notascii-digits Latin \x{ff10}\x{ff19}.. Common-notascii-digits Common Common A\x{ff10}BC Latin Common-notascii-digit Latin Latin A\x{1d7ce}\x{1d7cf}B Latin fancy-common-digits Latin \x{1d7ce}\x{1d7cf},, fancy-common-digits Common Common A\x{1d7ce}BC Latin fancy-common-digit Latin Latin # Some Unicode 12.1.0 new script characters
/\p{Elymaic}\p{Nandinagari}\p{Nyiakeng_Puachue_Hmong}\p{Wancho}/utf \x{10fe5}\x{119AC}\x{1E10E}\x{1E2D1}
# Some Unicode 13.0.0 new script characters
/\p{Chorasmian}\p{Dives_Akuru}\p{Khitan_Small_Script}\p{Yezidi}/utf \x{10FB0}\x{11900}\x{18B00}\x{10E80}
# -------
# Test reference and errors in non-ASCII characters in group names
/(?'𑠅ABC'...)/I,utf abcde\=copy=𑠅ABC
# Bad ones
/(?'AB၌C'...)\g{AB၌C}/utf
/(?'٠ABC'...)/utf
/(?'²ABC'...)/utf
/(?'X²ABC'...)/utf
# -------
/\p{Any}*xyz/I
/(|�)7/caseless,ucp
/(\xc1)\1/i,ucp \xc1\xe1\=no_jit /\p{L&}+\p{bidi_control}/B
/\p{bidi_control}+\p{L&}/B
/\p{han}/B
/\p{script:han}/B
/\p{sc:han}/B
/\p{script extensions:han}/B
/\p{scx:han}/B
# Test error - invalid script name
/\p{sc:L}/
# Some Boolean property tests that differ from Perl
/\p{emojimodifierbase}\p{ebase}/g,utf >AN<>\x{261d}\x{1faf6}<>yz<
/\p{graphemelink}\p{grlink}/g,utf >AN<>\x{11d97}\x{94d}<>yz< /\p{soft dotted}\p{sd}/g,utf >AF23<>\x{1df1a}\x{69}<>yz< # ------------------------------------------------
/\p{\2b[:x�igi:t:_/
# Tests for PCRE2_EXTRA_CASELESS_RESTRICT. Compare each test with and without # the restriction.
/AskZ/i,utf,caseless_restrict AskZ aSKz \= Expect no match A\x{17f}kZ As\x{212a}Z
/AskZ/i,utf AskZ aSKz A\x{17f}kZ As\x{212a}Z
/A\x{17f}\x{212a}Z/ir,utf \= Expect no match AskZ
/A\x{17f}\x{212a}Z/i,utf AskZ
/[AskZ]+/i,utf,caseless_restrict AskZ aSKz A\x{17f}kZ As\x{212a}Z
/[AskZ]+/i,utf AskZ aSKz A\x{17f}kZ As\x{212a}Z
/[\x{17f}\x{212a}]+/ir,utf \= Expect no match AskZ
/[\x{17f}\x{212a}]+/i,utf AskZ
/[^s]+/ir,utf A\x{17f}Z
/[^s]+/i,utf A\x{17f}Z
/[^k]+/ir,utf A\x{212a}Z /[^k]+/i,utf A\x{212a}Z /[^sk]+/ir,utf A\x{17f}\x{212a}Z
/[^sk]+/i,utf A\x{17f}\x{212a}Z
/[^\x{17f}]+/ir,utf AsSZ
/[^\x{17f}]+/i,utf AsSZ
/[Ss]+/irB,utf Sss\x{17f}ss
/[Ss]+/iB,utf Sss\x{17f}ss
/[S\x{17f}]/irB,utf
/[S\x{17f}]/iB,utf
/[\x{17f}s]/irB,utf
/[\x{17f}s]/iB,utf
/[\x{4b}\x{6b}]/irB,utf
/[\x{4b}\x{6b}]/iB,utf
/s(?r)s(?-r)s(?r:s)s/i,utf \x{17f}S\x{17f}S\x{17f} \= Expect no match \x{17f}\x{17f}\x{17f}S\x{17f} \x{17f}S\x{17f}\x{17f}\x{17f}
/k(?^i)k/ir,utf K\x{212a} \= Expect no match \x{212a}\x{212a}
# End caseless restrict tests
# TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without.
# DIGITS
/\d+/i,utf 123\x{660}456
/\d+/i,utf,ucp 123\x{660}456
/\d+/i,utf,ucp,ascii_bsd 123\x{660}456
/[\d]+/i,utf 123\x{660}456
/[\d]+/i,utf,ucp 123\x{660}456
/[\d]+/i,utf,ucp,ascii_bsd 123\x{660}456
/\d(?aD)\d(?-aD)\d/utf,ucp \x{660}9\x{660} \= Expect no match \x{660}\x{660}\x{660}
/\d(?-aD)\d(?aD)\d/utf,ucp,ascii_bsd 999 9\x{660}9
/\d(?a)\d(?-a)\d/utf,ucp \x{660}9\x{660} \= Expect no match \x{660}\x{660}\x{660}
/\d(?-aD)\d(?aD)\d/utf,ucp,ascii_bsd 999 9\x{660}9
# SPACES
/>\s+</i,utf > < \= Expect no match >\x{a0} <
/>\s+</i,utf,ucp > < >\x{a0} <
/>\s+</i,utf,ucp,ascii_bss > < \= Expect no match >\x{a0} <
/>[\s]+</i,utf > < \= Expect no match >\x{a0} <
/>[\s]+</i,utf,ucp > < >\x{a0} <
/>[\s]+</i,utf,ucp,ascii_bss > < \= Expect no match >\x{a0} <
/>\s(?aS)\s(?-aS)\s</utf,ucp >\x{a0} \x{a0}< \= Expect no match >\x{a0}\x{a0}\x{a0}<
/>\s(?a)\s(?-a)\s</utf,ucp >\x{a0} \x{a0}< \= Expect no match >\x{a0}\x{a0}\x{a0}< # WORDS
/\w+/i,utf 123\x{660}abc
/\w+/i,utf,ucp 123\x{660}abc
/\w+/i,utf,ucp,ascii_bsw 123\x{660}abc
/[\w]+/i,utf 123\x{660}abc
/[\w]+/i,utf,ucp 123\x{660}abc
/[\w]+/i,utf,ucp,ascii_bsw 123\x{660}abc
/\w(?aW)\w(?-aW)\w/utf,ucp \x{660}A\x{c0} \= Expect no match \x{660}\x{c0}\x{c0}
/\w(?a)\w(?-a)\w/utf,ucp \x{660}A\x{c0} \= Expect no match \x{660}\x{c0}\x{c0} # WORD BOUNDARY
/\bABC\b/utf \x{c0}ABC\x{d0}
/\bABC\b/utf,ucp \= Expect no match \x{c0}ABC\x{d0}
/\bABC\b/utf,ucp,ascii_bsw \x{c0}ABC\x{d0}
/\bABC\b/utf,ucp,ascii_all \x{c0}ABC\x{d0} # POSIX
/^[[:digit:]]+$/utf,ucp 123456 123\x{660}456
/^[[:digit:]]+$/utf,ucp,ascii_digit 123456 \= Expect no match 123\x{660}456
/[[:digit:]]+/g,utf,ucp,ascii_digit 123\x{660}456
/(?-aT)[[:digit:]](?aT)[[:digit:]]/utf,ucp,ascii_digit 11 \x{ff11}1 \= Expect no match 1\x{ff11}
/(?-aT:[[:digit:]])[[:digit:]]/utf,ucp,ascii_digit 11 \x{ff11}1 \= Expect no match 1\x{ff11}
/(?-aT:[[:digit:]])[[:digit:]]/utf,never_ucp,ascii_digit 11 \= Expect no match \x{ff11}1 1\x{ff11}
/[[:digit:]]+/utf,ucp,ascii_posix 123\x{660}456
/(?-aP)[[:digit:]](?aP)[[:digit:]]/utf,ucp,ascii_posix 11 \x{ff11}1 \= Expect no match 1\x{ff11}
/(?-aP:[[:digit:]])[[:digit:]]/utf,ucp,ascii_posix 11 \x{ff11}1 \= Expect no match 1\x{ff11}
/(?-a:[[:digit:]])[[:digit:]]/a,utf,ucp 11 \x{ff11}1 \= Expect no match 1\x{ff11}
/^[[:xdigit:]]+$/utf,ucp f0 1A d\x{ff10} \x{ff26}8 \= Expect no match 8g\=no_jit
/^[[:xdigit:]]+$/utf,ucp,ascii_digit f0 1A \= Expect no match d\x{ff10} \x{ff26}8 8g
/>[[:space:]]+</utf,ucp >\x{a0} \x{a0}< >\x{a0}\x{a0}\x{a0}<
/>[[:space:]]+</utf,ucp,ascii_posix \= Expect no match >\x{a0} \x{a0}<
/(?aP)[[:alnum:]]+/i,ucp,utf abcáxyz abc\x{660}xyz
/(?aP)[[:alnum:]\d]+/i,ucp,utf abc\x{660}xyz /(*UCP)(*UTF)[[:alnum:]](?aP:[[:alnum:]])[[:alnum:]]/ \x{660}A\x{660} \= Expect no match \x{660}\x{660}\x{660} # VARIOUS
/[\d\s\w]+/a,ucp,utf 9 A\x{660}À 9 AÀ\x{660}
# End PCRE2_EXTRA_ASCII_xxx tests
/(?<!(|l ))/utf (?<!(|l ))
# End of testinput5
|