github的一些开源项目
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2534 lines
45 KiB

  1. # This set of tests checks the API, internals, and non-Perl stuff for UTF
  2. # support, including Unicode properties. However, tests that give different
  3. # results in 8-bit, 16-bit, and 32-bit modes are excluded (see tests 10 and
  4. # 12).
  5. #newline_default lf any anycrlf
  6. # PCRE2 and Perl disagree about the characteristics of certain Unicode
  7. # characters. For example, 061C was considered by Perl to be Arabic, though
  8. # it was not listed as such in the Unicode Scripts.txt file for Unicode 8.
  9. # However, it *is* in that file for Unicode 10, but when I came to re-check,
  10. # Perl had changed in the meantime, with 5.026 not recognizing it as Arabic.
  11. # 2066-2069 are graphic and printable according to Perl, though they are
  12. # actually "isolate" control characters. That is why the following tests are
  13. # here rather than in test 4.
  14. /^[\p{Arabic}]/utf
  15. \x{061c}
  16. /^[[:graph:]]+$/utf,ucp
  17. \= Expect no match
  18. \x{61c}
  19. \x{2066}
  20. \x{2067}
  21. \x{2068}
  22. \x{2069}
  23. /^[[:print:]]+$/utf,ucp
  24. \= Expect no match
  25. \x{61c}
  26. \x{2066}
  27. \x{2067}
  28. \x{2068}
  29. \x{2069}
  30. /^[[:^graph:]]+$/utf,ucp
  31. \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}
  32. \x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069}
  33. /^[[:^print:]]+$/utf,ucp
  34. \x{09}\x{1D}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067}
  35. \x{2068}\x{2069}
  36. # Perl does not consider U+180e to be a space character. It is true that it
  37. # does not appear in the Unicode PropList.txt file as such, but in many other
  38. # sources it is listed as a space, and has been treated as such in PCRE for
  39. # a long time.
  40. /^>[[:blank:]]*/utf,ucp
  41. >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028}
  42. /^A\s+Z/utf,ucp
  43. A\x{85}\x{180e}\x{2005}Z
  44. /^A[\s]+Z/utf,ucp
  45. A\x{2005}Z
  46. A\x{85}\x{2005}Z
  47. /^[[:graph:]]+$/utf,ucp
  48. \= Expect no match
  49. \x{180e}
  50. /^[[:print:]]+$/utf,ucp
  51. \x{180e}
  52. /^[[:^graph:]]+$/utf,ucp
  53. \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e}
  54. /^[[:^print:]]+$/utf,ucp
  55. \= Expect no match
  56. \x{180e}
  57. # End of U+180E tests.
  58. # ---------------------------------------------------------------------
  59. /\x{110000}/IB,utf
  60. /\o{4200000}/IB,utf
  61. /\x{ffffffff}/utf
  62. /\o{37777777777}/utf
  63. /\x{100000000}/utf
  64. /\o{77777777777}/utf
  65. /\x{d800}/utf
  66. /\o{154000}/utf
  67. /\x{dfff}/utf
  68. /\o{157777}/utf
  69. /\x{d7ff}/utf
  70. /\o{153777}/utf
  71. /\x{e000}/utf
  72. /\o{170000}/utf
  73. /^\x{100}a\x{1234}/utf
  74. \x{100}a\x{1234}bcd
  75. /\x{0041}\x{2262}\x{0391}\x{002e}/IB,utf
  76. \x{0041}\x{2262}\x{0391}\x{002e}
  77. /.{3,5}X/IB,utf
  78. \x{212ab}\x{212ab}\x{212ab}\x{861}X
  79. /.{3,5}?/IB,utf
  80. \x{212ab}\x{212ab}\x{212ab}\x{861}
  81. /^[ab]/IB,utf
  82. bar
  83. \= Expect no match
  84. c
  85. \x{ff}
  86. \x{100}
  87. /\x{100}*(\d+|"(?1)")/utf
  88. 1234
  89. "1234"
  90. \x{100}1234
  91. "\x{100}1234"
  92. \x{100}\x{100}12ab
  93. \x{100}\x{100}"12"
  94. \= Expect no match
  95. \x{100}\x{100}abcd
  96. /\x{100}*/IB,utf
  97. /a\x{100}*/IB,utf
  98. /ab\x{100}*/IB,utf
  99. /[\x{200}-\x{100}]/utf
  100. /[Ā-Ą]/utf
  101. \x{100}
  102. \x{104}
  103. \= Expect no match
  104. \x{105}
  105. \x{ff}
  106. /[\xFF]/IB
  107. >\xff<
  108. /[^\xFF]/IB
  109. /[Ä-Ü]/utf
  110. Ö # Matches without Study
  111. \x{d6}
  112. /[Ä-Ü]/utf
  113. Ö <-- Same with Study
  114. \x{d6}
  115. /[\x{c4}-\x{dc}]/utf
  116. Ö # Matches without Study
  117. \x{d6}
  118. /[\x{c4}-\x{dc}]/utf
  119. Ö <-- Same with Study
  120. \x{d6}
  121. /[^\x{100}]abc(xyz(?1))/IB,utf
  122. /(\x{100}(b(?2)c))?/IB,utf
  123. /(\x{100}(b(?2)c)){0,2}/IB,utf
  124. /(\x{100}(b(?1)c))?/IB,utf
  125. /(\x{100}(b(?1)c)){0,2}/IB,utf
  126. /\W/utf
  127. A.B
  128. A\x{100}B
  129. /\w/utf
  130. \x{100}X
  131. # Use no_start_optimize because the first code unit is different in 8-bit from
  132. # the wider modes.
  133. /^\ሴ/IB,utf,no_start_optimize
  134. /()()()()()()()()()()
  135. ()()()()()()()()()()
  136. ()()()()()()()()()()
  137. ()()()()()()()()()()
  138. A (x) (?41) B/x,utf
  139. AxxB
  140. /^[\x{100}\E-\Q\E\x{150}]/B,utf
  141. /^[\QĀ\E-\QŐ\E]/B,utf
  142. /^abc./gmx,newline=any,utf
  143. abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK
  144. /abc.$/gmx,newline=any,utf
  145. abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x{0085} abc7\x{2028} abc8\x{2029} abc9
  146. /^a\Rb/bsr=unicode,utf
  147. a\nb
  148. a\rb
  149. a\r\nb
  150. a\x0bb
  151. a\x0cb
  152. a\x{85}b
  153. a\x{2028}b
  154. a\x{2029}b
  155. \= Expect no match
  156. a\n\rb
  157. /^a\R*b/bsr=unicode,utf
  158. ab
  159. a\nb
  160. a\rb
  161. a\r\nb
  162. a\x0bb
  163. a\x0c\x{2028}\x{2029}b
  164. a\x{85}b
  165. a\n\rb
  166. a\n\r\x{85}\x0cb
  167. /^a\R+b/bsr=unicode,utf
  168. a\nb
  169. a\rb
  170. a\r\nb
  171. a\x0bb
  172. a\x0c\x{2028}\x{2029}b
  173. a\x{85}b
  174. a\n\rb
  175. a\n\r\x{85}\x0cb
  176. \= Expect no match
  177. ab
  178. /^a\R{1,3}b/bsr=unicode,utf
  179. a\nb
  180. a\n\rb
  181. a\n\r\x{85}b
  182. a\r\n\r\nb
  183. a\r\n\r\n\r\nb
  184. a\n\r\n\rb
  185. a\n\n\r\nb
  186. \= Expect no match
  187. a\n\n\n\rb
  188. a\r
  189. /\H\h\V\v/utf
  190. X X\x0a
  191. X\x09X\x0b
  192. \= Expect no match
  193. \x{a0} X\x0a
  194. /\H*\h+\V?\v{3,4}/utf
  195. \x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a
  196. \x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a
  197. \x09\x20\x{a0}\x0a\x0b\x0c
  198. \= Expect no match
  199. \x09\x20\x{a0}\x0a\x0b
  200. /\H\h\V\v/utf
  201. \x{3001}\x{3000}\x{2030}\x{2028}
  202. X\x{180e}X\x{85}
  203. \= Expect no match
  204. \x{2009} X\x0a
  205. /\H*\h+\V?\v{3,4}/utf
  206. \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a
  207. \x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a
  208. \x09\x20\x{202f}\x0a\x0b\x0c
  209. \= Expect no match
  210. \x09\x{200a}\x{a0}\x{2028}\x0b
  211. /[\h]/B,utf
  212. >\x{1680}
  213. /[\h]{3,}/B,utf
  214. >\x{1680}\x{180e}\x{2000}\x{2003}\x{200a}\x{202f}\x{205f}\x{3000}<
  215. /[\v]/B,utf
  216. /[\H]/B,utf
  217. /[\V]/B,utf
  218. /.*$/newline=any,utf
  219. \x{1ec5}
  220. /a\Rb/I,bsr=anycrlf,utf
  221. a\rb
  222. a\nb
  223. a\r\nb
  224. \= Expect no match
  225. a\x{85}b
  226. a\x0bb
  227. /a\Rb/I,bsr=unicode,utf
  228. a\rb
  229. a\nb
  230. a\r\nb
  231. a\x{85}b
  232. a\x0bb
  233. /a\R?b/I,bsr=anycrlf,utf
  234. a\rb
  235. a\nb
  236. a\r\nb
  237. \= Expect no match
  238. a\x{85}b
  239. a\x0bb
  240. /a\R?b/I,bsr=unicode,utf
  241. a\rb
  242. a\nb
  243. a\r\nb
  244. a\x{85}b
  245. a\x0bb
  246. /.*a.*=.b.*/utf,newline=any
  247. QQQ\x{2029}ABCaXYZ=!bPQR
  248. \= Expect no match
  249. a\x{2029}b
  250. \x61\xe2\x80\xa9\x62
  251. /[[:a\x{100}b:]]/utf
  252. /a[^]b/utf,allow_empty_class,match_unset_backref
  253. a\x{1234}b
  254. a\nb
  255. \= Expect no match
  256. ab
  257. /a[^]+b/utf,allow_empty_class,match_unset_backref
  258. aXb
  259. a\nX\nX\x{1234}b
  260. \= Expect no match
  261. ab
  262. /(\x{de})\1/
  263. \x{de}\x{de}
  264. /X/newline=any,utf,firstline
  265. A\x{1ec5}ABCXYZ
  266. /Xa{2,4}b/utf
  267. X\=ps
  268. Xa\=ps
  269. Xaa\=ps
  270. Xaaa\=ps
  271. Xaaaa\=ps
  272. /Xa{2,4}?b/utf
  273. X\=ps
  274. Xa\=ps
  275. Xaa\=ps
  276. Xaaa\=ps
  277. Xaaaa\=ps
  278. /Xa{2,4}+b/utf
  279. X\=ps
  280. Xa\=ps
  281. Xaa\=ps
  282. Xaaa\=ps
  283. Xaaaa\=ps
  284. /X\x{123}{2,4}b/utf
  285. X\=ps
  286. X\x{123}\=ps
  287. X\x{123}\x{123}\=ps
  288. X\x{123}\x{123}\x{123}\=ps
  289. X\x{123}\x{123}\x{123}\x{123}\=ps
  290. /X\x{123}{2,4}?b/utf
  291. X\=ps
  292. X\x{123}\=ps
  293. X\x{123}\x{123}\=ps
  294. X\x{123}\x{123}\x{123}\=ps
  295. X\x{123}\x{123}\x{123}\x{123}\=ps
  296. /X\x{123}{2,4}+b/utf
  297. X\=ps
  298. X\x{123}\=ps
  299. X\x{123}\x{123}\=ps
  300. X\x{123}\x{123}\x{123}\=ps
  301. X\x{123}\x{123}\x{123}\x{123}\=ps
  302. /X\x{123}{2,4}b/utf
  303. \= Expect no match
  304. Xx\=ps
  305. X\x{123}x\=ps
  306. X\x{123}\x{123}x\=ps
  307. X\x{123}\x{123}\x{123}x\=ps
  308. X\x{123}\x{123}\x{123}\x{123}x\=ps
  309. /X\x{123}{2,4}?b/utf
  310. \= Expect no match
  311. Xx\=ps
  312. X\x{123}x\=ps
  313. X\x{123}\x{123}x\=ps
  314. X\x{123}\x{123}\x{123}x\=ps
  315. X\x{123}\x{123}\x{123}\x{123}x\=ps
  316. /X\x{123}{2,4}+b/utf
  317. \= Expect no match
  318. Xx\=ps
  319. X\x{123}x\=ps
  320. X\x{123}\x{123}x\=ps
  321. X\x{123}\x{123}\x{123}x\=ps
  322. X\x{123}\x{123}\x{123}\x{123}x\=ps
  323. /X\d{2,4}b/utf
  324. X\=ps
  325. X3\=ps
  326. X33\=ps
  327. X333\=ps
  328. X3333\=ps
  329. /X\d{2,4}?b/utf
  330. X\=ps
  331. X3\=ps
  332. X33\=ps
  333. X333\=ps
  334. X3333\=ps
  335. /X\d{2,4}+b/utf
  336. X\=ps
  337. X3\=ps
  338. X33\=ps
  339. X333\=ps
  340. X3333\=ps
  341. /X\D{2,4}b/utf
  342. X\=ps
  343. Xa\=ps
  344. Xaa\=ps
  345. Xaaa\=ps
  346. Xaaaa\=ps
  347. /X\D{2,4}?b/utf
  348. X\=ps
  349. Xa\=ps
  350. Xaa\=ps
  351. Xaaa\=ps
  352. Xaaaa\=ps
  353. /X\D{2,4}+b/utf
  354. X\=ps
  355. Xa\=ps
  356. Xaa\=ps
  357. Xaaa\=ps
  358. Xaaaa\=ps
  359. /X\D{2,4}b/utf
  360. X\=ps
  361. X\x{123}\=ps
  362. X\x{123}\x{123}\=ps
  363. X\x{123}\x{123}\x{123}\=ps
  364. X\x{123}\x{123}\x{123}\x{123}\=ps
  365. /X\D{2,4}?b/utf
  366. X\=ps
  367. X\x{123}\=ps
  368. X\x{123}\x{123}\=ps
  369. X\x{123}\x{123}\x{123}\=ps
  370. X\x{123}\x{123}\x{123}\x{123}\=ps
  371. /X\D{2,4}+b/utf
  372. X\=ps
  373. X\x{123}\=ps
  374. X\x{123}\x{123}\=ps
  375. X\x{123}\x{123}\x{123}\=ps
  376. X\x{123}\x{123}\x{123}\x{123}\=ps
  377. /X[abc]{2,4}b/utf
  378. X\=ps
  379. Xa\=ps
  380. Xaa\=ps
  381. Xaaa\=ps
  382. Xaaaa\=ps
  383. /X[abc]{2,4}?b/utf
  384. X\=ps
  385. Xa\=ps
  386. Xaa\=ps
  387. Xaaa\=ps
  388. Xaaaa\=ps
  389. /X[abc]{2,4}+b/utf
  390. X\=ps
  391. Xa\=ps
  392. Xaa\=ps
  393. Xaaa\=ps
  394. Xaaaa\=ps
  395. /X[abc\x{123}]{2,4}b/utf
  396. X\=ps
  397. X\x{123}\=ps
  398. X\x{123}\x{123}\=ps
  399. X\x{123}\x{123}\x{123}\=ps
  400. X\x{123}\x{123}\x{123}\x{123}\=ps
  401. /X[abc\x{123}]{2,4}?b/utf
  402. X\=ps
  403. X\x{123}\=ps
  404. X\x{123}\x{123}\=ps
  405. X\x{123}\x{123}\x{123}\=ps
  406. X\x{123}\x{123}\x{123}\x{123}\=ps
  407. /X[abc\x{123}]{2,4}+b/utf
  408. X\=ps
  409. X\x{123}\=ps
  410. X\x{123}\x{123}\=ps
  411. X\x{123}\x{123}\x{123}\=ps
  412. X\x{123}\x{123}\x{123}\x{123}\=ps
  413. /X[^a]{2,4}b/utf
  414. X\=ps
  415. Xz\=ps
  416. Xzz\=ps
  417. Xzzz\=ps
  418. Xzzzz\=ps
  419. /X[^a]{2,4}?b/utf
  420. X\=ps
  421. Xz\=ps
  422. Xzz\=ps
  423. Xzzz\=ps
  424. Xzzzz\=ps
  425. /X[^a]{2,4}+b/utf
  426. X\=ps
  427. Xz\=ps
  428. Xzz\=ps
  429. Xzzz\=ps
  430. Xzzzz\=ps
  431. /X[^a]{2,4}b/utf
  432. X\=ps
  433. X\x{123}\=ps
  434. X\x{123}\x{123}\=ps
  435. X\x{123}\x{123}\x{123}\=ps
  436. X\x{123}\x{123}\x{123}\x{123}\=ps
  437. /X[^a]{2,4}?b/utf
  438. X\=ps
  439. X\x{123}\=ps
  440. X\x{123}\x{123}\=ps
  441. X\x{123}\x{123}\x{123}\=ps
  442. X\x{123}\x{123}\x{123}\x{123}\=ps
  443. /X[^a]{2,4}+b/utf
  444. X\=ps
  445. X\x{123}\=ps
  446. X\x{123}\x{123}\=ps
  447. X\x{123}\x{123}\x{123}\=ps
  448. X\x{123}\x{123}\x{123}\x{123}\=ps
  449. /(Y)X\1{2,4}b/utf
  450. YX\=ps
  451. YXY\=ps
  452. YXYY\=ps
  453. YXYYY\=ps
  454. YXYYYY\=ps
  455. /(Y)X\1{2,4}?b/utf
  456. YX\=ps
  457. YXY\=ps
  458. YXYY\=ps
  459. YXYYY\=ps
  460. YXYYYY\=ps
  461. /(Y)X\1{2,4}+b/utf
  462. YX\=ps
  463. YXY\=ps
  464. YXYY\=ps
  465. YXYYY\=ps
  466. YXYYYY\=ps
  467. /(\x{123})X\1{2,4}b/utf
  468. \x{123}X\=ps
  469. \x{123}X\x{123}\=ps
  470. \x{123}X\x{123}\x{123}\=ps
  471. \x{123}X\x{123}\x{123}\x{123}\=ps
  472. \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
  473. /(\x{123})X\1{2,4}?b/utf
  474. \x{123}X\=ps
  475. \x{123}X\x{123}\=ps
  476. \x{123}X\x{123}\x{123}\=ps
  477. \x{123}X\x{123}\x{123}\x{123}\=ps
  478. \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
  479. /(\x{123})X\1{2,4}+b/utf
  480. \x{123}X\=ps
  481. \x{123}X\x{123}\=ps
  482. \x{123}X\x{123}\x{123}\=ps
  483. \x{123}X\x{123}\x{123}\x{123}\=ps
  484. \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
  485. /\bthe cat\b/utf
  486. the cat\=ps
  487. the cat\=ph
  488. /abcd*/utf
  489. xxxxabcd\=ps
  490. xxxxabcd\=ph
  491. /abcd*/i,utf
  492. xxxxabcd\=ps
  493. xxxxabcd\=ph
  494. XXXXABCD\=ps
  495. XXXXABCD\=ph
  496. /abc\d*/utf
  497. xxxxabc1\=ps
  498. xxxxabc1\=ph
  499. /(a)bc\1*/utf
  500. xxxxabca\=ps
  501. xxxxabca\=ph
  502. /abc[de]*/utf
  503. xxxxabcde\=ps
  504. xxxxabcde\=ph
  505. /X\W{3}X/utf
  506. X\=ps
  507. /\sxxx\s/utf,tables=2
  508. AB\x{85}xxx\x{a0}XYZ
  509. AB\x{a0}xxx\x{85}XYZ
  510. /\S \S/utf,tables=2
  511. \x{a2} \x{84}
  512. 'A#хц'Bx,newline=any,utf
  513. 'A#хц
  514. PQ'Bx,newline=any,utf
  515. /a+#хaa
  516. z#XX?/Bx,newline=any,utf
  517. /a+#хaa
  518. z#х?/Bx,newline=any,utf
  519. /\g{A}xxx#bXX(?'A'123) (?'A'456)/Bx,newline=any,utf
  520. /\g{A}xxx#bх(?'A'123) (?'A'456)/Bx,newline=any,utf
  521. /^\cģ/utf
  522. /(\R*)(.)/s,utf
  523. \r\n
  524. \r\r\n\n\r
  525. \r\r\n\n\r\n
  526. /(\R)*(.)/s,utf
  527. \r\n
  528. \r\r\n\n\r
  529. \r\r\n\n\r\n
  530. /[^\x{1234}]+/Ii,utf
  531. /[^\x{1234}]+?/Ii,utf
  532. /[^\x{1234}]++/Ii,utf
  533. /[^\x{1234}]{2}/Ii,utf
  534. /f.*/
  535. for\=ph
  536. /f.*/s
  537. for\=ph
  538. /f.*/utf
  539. for\=ph
  540. /f.*/s,utf
  541. for\=ph
  542. /\x{d7ff}\x{e000}/utf
  543. /\x{d800}/utf
  544. /\x{dfff}/utf
  545. /\h+/utf
  546. \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
  547. \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000}
  548. /[\h\x{e000}]+/B,utf
  549. \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
  550. \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000}
  551. /\H+/utf
  552. \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
  553. \x{2000}\x{200a}\x{1fff}\x{200b}
  554. \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
  555. \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001}
  556. /[\H\x{d7ff}]+/B,utf
  557. \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
  558. \x{2000}\x{200a}\x{1fff}\x{200b}
  559. \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
  560. \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001}
  561. /\v+/utf
  562. \x{2027}\x{2030}\x{2028}\x{2029}
  563. \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
  564. /[\v\x{e000}]+/B,utf
  565. \x{2027}\x{2030}\x{2028}\x{2029}
  566. \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
  567. /\V+/utf
  568. \x{2028}\x{2029}\x{2027}\x{2030}
  569. \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86}
  570. /[\V\x{d7ff}]+/B,utf
  571. \x{2028}\x{2029}\x{2027}\x{2030}
  572. \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86}
  573. /\R+/bsr=unicode,utf
  574. \x{2027}\x{2030}\x{2028}\x{2029}
  575. \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
  576. /(..)\1/utf
  577. ab\=ps
  578. aba\=ps
  579. abab\=ps
  580. /(..)\1/i,utf
  581. ab\=ps
  582. abA\=ps
  583. aBAb\=ps
  584. /(..)\1{2,}/utf
  585. ab\=ps
  586. aba\=ps
  587. abab\=ps
  588. ababa\=ps
  589. ababab\=ps
  590. ababab\=ph
  591. abababa\=ps
  592. abababa\=ph
  593. /(..)\1{2,}/i,utf
  594. ab\=ps
  595. aBa\=ps
  596. aBAb\=ps
  597. AbaBA\=ps
  598. abABAb\=ps
  599. aBAbaB\=ph
  600. abABabA\=ps
  601. abaBABa\=ph
  602. /(..)\1{2,}?x/i,utf
  603. ab\=ps
  604. abA\=ps
  605. aBAb\=ps
  606. abaBA\=ps
  607. abAbaB\=ps
  608. abaBabA\=ps
  609. abAbABaBx\=ps
  610. /./utf,newline=crlf
  611. \r\=ps
  612. \r\=ph
  613. /.{2,3}/utf,newline=crlf
  614. \r\=ps
  615. \r\=ph
  616. \r\r\=ps
  617. \r\r\=ph
  618. \r\r\r\=ps
  619. \r\r\r\=ph
  620. /.{2,3}?/utf,newline=crlf
  621. \r\=ps
  622. \r\=ph
  623. \r\r\=ps
  624. \r\r\=ph
  625. \r\r\r\=ps
  626. \r\r\r\=ph
  627. /[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/B,utf
  628. /[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/Bi,utf
  629. /[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/B,utf
  630. /[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/Bi,utf
  631. /(?<=\x{1234}\x{1234})\bxy/I,utf
  632. /(?<!^)ETA/utf
  633. \= Expect no match
  634. ETA
  635. /\u0100/B,utf,alt_bsux,allow_empty_class,match_unset_backref
  636. /[\u0100-\u0200]/B,utf,alt_bsux,allow_empty_class,match_unset_backref
  637. /\ud800/utf,alt_bsux,allow_empty_class,match_unset_backref
  638. /^\u{0000000000010ffff}/utf,extra_alt_bsux
  639. \x{10ffff}
  640. /\u{ 1bb1}/utf,extra_alt_bsux
  641. u{ 1bb1}
  642. \= Expect no match
  643. \x{1bb1}
  644. /\u/utf,alt_bsux
  645. \\u
  646. /^a+[a\x{200}]/B,utf
  647. aa
  648. /[b-d\x{200}-\x{250}]*[ae-h]?#[\x{200}-\x{250}]{0,8}[\x00-\xff]*#[\x{200}-\x{250}]+[a-z]/B,utf
  649. /[\p{L}]/IB
  650. /[\p{^L}]/IB
  651. /[\P{L}]/IB
  652. /[\P{^L}]/IB
  653. /[abc\p{L}\x{0660}]/IB,utf
  654. /[\p{Nd}]/IB,utf
  655. 1234
  656. /[\p{Nd}+-]+/IB,utf
  657. 1234
  658. 12-34
  659. 12+\x{661}-34
  660. \= Expect no match
  661. abcd
  662. /(?:[\PPa*]*){8,}/
  663. /[\P{Any}]/B
  664. /[\P{Any}\E]/B
  665. /(\P{Yi}+\277)/
  666. /(\P{Yi}+\277)?/
  667. /(?<=\P{Yi}{3}A)X/
  668. /\p{Yi}+(\P{Yi}+)(?1)/
  669. /(\P{Yi}{2}\277)?/
  670. /[\P{Yi}A]/
  671. /[\P{Yi}\P{Yi}\P{Yi}A]/
  672. /[^\P{Yi}A]/
  673. /[^\P{Yi}\P{Yi}\P{Yi}A]/
  674. /(\P{Yi}*\277)*/
  675. /(\P{Yi}*?\277)*/
  676. /(\p{Yi}*+\277)*/
  677. /(\P{Yi}?\277)*/
  678. /(\P{Yi}??\277)*/
  679. /(\p{Yi}?+\277)*/
  680. /(\P{Yi}{0,3}\277)*/
  681. /(\P{Yi}{0,3}?\277)*/
  682. /(\p{Yi}{0,3}+\277)*/
  683. /\p{Zl}{2,3}+/B,utf
  684. 


  685. \x{2028}\x{2028}\x{2028}
  686. /\p{Zl}/B,utf
  687. /\p{Lu}{3}+/B,utf
  688. /\pL{2}+/B,utf
  689. /\p{Cc}{2}+/B,utf
  690. /^\p{Cf}/utf
  691. \x{180e}
  692. \x{061c}
  693. \x{2066}
  694. \x{2067}
  695. \x{2068}
  696. \x{2069}
  697. /^\p{Cs}/utf
  698. \x{dfff}\=no_utf_check
  699. \= Expect no match
  700. \x{09f}
  701. /^\p{Mn}/utf
  702. \x{1a1b}
  703. /^\p{Pe}/utf
  704. \x{2309}
  705. \x{230b}
  706. /^\p{Ps}/utf
  707. \x{2308}
  708. \x{230a}
  709. /^\p{Sc}+/utf
  710. $\x{a2}\x{a3}\x{a4}\x{a5}\x{a6}
  711. \x{9f2}
  712. \= Expect no match
  713. X
  714. \x{2c2}
  715. /^\p{Zs}/utf
  716. \ \
  717. \x{a0}
  718. \x{1680}
  719. \x{2000}
  720. \x{2001}
  721. \= Expect no match
  722. \x{2028}
  723. \x{200d}
  724. # These are here because Perl has problems with the negative versions of the
  725. # properties and has changed how it behaves for caseless matching.
  726. /\p{^Lu}/i,utf
  727. 1234
  728. \= Expect no match
  729. ABC
  730. /\P{Lu}/i,utf
  731. 1234
  732. \= Expect no match
  733. ABC
  734. /\p{Ll}/i,utf
  735. a
  736. Az
  737. \= Expect no match
  738. ABC
  739. /\p{Lu}/i,utf
  740. A
  741. a\x{10a0}B
  742. \= Expect no match
  743. a
  744. \x{1d00}
  745. /\p{Lu}/i,utf
  746. A
  747. aZ
  748. \= Expect no match
  749. abc
  750. /[\x{c0}\x{391}]/i,utf
  751. \x{c0}
  752. \x{e0}
  753. # The next two are special cases where the lengths of the different cases of
  754. # the same character differ. The first went wrong with heap frame storage; the
  755. # second was broken in all cases.
  756. /^\x{023a}+?(\x{0130}+)/i,utf
  757. \x{023a}\x{2c65}\x{0130}
  758. /^\x{023a}+([^X])/i,utf
  759. \x{023a}\x{2c65}X
  760. /\x{c0}+\x{116}+/i,utf
  761. \x{c0}\x{e0}\x{116}\x{117}
  762. /[\x{c0}\x{116}]+/i,utf
  763. \x{c0}\x{e0}\x{116}\x{117}
  764. /(\x{de})\1/i,utf
  765. \x{de}\x{de}
  766. \x{de}\x{fe}
  767. \x{fe}\x{fe}
  768. \x{fe}\x{de}
  769. /^\x{c0}$/i,utf
  770. \x{c0}
  771. \x{e0}
  772. /^\x{e0}$/i,utf
  773. \x{c0}
  774. \x{e0}
  775. # The next two should be Perl-compatible, but Perl fails to match \x{e0}. PCRE
  776. # will match it only with UCP support, because without that it has no notion
  777. # of case for anything other than the ASCII letters.
  778. /((?i)[\x{c0}])/utf
  779. \x{c0}
  780. \x{e0}
  781. /(?i:[\x{c0}])/utf
  782. \x{c0}
  783. \x{e0}
  784. # These are PCRE's extra properties to help with Unicodizing \d etc.
  785. /^\p{Xan}/utf
  786. ABCD
  787. 1234
  788. \x{6ca}
  789. \x{a6c}
  790. \x{10a7}
  791. \= Expect no match
  792. _ABC
  793. /^\p{Xan}+/utf
  794. ABCD1234\x{6ca}\x{a6c}\x{10a7}_
  795. \= Expect no match
  796. _ABC
  797. /^\p{Xan}+?/utf
  798. \x{6ca}\x{a6c}\x{10a7}_
  799. /^\p{Xan}*/utf
  800. ABCD1234\x{6ca}\x{a6c}\x{10a7}_
  801. /^\p{Xan}{2,9}/utf
  802. ABCD1234\x{6ca}\x{a6c}\x{10a7}_
  803. /^\p{Xan}{2,9}?/utf
  804. \x{6ca}\x{a6c}\x{10a7}_
  805. /^[\p{Xan}]/utf
  806. ABCD1234_
  807. 1234abcd_
  808. \x{6ca}
  809. \x{a6c}
  810. \x{10a7}
  811. \= Expect no match
  812. _ABC
  813. /^[\p{Xan}]+/utf
  814. ABCD1234\x{6ca}\x{a6c}\x{10a7}_
  815. \= Expect no match
  816. _ABC
  817. /^>\p{Xsp}/utf
  818. >\x{1680}\x{2028}\x{0b}
  819. >\x{a0}
  820. \= Expect no match
  821. \x{0b}
  822. /^>\p{Xsp}+/utf
  823. > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
  824. /^>\p{Xsp}+?/utf
  825. >\x{1680}\x{2028}\x{0b}
  826. /^>\p{Xsp}*/utf
  827. > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
  828. /^>\p{Xsp}{2,9}/utf
  829. > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
  830. /^>\p{Xsp}{2,9}?/utf
  831. > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
  832. /^>[\p{Xsp}]/utf
  833. >\x{2028}\x{0b}
  834. /^>[\p{Xsp}]+/utf
  835. > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
  836. /^>\p{Xps}/utf
  837. >\x{1680}\x{2028}\x{0b}
  838. >\x{a0}
  839. \= Expect no match
  840. \x{0b}
  841. /^>\p{Xps}+/utf
  842. > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
  843. /^>\p{Xps}+?/utf
  844. >\x{1680}\x{2028}\x{0b}
  845. /^>\p{Xps}*/utf
  846. > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
  847. /^>\p{Xps}{2,9}/utf
  848. > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
  849. /^>\p{Xps}{2,9}?/utf
  850. > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
  851. /^>[\p{Xps}]/utf
  852. >\x{2028}\x{0b}
  853. /^>[\p{Xps}]+/utf
  854. > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
  855. /^\p{Xwd}/utf
  856. ABCD
  857. 1234
  858. \x{6ca}
  859. \x{a6c}
  860. \x{10a7}
  861. _ABC
  862. \= Expect no match
  863. []
  864. /^\p{Xwd}+/utf
  865. ABCD1234\x{6ca}\x{a6c}\x{10a7}_
  866. /^\p{Xwd}+?/utf
  867. \x{6ca}\x{a6c}\x{10a7}_
  868. /^\p{Xwd}*/utf
  869. ABCD1234\x{6ca}\x{a6c}\x{10a7}_
  870. /^\p{Xwd}{2,9}/utf
  871. A_B12\x{6ca}\x{a6c}\x{10a7}
  872. /^\p{Xwd}{2,9}?/utf
  873. \x{6ca}\x{a6c}\x{10a7}_
  874. /^[\p{Xwd}]/utf
  875. ABCD1234_
  876. 1234abcd_
  877. \x{6ca}
  878. \x{a6c}
  879. \x{10a7}
  880. _ABC
  881. \= Expect no match
  882. []
  883. /^[\p{Xwd}]+/utf
  884. ABCD1234\x{6ca}\x{a6c}\x{10a7}_
  885. # A check not in UTF-8 mode
  886. /^[\p{Xwd}]+/
  887. ABCD1234_
  888. # Some negative checks
  889. /^[\P{Xwd}]+/utf
  890. !.+\x{019}\x{482}AB
  891. /^[\p{^Xwd}]+/utf
  892. !.+\x{019}\x{589}AB
  893. /[\D]/B,utf,ucp
  894. 1\x{3c8}2
  895. /[\d]/B,utf,ucp
  896. >\x{6f4}<
  897. /[\S]/B,utf,ucp
  898. \x{1680}\x{6f4}\x{1680}
  899. /[\s]/B,utf,ucp
  900. >\x{1680}<
  901. /[\W]/B,utf,ucp
  902. A\x{1735}B
  903. /[\w]/B,utf,ucp
  904. >\x{1723}<
  905. /\D/B,utf,ucp
  906. 1\x{3c8}2
  907. /\d/B,utf,ucp
  908. >\x{6f4}<
  909. /\S/B,utf,ucp
  910. \x{1680}\x{6f4}\x{1680}
  911. /\s/B,utf,ucp
  912. >\x{1680}>
  913. /\W/B,utf,ucp
  914. A\x{1735}B
  915. /\w/B,utf,ucp
  916. >\x{1723}<
  917. /[[:alpha:]]/B,ucp
  918. /[[:lower:]]/B,ucp
  919. /[[:upper:]]/B,ucp
  920. /[[:alnum:]]/B,ucp
  921. /[[:ascii:]]/B,ucp
  922. /[[:cntrl:]]/B,ucp
  923. /[[:digit:]]/B,ucp
  924. /[[:digit:]]/B,ucp,ascii_digit
  925. /[[:graph:]]/B,ucp
  926. /[[:print:]]/B,ucp
  927. /[[:punct:]]/B,ucp
  928. /[[:space:]]/B,ucp
  929. /[[:word:]]/B,ucp
  930. /[[:xdigit:]]/B,ucp
  931. /[[:xdigit:]]/B,ucp,ascii_digit
  932. # Unicode properties for \b and \B
  933. /\b...\B/utf,ucp
  934. abc_
  935. \x{37e}abc\x{376}
  936. \x{37e}\x{376}\x{371}\x{393}\x{394}
  937. !\x{c0}++\x{c1}\x{c2}
  938. !\x{c0}+++++
  939. # Without PCRE_UCP, non-ASCII characters always fail, even if < 256
  940. /\b...\B/utf
  941. abc_
  942. \= Expect no match
  943. \x{37e}abc\x{376}
  944. \x{37e}\x{376}\x{371}\x{393}\x{394}
  945. !\x{c0}++\x{c1}\x{c2}
  946. !\x{c0}+++++
  947. # With PCRE_UCP, non-UTF8 chars that are < 256 still check properties
  948. /\b...\B/ucp
  949. abc_
  950. !\x{c0}++\x{c1}\x{c2}
  951. !\x{c0}+++++
  952. # Some of these are silly, but they check various combinations
  953. /[[:^alpha:][:^cntrl:]]+/B,utf,ucp
  954. 123
  955. abc
  956. /[[:^cntrl:][:^alpha:]]+/B,utf,ucp
  957. 123
  958. abc
  959. /[[:alpha:]]+/B,utf,ucp
  960. abc
  961. /[[:^alpha:]\S]+/B,utf,ucp
  962. 123
  963. abc
  964. /[^\d]+/B,utf,ucp
  965. abc123
  966. abc\x{123}
  967. \x{660}abc
  968. /\p{Lu}+9\p{Lu}+B\p{Lu}+b/B
  969. /\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/B
  970. /\P{Lu}+9\P{Lu}+B\P{Lu}+b/B
  971. /\p{Han}+X\p{Greek}+\x{370}/B,utf
  972. /\p{Xan}+!\p{Xan}+A/B
  973. /\p{Xsp}+!\p{Xsp}\t/B
  974. /\p{Xps}+!\p{Xps}\t/B
  975. /\p{Xwd}+!\p{Xwd}_/B
  976. /A+\p{N}A+\dB+\p{N}*B+\d*/B,ucp
  977. # These behaved oddly in Perl, so they are kept in this test
  978. /(\x{23a}\x{23a}\x{23a})?\1/i,utf
  979. \= Expect no match
  980. \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
  981. /(ȺȺȺ)?\1/i,utf
  982. \= Expect no match
  983. ȺȺȺⱥⱥ
  984. /(\x{23a}\x{23a}\x{23a})?\1/i,utf
  985. \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
  986. /(ȺȺȺ)?\1/i,utf
  987. ȺȺȺⱥⱥⱥ
  988. /(\x{23a}\x{23a}\x{23a})\1/i,utf
  989. \= Expect no match
  990. \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
  991. /(ȺȺȺ)\1/i,utf
  992. \= Expect no match
  993. ȺȺȺⱥⱥ
  994. /(\x{23a}\x{23a}\x{23a})\1/i,utf
  995. \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
  996. /(ȺȺȺ)\1/i,utf
  997. ȺȺȺⱥⱥⱥ
  998. /(\x{2c65}\x{2c65})\1/i,utf
  999. \x{2c65}\x{2c65}\x{23a}\x{23a}
  1000. /(ⱥⱥ)\1/i,utf
  1001. ⱥⱥȺȺ
  1002. /(\x{23a}\x{23a}\x{23a})\1Y/i,utf
  1003. X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ
  1004. /(\x{2c65}\x{2c65})\1Y/i,utf
  1005. X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ
  1006. # These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE
  1007. /^[\p{Batak}]/utf
  1008. \x{1bc0}
  1009. \x{1bff}
  1010. \= Expect no match
  1011. \x{1bf4}
  1012. /^[\p{Brahmi}]/utf
  1013. \x{11000}
  1014. \x{1106f}
  1015. \= Expect no match
  1016. \x{1104e}
  1017. /^[\p{Mandaic}]/utf
  1018. \x{840}
  1019. \x{85e}
  1020. \= Expect no match
  1021. \x{85c}
  1022. \x{85d}
  1023. /(\X*)(.)/s,utf
  1024. A\x{300}
  1025. /^S(\X*)e(\X*)$/utf
  1026. Stéréo
  1027. /^\X/utf
  1028. ́réo
  1029. /^a\X41z/alt_bsux,allow_empty_class,match_unset_backref,dupnames
  1030. aX41z
  1031. \= Expect no match
  1032. aAz
  1033. /\X/
  1034. a\=ps
  1035. a\=ph
  1036. /\Xa/
  1037. aa\=ps
  1038. aa\=ph
  1039. /\X{2}/
  1040. aa\=ps
  1041. aa\=ph
  1042. /\X+a/
  1043. a\=ps
  1044. aa\=ps
  1045. aa\=ph
  1046. /\X+?a/
  1047. a\=ps
  1048. ab\=ps
  1049. aa\=ps
  1050. aa\=ph
  1051. aba\=ps
  1052. # These Unicode 6.1.0 scripts are not known to Perl.
  1053. /\p{Chakma}\d/utf,ucp
  1054. \x{11100}\x{1113c}
  1055. /\p{Takri}\d/utf,ucp
  1056. \x{11680}\x{116c0}
  1057. /^\X/utf
  1058. A\=ps
  1059. A\=ph
  1060. A\x{300}\x{301}\=ps
  1061. A\x{300}\x{301}\=ph
  1062. A\x{301}\=ps
  1063. A\x{301}\=ph
  1064. /^\X{2,3}/utf
  1065. A\=ps
  1066. A\=ph
  1067. AA\=ps
  1068. AA\=ph
  1069. A\x{300}\x{301}\=ps
  1070. A\x{300}\x{301}\=ph
  1071. A\x{300}\x{301}A\x{300}\x{301}\=ps
  1072. A\x{300}\x{301}A\x{300}\x{301}\=ph
  1073. /^\X{2}/utf
  1074. AA\=ps
  1075. AA\=ph
  1076. A\x{300}\x{301}A\x{300}\x{301}\=ps
  1077. A\x{300}\x{301}A\x{300}\x{301}\=ph
  1078. /^\X+/utf
  1079. AA\=ps
  1080. AA\=ph
  1081. /^\X+?Z/utf
  1082. AA\=ps
  1083. AA\=ph
  1084. /A\x{3a3}B/IBi,utf
  1085. /[\x{3a3}]/Bi,utf
  1086. /[^\x{3a3}]/Bi,utf
  1087. /[\x{3a3}]+/Bi,utf
  1088. /[^\x{3a3}]+/Bi,utf
  1089. /a*\x{3a3}/Bi,utf
  1090. /\x{3a3}+a/Bi,utf
  1091. /\x{3a3}*\x{3c2}/Bi,utf
  1092. /\x{3a3}{3}/i,utf,aftertext
  1093. \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
  1094. /\x{3a3}{2,4}/i,utf,aftertext
  1095. \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
  1096. /\x{3a3}{2,4}?/i,utf,aftertext
  1097. \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
  1098. /\x{3a3}+./i,utf,aftertext
  1099. \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
  1100. /\x{3a3}++./i,utf,aftertext
  1101. \= Expect no match
  1102. \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
  1103. /\x{3a3}*\x{3c2}/Bi,utf
  1104. /[^\x{3a3}]*\x{3c2}/Bi,utf
  1105. /[^a]*\x{3c2}/Bi,utf
  1106. /ist/Bi,utf
  1107. \= Expect no match
  1108. ikt
  1109. /is+t/i,utf
  1110. iSs\x{17f}t
  1111. \= Expect no match
  1112. ikt
  1113. /is+?t/i,utf
  1114. \= Expect no match
  1115. ikt
  1116. /is?t/i,utf
  1117. \= Expect no match
  1118. ikt
  1119. /is{2}t/i,utf
  1120. \= Expect no match
  1121. iskt
  1122. # This property is a PCRE special
  1123. /^\p{Xuc}/utf
  1124. $abc
  1125. @abc
  1126. `abc
  1127. \x{1234}abc
  1128. \= Expect no match
  1129. abc
  1130. /^\p{Xuc}+/utf
  1131. $@`\x{a0}\x{1234}\x{e000}**
  1132. \= Expect no match
  1133. \x{9f}
  1134. /^\p{Xuc}+?/utf
  1135. $@`\x{a0}\x{1234}\x{e000}**
  1136. \= Expect no match
  1137. \x{9f}
  1138. /^\p{Xuc}+?\*/utf
  1139. $@`\x{a0}\x{1234}\x{e000}**
  1140. \= Expect no match
  1141. \x{9f}
  1142. /^\p{Xuc}++/utf
  1143. $@`\x{a0}\x{1234}\x{e000}**
  1144. \= Expect no match
  1145. \x{9f}
  1146. /^\p{Xuc}{3,5}/utf
  1147. $@`\x{a0}\x{1234}\x{e000}**
  1148. \= Expect no match
  1149. \x{9f}
  1150. /^\p{Xuc}{3,5}?/utf
  1151. $@`\x{a0}\x{1234}\x{e000}**
  1152. \= Expect no match
  1153. \x{9f}
  1154. /^[\p{Xuc}]/utf
  1155. $@`\x{a0}\x{1234}\x{e000}**
  1156. \= Expect no match
  1157. \x{9f}
  1158. /^[\p{Xuc}]+/utf
  1159. $@`\x{a0}\x{1234}\x{e000}**
  1160. \= Expect no match
  1161. \x{9f}
  1162. /^\P{Xuc}/utf
  1163. abc
  1164. \= Expect no match
  1165. $abc
  1166. @abc
  1167. `abc
  1168. \x{1234}abc
  1169. /^[\P{Xuc}]/utf
  1170. abc
  1171. \= Expect no match
  1172. $abc
  1173. @abc
  1174. `abc
  1175. \x{1234}abc
  1176. # Some auto-possessification tests
  1177. /\pN+\z/B
  1178. /\PN+\z/B
  1179. /\pN+/B
  1180. /\PN+/B
  1181. /\p{Any}+\p{Any} \p{Any}+\P{Any} \p{Any}+\p{L&} \p{Any}+\p{L} \p{Any}+\p{Lu} \p{Any}+\p{Han} \p{Any}+\p{Xan} \p{Any}+\p{Xsp} \p{Any}+\p{Xps} \p{Xwd}+\p{Any} \p{Any}+\p{Xuc}/Bx,ucp
  1182. /\p{L&}+\p{Any} \p{L&}+\p{L&} \P{L&}+\p{L&} \p{L&}+\p{L} \p{L&}+\p{Lu} \p{L&}+\p{Han} \p{L&}+\p{Xan} \p{L&}+\P{Xan} \p{L&}+\p{Xsp} \p{L&}+\p{Xps} \p{Xwd}+\p{L&} \p{L&}+\p{Xuc}/Bx,ucp
  1183. /\p{N}+\p{Any} \p{N}+\p{L&} \p{N}+\p{L} \p{N}+\P{L} \p{N}+\P{N} \p{N}+\p{Lu} \p{N}+\p{Han} \p{N}+\p{Xan} \p{N}+\p{Xsp} \p{N}+\p{Xps} \p{Xwd}+\p{N} \p{N}+\p{Xuc}/Bx,ucp
  1184. /\p{Lu}+\p{Any} \p{Lu}+\p{L&} \p{Lu}+\p{L} \p{Lu}+\p{Lu} \P{Lu}+\p{Lu} \p{Lu}+\p{Nd} \p{Lu}+\P{Nd} \p{Lu}+\p{Han} \p{Lu}+\p{Xan} \p{Lu}+\p{Xsp} \p{Lu}+\p{Xps} \p{Xwd}+\p{Lu} \p{Lu}+\p{Xuc}/Bx,ucp
  1185. /\p{Han}+\p{Lu} \p{Han}+\p{L&} \p{Han}+\p{L} \p{Han}+\p{Lu} \p{Han}+\p{Arabic} \p{Arabic}+\p{Arabic} \p{Han}+\p{Xan} \p{Han}+\p{Xsp} \p{Han}+\p{Xps} \p{Xwd}+\p{Han} \p{Han}+\p{Xuc}/Bx,ucp
  1186. /\p{Xan}+\p{Any} \p{Xan}+\p{L&} \P{Xan}+\p{L&} \p{Xan}+\p{L} \p{Xan}+\p{Lu} \p{Xan}+\p{Han} \p{Xan}+\p{Xan} \p{Xan}+\P{Xan} \p{Xan}+\p{Xsp} \p{Xan}+\p{Xps} \p{Xwd}+\p{Xan} \p{Xan}+\p{Xuc}/Bx,ucp
  1187. /\p{Xsp}+\p{Any} \p{Xsp}+\p{L&} \p{Xsp}+\p{L} \p{Xsp}+\p{Lu} \p{Xsp}+\p{Han} \p{Xsp}+\p{Xan} \p{Xsp}+\p{Xsp} \P{Xsp}+\p{Xsp} \p{Xsp}+\p{Xps} \p{Xwd}+\p{Xsp} \p{Xsp}+\p{Xuc}/Bx,ucp
  1188. /\p{Xwd}+\p{Any} \p{Xwd}+\p{L&} \p{Xwd}+\p{L} \p{Xwd}+\p{Lu} \p{Xwd}+\p{Han} \p{Xwd}+\p{Xan} \p{Xwd}+\p{Xsp} \p{Xwd}+\p{Xps} \p{Xwd}+\p{Xwd} \p{Xwd}+\P{Xwd} \p{Xwd}+\p{Xuc}/Bx,ucp
  1189. /\p{Xuc}+\p{Any} \p{Xuc}+\p{L&} \p{Xuc}+\p{L} \p{Xuc}+\p{Lu} \p{Xuc}+\p{Han} \p{Xuc}+\p{Xan} \p{Xuc}+\p{Xsp} \p{Xuc}+\p{Xps} \p{Xwd}+\p{Xuc} \p{Xuc}+\p{Xuc} \p{Xuc}+\P{Xuc}/Bx,ucp
  1190. /\p{N}+\p{Ll} \p{N}+\p{Nd} \p{N}+\P{Nd}/Bx,ucp
  1191. /\p{Xan}+\p{L} \p{Xan}+\p{N} \p{Xan}+\p{C} \p{Xan}+\P{L} \P{Xan}+\p{N} \p{Xan}+\P{C}/Bx,ucp
  1192. /\p{L}+\p{Xan} \p{N}+\p{Xan} \p{C}+\p{Xan} \P{L}+\p{Xan} \p{N}+\p{Xan} \P{C}+\p{Xan} \p{L}+\P{Xan}/Bx,ucp
  1193. /\p{Xan}+\p{Lu} \p{Xan}+\p{Nd} \p{Xan}+\p{Cc} \p{Xan}+\P{Ll} \P{Xan}+\p{No} \p{Xan}+\P{Cf}/Bx,ucp
  1194. /\p{Lu}+\p{Xan} \p{Nd}+\p{Xan} \p{Cs}+\p{Xan} \P{Lt}+\p{Xan} \p{Nl}+\p{Xan} \P{Cc}+\p{Xan} \p{Lt}+\P{Xan}/Bx,ucp
  1195. /\w+\p{P} \w+\p{Po} \w+\s \p{Xan}+\s \s+\p{Xan} \s+\w/Bx,ucp
  1196. /\w+\P{P} \W+\p{Po} \w+\S \P{Xan}+\s \s+\P{Xan} \s+\W/Bx,ucp
  1197. /\w+\p{Po} \w+\p{Pc} \W+\p{Po} \W+\p{Pc} \w+\P{Po} \w+\P{Pc}/Bx,ucp
  1198. /\p{Nl}+\p{Xan} \P{Nl}+\p{Xan} \p{Nl}+\P{Xan} \P{Nl}+\P{Xan}/Bx,ucp
  1199. /\p{Xan}+\p{Nl} \P{Xan}+\p{Nl} \p{Xan}+\P{Nl} \P{Xan}+\P{Nl}/Bx,ucp
  1200. /\p{Xan}+\p{Nd} \P{Xan}+\p{Nd} \p{Xan}+\P{Nd} \P{Xan}+\P{Nd}/Bx,ucp
  1201. # End auto-possessification tests
  1202. /\w+/B,utf,ucp,auto_callout
  1203. abcd
  1204. /[\p{N}]?+/B,no_auto_possess
  1205. /[\p{L}ab]{2,3}+/B,no_auto_possess
  1206. /\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx
  1207. /.+\X/Bsx
  1208. /\X+$/Bmx
  1209. /\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx
  1210. /\d+\s{0,5}=\s*\S?=\w{0,4}\W*/B,utf,ucp
  1211. /[RST]+/Bi,utf,ucp
  1212. /[R-T]+/Bi,utf,ucp
  1213. /[Q-U]+/Bi,utf,ucp
  1214. /^s?c/Iim,utf
  1215. scat
  1216. /\X?abc/utf,no_start_optimize
  1217. \xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06
  1218. /\x{100}\x{200}\K\x{300}/utf,startchar
  1219. \x{100}\x{200}\x{300}
  1220. # Test UTF characters in a substitution
  1221. /ábc/utf,replace=XሴZ
  1222. 123ábc123
  1223. /(?<=abc)(|def)/g,utf,replace=<$0>
  1224. 123abcáyzabcdef789abcሴqr
  1225. /[A-`]/iB,utf
  1226. abcdefghijklmno
  1227. /(?<=\K\x{17f})/g,utf,aftertext,allow_lookaround_bsk
  1228. \x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
  1229. /(?<=\K\x{17f})/altglobal,utf,aftertext,allow_lookaround_bsk
  1230. \x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
  1231. "\xa\xf<(.\pZ*\P{Xwd}+^\xa8\3'3yq.::?(?J:()\xd1+!~:3'(8?:)':(?'d'(?'d'^u]!.+.+\\A\Ah(n+?9){7}+\K;(?'X'u'(?'c'(?'z'(?<y>\xb::\xf0'|\xd3(\xae?'w(z\x8?P>l)\x8?P>a)'\H\R\xd1+!!~:3'(?:h$N{26875}\W+?\\=D{2}\x89(?i:Uy0\N({2\xa(\v\x85*){y*\A(()\p{L}+?\P{^Xan}'+?\xff\+pS\?|).{;y*\A(()\p{L}+?\8}\d?1(|)(/1){7}.+[Lp{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(\xbf(R))\x8a\X*?\x8a\xb\xd1^9\3*+(\xc1,\k'R'\xb4)\xcc(z\z(?J)(?'X'\x1b(\xb\xd1^9\?'3*+P{^Xan}+?\xff\+(\xc1.]k+\xb'Pm'\xb4)\xcc4f\xa7'\xd1V(?i:U,{2,2})'(?'X'))?-%--\x95$9*\4'|\xd1(\x9c''%\x94$9)#(?'R')3\x7?('P\xed7'\xa8\xb1^u\xeaw\1\0\0\(|(?1){7}.+[\p{Me}].\s\xdcC*^\x14?(?(<y>))(?<!^)$C((;*?(R*?))+(?(R)\x8a\X*?\x8a\xb\xd1^9\3*+|(\xc1,\k'R'\xb4)\xcc! z)\z(?JJ)(?'X';(\xb\xd1^9\?'3*+(\xc1.]k+\xb'Pm'\xb4))':(?'d')(?'RD'(d')|)|$)'|(?<x>\g{d});\g{x}\x11\g{d}\x81\|$((?'X'\'X'(?'W''\x92()'9'\x83*))\xba*\!?^ <){)':;\xcc4'\xd1'(?'X'28))?-%--\x95$9*\4'|\xd1((''e\x94*$9:)*#(?'R')3)\x7?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+0!~:(?)'d'E:yD!\s(?'R'\x1e;\x10:U))|'\x9g!\xb0*){)\\x16:;()\x1e\x10\x87*:(?<y>)\xd1+!~:(?)'}'\d'E:yD!\s(?'R'\x1e;\x10:U))|'))|)g!\xb0*R+9{29+)#(?'P'})*?pS\{3,}\x85,{0,}l{*UTF)(\xe{7}){3722,{9,}d{2,?|))|{)\(A?&d}}{\xa,}2}){3,}7,l{)22}(,}l:7{2,4}}29\x19+)#?'P'})*v?))\x5"
  1232. /$(&.+[\p{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(?(R)){0,6}?|){12\x8a\X*?\x8a\x0b\xd1^9\3*+(\xc1,\k'P'\xb4)\xcc(z\z(?JJ)(?'X'8};(\x0b\xd1^9\?'3*+(\xc1.]k+\x0b'Pm'\xb4\xcc4'\xd1'(?'X'))?-%--\x95$9*\4'|\xd1(''%\x95*$9)#(?'R')3\x07?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+!~:(?)''(d'E:yD!\s(?'R'\x1e;\x10:U))|')g!\xb0*){29+))#(?'P'})*?/
  1233. "(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'"
  1234. /[\pS#moq]/
  1235. =
  1236. /(*:a\x{12345}b\t(d\)c)xxx/utf,alt_verbnames,mark
  1237. cxxxz
  1238. /abcd/utf,replace=x\x{824}y\o{3333}z(\Q12\$34$$\x34\E5$$),substitute_extended
  1239. abcd
  1240. /a(\x{e0}\x{101})(\x{c0}\x{102})/utf,replace=a\u$1\U$1\E$1\l$2\L$2\Eab\U\x{e0}\x{101}\L\x{d0}\x{160}\EDone,substitute_extended
  1241. a\x{e0}\x{101}\x{c0}\x{102}
  1242. /((?<digit>\d)|(?<letter>\p{L}))/g,substitute_extended,replace=<${digit:+digit; :not digit; }${letter:+letter:not a letter}>
  1243. ab12cde
  1244. /(*UCP)(*UTF)[[:>:]]X/B
  1245. /abc/utf,replace=xyz
  1246. abc\=zero_terminate
  1247. /a[[:punct:]b]/ucp,bincode
  1248. /a[[:punct:]b]/utf,ucp,bincode
  1249. /a[b[:punct:]]/utf,ucp,bincode
  1250. /[[:^ascii:]]/utf,ucp,bincode
  1251. /[[:^ascii:]\w]/utf,ucp,bincode
  1252. /[\w[:^ascii:]]/utf,ucp,bincode
  1253. /[^[:ascii:]\W]/utf,ucp,bincode
  1254. \x{de}
  1255. \x{200}
  1256. \= Expect no match
  1257. \x{589}
  1258. \x{37e}
  1259. /[[:^ascii:]a]/utf,ucp,bincode
  1260. /L(?#(|++<!(2)?/B,utf,no_auto_possess,auto_callout
  1261. /L(?#(|++<!(2)?/B,utf,ucp,auto_callout
  1262. /(*UTF)C\x09((?<!'(?x)!*H? #\xcc\x9a[^$]/
  1263. /[\D]/utf
  1264. \x{1d7cf}
  1265. /[\D\P{Nd}]/utf
  1266. \x{1d7cf}
  1267. /[^\D]/utf
  1268. a9b
  1269. \= Expect no match
  1270. \x{1d7cf}
  1271. /[^\D\P{Nd}]/utf
  1272. a9b
  1273. \x{1d7cf}
  1274. \= Expect no match
  1275. \x{10000}
  1276. # Hex uses pattern length, not zero-terminated. This tests for overrunning
  1277. # the given length of a pattern.
  1278. /'(*UTF)'/hex
  1279. /'#('/hex,extended,utf
  1280. /a(?<=A\XB)/utf
  1281. /../utf,auto_callout
  1282. \n\x{123}\x{123}\x{123}\x{123}
  1283. # This tests processing wide characters in extended mode.
  1284. /XȀ/x,utf
  1285. # These three test a bug fix that was not clearing up after a locale setting
  1286. # when the test or a subsequent one matched a wide character.
  1287. //locale=C
  1288. /[\P{Yi}]/utf
  1289. \x{2f000}
  1290. /[\P{Yi}]/utf,locale=C
  1291. \x{2f000}
  1292. /^(?<!(?=􃡜))/B,utf
  1293. # Horizontal and vertical space lists ignore caseless
  1294. /[\HH]/Bi,utf
  1295. /[^\HH]/Bi,utf
  1296. //g,utf
  1297. \=zero_terminate
  1298. /^(?1)\p{Nd}{3}(a)/
  1299. a123a
  1300. /\p{Nd}{0,3}[\pL](*:abc)(?C1)xxx/callout_info
  1301. # ---------------------------------------------------------------------------
  1302. # A bunch of tests that hit lines of code that others do not (at least when
  1303. # these were created).
  1304. /^[^a]{3,}?x/i,utf,no_start_optimize,no_auto_possess
  1305. \= Expect no match
  1306. bbb
  1307. cc
  1308. /^[ac]{3,}?x/i,utf,no_start_optimize,no_auto_possess
  1309. \= Expect no match
  1310. aaa\x{100}
  1311. /^X\X/no_start_optimize,no_auto_possess
  1312. \= Expect no match
  1313. X
  1314. /^X\p{L&}+?/no_start_optimize,no_auto_possess
  1315. \= Expect no match
  1316. X
  1317. /^X\p{L}+?/no_start_optimize,no_auto_possess
  1318. \= Expect no match
  1319. X
  1320. /^X\p{Lu}+?/no_start_optimize,no_auto_possess
  1321. \= Expect no match
  1322. X
  1323. /^X\p{Arabic}+?/no_start_optimize,no_auto_possess
  1324. \= Expect no match
  1325. X
  1326. /^X\p{Xan}+?/ucp,no_start_optimize,no_auto_possess
  1327. \= Expect no match
  1328. X
  1329. /^X\s+?/ucp,no_start_optimize,no_auto_possess
  1330. \= Expect no match
  1331. X
  1332. XX
  1333. /^X\S+?/ucp,no_start_optimize,no_auto_possess
  1334. XX
  1335. \= Expect no match
  1336. X
  1337. /^X\w+?/ucp,no_start_optimize,no_auto_possess
  1338. \= Expect no match
  1339. X
  1340. /^X[^\x{b5}]+?/i,utf,no_start_optimize,no_auto_possess
  1341. \= Expect no match
  1342. X
  1343. /^X[\x{b5}]+?/i,utf,no_start_optimize,no_auto_possess
  1344. \= Expect no match
  1345. X
  1346. /^X\p{Xuc}+?/utf,no_start_optimize,no_auto_possess
  1347. \= Expect no match
  1348. X
  1349. /^X.+?Z/s,utf,no_start_optimize,no_auto_possess
  1350. \= Expect no match
  1351. X
  1352. /^X\R+?/utf,no_start_optimize,no_auto_possess
  1353. \= Expect no match
  1354. X
  1355. /^X\H+?/utf,no_start_optimize,no_auto_possess
  1356. \= Expect no match
  1357. X
  1358. /^X\V+?/utf,no_start_optimize,no_auto_possess
  1359. \= Expect no match
  1360. X
  1361. /^X\s+?/utf,no_start_optimize,no_auto_possess
  1362. \= Expect no match
  1363. X
  1364. XX
  1365. /^X\S+?/utf,no_start_optimize,no_auto_possess
  1366. \= Expect no match
  1367. X
  1368. /^X\p{Any}{1,3}?Z/s,no_start_optimize,no_auto_possess
  1369. XYYYZ
  1370. \= Expect no match
  1371. XY
  1372. XYY
  1373. XYYY
  1374. XYYYYZ
  1375. /^X\p{L&}{1,3}?Z/s,no_start_optimize,no_auto_possess
  1376. \= Expect no match
  1377. XY
  1378. XY!
  1379. /^X\p{L}{1,3}?Z/s,no_start_optimize,no_auto_possess
  1380. \= Expect no match
  1381. XY
  1382. XY!
  1383. /^X\p{Lu}{1,3}?Z/s,no_start_optimize,no_auto_possess
  1384. \= Expect no match
  1385. XY
  1386. XY!
  1387. /^X\P{Han}{1,3}?Z/s,utf,no_start_optimize,no_auto_possess
  1388. \= Expect no match
  1389. XY
  1390. XY!
  1391. XY\x{2f00}!
  1392. /^X\p{Xan}{1,3}?Z/s,no_start_optimize,no_auto_possess
  1393. \= Expect no match
  1394. XY
  1395. XY!
  1396. /^X\p{Xsp}{1,3}?Z/s,no_start_optimize,no_auto_possess
  1397. \= Expect no match
  1398. X\n
  1399. X\n!
  1400. X\n\n!
  1401. /^X\P{Xsp}{1,3}?Z/s,no_start_optimize,no_auto_possess
  1402. \= Expect no match
  1403. XYY\n
  1404. /^X\p{Xwd}{1,3}?Z/s,no_start_optimize,no_auto_possess
  1405. \= Expect no match
  1406. XY
  1407. XY!
  1408. XYY!
  1409. /^X\x{b5}+?Z/i,utf,no_start_optimize,no_auto_possess
  1410. \= Expect no match
  1411. X
  1412. X\x{b5}
  1413. X\x{b5}\x{b5}Y
  1414. /^X\p{Xuc}+?Z/utf,no_start_optimize,no_auto_possess
  1415. \= Expect no match
  1416. X
  1417. X$
  1418. X@@Y
  1419. /(*CRLF)^X.+?Z/utf,no_start_optimize,no_auto_possess
  1420. \= Expect partial match
  1421. XYY\r\=ph
  1422. \= Expect no match
  1423. X
  1424. /^X.+?Z/s,utf,no_start_optimize,no_auto_possess
  1425. \= Expect no match
  1426. X
  1427. XYY
  1428. /^X\R+?Z/utf,no_start_optimize,no_auto_possess
  1429. \= Expect no match
  1430. X\nX
  1431. X\n\rX
  1432. X\n\r\nX
  1433. X\n\n
  1434. X\n\x{0c}
  1435. /(*BSR_ANYCRLF)^X\R+?Z/utf,no_start_optimize,no_auto_possess
  1436. \= Expect no match
  1437. X\nX
  1438. X\n\rX
  1439. X\n\r\nX
  1440. X\n\n
  1441. X\n\x{0c}
  1442. /^X\H+?Z/utf,no_start_optimize,no_auto_possess
  1443. \= Expect no match
  1444. XY\t
  1445. XYY
  1446. /^X\h+?Z/utf,no_start_optimize,no_auto_possess
  1447. \= Expect no match
  1448. X\t\t
  1449. X\tY
  1450. /^X\V+?Z/utf,no_start_optimize,no_auto_possess
  1451. \= Expect no match
  1452. XY\n
  1453. XYY
  1454. /^X\v+?Z/utf,no_start_optimize,no_auto_possess
  1455. \= Expect no match
  1456. X\n\n
  1457. X\nY
  1458. /^X\D+?Z/utf,no_start_optimize,no_auto_possess
  1459. \= Expect no match
  1460. XY9
  1461. XYY
  1462. /^X\d+?Z/utf,no_start_optimize,no_auto_possess
  1463. \= Expect no match
  1464. X99
  1465. X9Y
  1466. /^X\S+?Z/utf,no_start_optimize,no_auto_possess
  1467. \= Expect no match
  1468. XY\n
  1469. XYY
  1470. /^X\s+?Z/utf,no_start_optimize,no_auto_possess
  1471. \= Expect no match
  1472. X\n\n
  1473. X\nY
  1474. /^X\W+?Z/utf,no_start_optimize,no_auto_possess
  1475. \= Expect no match
  1476. X.A
  1477. X++
  1478. /^X\p{L&}{1,3}Z/no_start_optimize,no_auto_possess
  1479. \= Expect no match
  1480. XY
  1481. XY!
  1482. /^X\p{L}{1,3}Z/no_start_optimize,no_auto_possess
  1483. \= Expect no match
  1484. XY
  1485. /^X\p{Xan}{1,3}Z/no_start_optimize,no_auto_possess
  1486. \= Expect no match
  1487. XY
  1488. /^X\P{Xsp}{1,3}Z/no_start_optimize,no_auto_possess
  1489. \= Expect no match
  1490. XYY
  1491. /^X\p{Xuc}+Z/utf,no_start_optimize,no_auto_possess
  1492. \= Expect no match
  1493. X$
  1494. # ----------------------------------------------------------------------
  1495. # These test the dangerous PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL option.
  1496. /\x{d800}/B,utf,bad_escape_is_literal
  1497. /\ud800/B,utf,alt_bsux,bad_escape_is_literal
  1498. # ----------------------------------------------------------------------
  1499. /Aሴ+B/literal,utf,no_utf_check
  1500. Aሴ+B
  1501. # These are here because I upgraded to Unicode 10.0.0 before Perl did, so it
  1502. # doesn't recognize all these scripts. In time these three tests can be moved
  1503. # to test 4.
  1504. /^(\p{Adlam}+)(\p{Bhaiksuki}+)(\p{Marchen}+)(\p{Newa}+)(\p{Osage}+)
  1505. (\p{Tangut}+)(\p{Masaram_Gondi}+)(\p{Nushu}+)(\p{Soyombo}+)
  1506. (\p{Zanabazar_Square}+)/x,utf
  1507. \x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47}
  1508. /^\x{1E900}\x{104B0}/i,utf
  1509. \x{1E900}\x{104B0}
  1510. \x{1E922}\x{104D8}
  1511. /^(?:(\X)(?C))+$/utf
  1512. \x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47}\=callout_capture,callout_no_where
  1513. # Similarly for Unicode 11.0.0
  1514. /^(\p{Dogra}+)(\p{Gunjala_Gondi}+)(\p{Hanifi_Rohingya}+)(\p{Makasar}+)
  1515. (\p{Medefaidrin}+)(\p{Old_Sogdian}+)(\p{Sogdian}+)/x,utf
  1516. \x{11800}\x{11da9}\x{10d27}\x{11ee0}\x{16e48}\x{10f27}\x{10f30}
  1517. # Regional indicators
  1518. /^(\X)(\X)/utf,aftertext
  1519. \x{1F1E6}\x{1F1E7}\x{1F1E7}B
  1520. \x{1F1E6}\x{1F1E7}\x{1F1E7}\x{1F1E6}B
  1521. # More differences from Perl
  1522. /^\p{Common}/utf
  1523. \x{60c}
  1524. \x{61f}
  1525. \x{964}
  1526. \x{965}
  1527. /^\p{Inherited}/utf
  1528. \x{64b}
  1529. \x{654}
  1530. \x{655}
  1531. \x{1D1AA}
  1532. /\N{U+}/
  1533. /\N{U+}/utf
  1534. /\N{U}/
  1535. # This tests the non-UTF Unicode NEL pattern whitespace character, only
  1536. # recognized by PCRE2 with /x when there is Unicode support.
  1537. /A
  1538. �B/x
  1539. AB
  1540. # This tests Unicode Pattern White Space characters in verb names when they
  1541. # are being processed with PCRE2_EXTENDED. Note: there are UTF-8 characters
  1542. # with code points greater than 255 between A, B, and C in the pattern.
  1543. /(*: A‎B
C)abc/x,utf,mark,alt_verbnames
  1544. abc
  1545. # Script run tests: auto-possessification
  1546. /^(*sr:.*)/B,utf
  1547. paypаl.com A classic example of why script run checks are a good thing
  1548. /^(*sr:.*(*ACCEPT))/utf
  1549. paypаl.com But *ACCEPT breaks things
  1550. /^(*sr:\x{2e80}*)/B,utf
  1551. /^(*sr:\x{2e80}*)\x{2e80}/B,utf
  1552. /(?<!)(*sr:)/B
  1553. /(?<=abc(?=X(*sr:BXY)CCC)XBXYCCC)./B
  1554. abcXBXYCCC!
  1555. # Some script run patterns are broken in Perl 5.28.0. These can be moved into
  1556. # test 4 when a mended version of Perl is released.
  1557. /^(*sr:.{4})/utf
  1558. \x{0980}12\x{0993} Bengali Common-digits Bengali
  1559. \x{0780}12\x{07b1} Thaana Common-digits Thaana
  1560. \x{0e01}12\x{0e5b} Thai Common-digits Thai
  1561. \x{1780}12\x{19ff} Khmer Common-digits Khmer
  1562. \x{0904}12\x{0939} Devanagari Common-digits Devanagari
  1563. A\x{ff10}\x{ff19}B Latin Common-notascii-digits Latin
  1564. A\x{1d7ce}\x{1d7cf}B Latin fancy-common-digits Latin
  1565. # These ones involve non-ASCII but nevertheless Common digits. As of October
  1566. # 2018 even blead Perl wasn't handling all of these - but is going to.
  1567. /^(*sr:.{4})/utf
  1568. A\x{ff10}\x{ff19}B Latin Common-notascii-digits Latin
  1569. \x{ff10}\x{ff19}.. Common-notascii-digits Common Common
  1570. A\x{ff10}BC Latin Common-notascii-digit Latin Latin
  1571. A\x{1d7ce}\x{1d7cf}B Latin fancy-common-digits Latin
  1572. \x{1d7ce}\x{1d7cf},, fancy-common-digits Common Common
  1573. A\x{1d7ce}BC Latin fancy-common-digit Latin Latin
  1574. # Some Unicode 12.1.0 new script characters
  1575. /\p{Elymaic}\p{Nandinagari}\p{Nyiakeng_Puachue_Hmong}\p{Wancho}/utf
  1576. \x{10fe5}\x{119AC}\x{1E10E}\x{1E2D1}
  1577. # Some Unicode 13.0.0 new script characters
  1578. /\p{Chorasmian}\p{Dives_Akuru}\p{Khitan_Small_Script}\p{Yezidi}/utf
  1579. \x{10FB0}\x{11900}\x{18B00}\x{10E80}
  1580. # -------
  1581. # Test reference and errors in non-ASCII characters in group names
  1582. /(?'𑠅ABC'...)/I,utf
  1583. abcde\=copy=𑠅ABC
  1584. # Bad ones
  1585. /(?'AB၌C'...)\g{AB၌C}/utf
  1586. /(?'٠ABC'...)/utf
  1587. /(?'²ABC'...)/utf
  1588. /(?'X²ABC'...)/utf
  1589. # -------
  1590. /\p{Any}*xyz/I
  1591. /(|�)7/caseless,ucp
  1592. /(\xc1)\1/i,ucp
  1593. \xc1\xe1\=no_jit
  1594. /\p{L&}+\p{bidi_control}/B
  1595. /\p{bidi_control}+\p{L&}/B
  1596. /\p{han}/B
  1597. /\p{script:han}/B
  1598. /\p{sc:han}/B
  1599. /\p{script extensions:han}/B
  1600. /\p{scx:han}/B
  1601. # Test error - invalid script name
  1602. /\p{sc:L}/
  1603. # Some Boolean property tests that differ from Perl
  1604. /\p{emojimodifierbase}\p{ebase}/g,utf
  1605. >AN<>\x{261d}\x{1faf6}<>yz<
  1606. /\p{graphemelink}\p{grlink}/g,utf
  1607. >AN<>\x{11d97}\x{94d}<>yz<
  1608. /\p{soft dotted}\p{sd}/g,utf
  1609. >AF23<>\x{1df1a}\x{69}<>yz<
  1610. # ------------------------------------------------
  1611. /\p{\2b[:x�igi:t:_/
  1612. # Tests for PCRE2_EXTRA_CASELESS_RESTRICT. Compare each test with and without
  1613. # the restriction.
  1614. /AskZ/i,utf,caseless_restrict
  1615. AskZ
  1616. aSKz
  1617. \= Expect no match
  1618. A\x{17f}kZ
  1619. As\x{212a}Z
  1620. /AskZ/i,utf
  1621. AskZ
  1622. aSKz
  1623. A\x{17f}kZ
  1624. As\x{212a}Z
  1625. /A\x{17f}\x{212a}Z/ir,utf
  1626. \= Expect no match
  1627. AskZ
  1628. /A\x{17f}\x{212a}Z/i,utf
  1629. AskZ
  1630. /[AskZ]+/i,utf,caseless_restrict
  1631. AskZ
  1632. aSKz
  1633. A\x{17f}kZ
  1634. As\x{212a}Z
  1635. /[AskZ]+/i,utf
  1636. AskZ
  1637. aSKz
  1638. A\x{17f}kZ
  1639. As\x{212a}Z
  1640. /[\x{17f}\x{212a}]+/ir,utf
  1641. \= Expect no match
  1642. AskZ
  1643. /[\x{17f}\x{212a}]+/i,utf
  1644. AskZ
  1645. /[^s]+/ir,utf
  1646. A\x{17f}Z
  1647. /[^s]+/i,utf
  1648. A\x{17f}Z
  1649. /[^k]+/ir,utf
  1650. A\x{212a}Z
  1651. /[^k]+/i,utf
  1652. A\x{212a}Z
  1653. /[^sk]+/ir,utf
  1654. A\x{17f}\x{212a}Z
  1655. /[^sk]+/i,utf
  1656. A\x{17f}\x{212a}Z
  1657. /[^\x{17f}]+/ir,utf
  1658. AsSZ
  1659. /[^\x{17f}]+/i,utf
  1660. AsSZ
  1661. /[Ss]+/irB,utf
  1662. Sss\x{17f}ss
  1663. /[Ss]+/iB,utf
  1664. Sss\x{17f}ss
  1665. /[S\x{17f}]/irB,utf
  1666. /[S\x{17f}]/iB,utf
  1667. /[\x{17f}s]/irB,utf
  1668. /[\x{17f}s]/iB,utf
  1669. /[\x{4b}\x{6b}]/irB,utf
  1670. /[\x{4b}\x{6b}]/iB,utf
  1671. /s(?r)s(?-r)s(?r:s)s/i,utf
  1672. \x{17f}S\x{17f}S\x{17f}
  1673. \= Expect no match
  1674. \x{17f}\x{17f}\x{17f}S\x{17f}
  1675. \x{17f}S\x{17f}\x{17f}\x{17f}
  1676. /k(?^i)k/ir,utf
  1677. K\x{212a}
  1678. \= Expect no match
  1679. \x{212a}\x{212a}
  1680. # End caseless restrict tests
  1681. # TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without.
  1682. # DIGITS
  1683. /\d+/i,utf
  1684. 123\x{660}456
  1685. /\d+/i,utf,ucp
  1686. 123\x{660}456
  1687. /\d+/i,utf,ucp,ascii_bsd
  1688. 123\x{660}456
  1689. /[\d]+/i,utf
  1690. 123\x{660}456
  1691. /[\d]+/i,utf,ucp
  1692. 123\x{660}456
  1693. /[\d]+/i,utf,ucp,ascii_bsd
  1694. 123\x{660}456
  1695. /\d(?aD)\d(?-aD)\d/utf,ucp
  1696. \x{660}9\x{660}
  1697. \= Expect no match
  1698. \x{660}\x{660}\x{660}
  1699. /\d(?-aD)\d(?aD)\d/utf,ucp,ascii_bsd
  1700. 999
  1701. 9\x{660}9
  1702. /\d(?a)\d(?-a)\d/utf,ucp
  1703. \x{660}9\x{660}
  1704. \= Expect no match
  1705. \x{660}\x{660}\x{660}
  1706. /\d(?-aD)\d(?aD)\d/utf,ucp,ascii_bsd
  1707. 999
  1708. 9\x{660}9
  1709. # SPACES
  1710. />\s+</i,utf
  1711. > <
  1712. \= Expect no match
  1713. >\x{a0} <
  1714. />\s+</i,utf,ucp
  1715. > <
  1716. >\x{a0} <
  1717. />\s+</i,utf,ucp,ascii_bss
  1718. > <
  1719. \= Expect no match
  1720. >\x{a0} <
  1721. />[\s]+</i,utf
  1722. > <
  1723. \= Expect no match
  1724. >\x{a0} <
  1725. />[\s]+</i,utf,ucp
  1726. > <
  1727. >\x{a0} <
  1728. />[\s]+</i,utf,ucp,ascii_bss
  1729. > <
  1730. \= Expect no match
  1731. >\x{a0} <
  1732. />\s(?aS)\s(?-aS)\s</utf,ucp
  1733. >\x{a0} \x{a0}<
  1734. \= Expect no match
  1735. >\x{a0}\x{a0}\x{a0}<
  1736. />\s(?a)\s(?-a)\s</utf,ucp
  1737. >\x{a0} \x{a0}<
  1738. \= Expect no match
  1739. >\x{a0}\x{a0}\x{a0}<
  1740. # WORDS
  1741. /\w+/i,utf
  1742. 123\x{660}abc
  1743. /\w+/i,utf,ucp
  1744. 123\x{660}abc
  1745. /\w+/i,utf,ucp,ascii_bsw
  1746. 123\x{660}abc
  1747. /[\w]+/i,utf
  1748. 123\x{660}abc
  1749. /[\w]+/i,utf,ucp
  1750. 123\x{660}abc
  1751. /[\w]+/i,utf,ucp,ascii_bsw
  1752. 123\x{660}abc
  1753. /\w(?aW)\w(?-aW)\w/utf,ucp
  1754. \x{660}A\x{c0}
  1755. \= Expect no match
  1756. \x{660}\x{c0}\x{c0}
  1757. /\w(?a)\w(?-a)\w/utf,ucp
  1758. \x{660}A\x{c0}
  1759. \= Expect no match
  1760. \x{660}\x{c0}\x{c0}
  1761. # WORD BOUNDARY
  1762. /\bABC\b/utf
  1763. \x{c0}ABC\x{d0}
  1764. /\bABC\b/utf,ucp
  1765. \= Expect no match
  1766. \x{c0}ABC\x{d0}
  1767. /\bABC\b/utf,ucp,ascii_bsw
  1768. \x{c0}ABC\x{d0}
  1769. /\bABC\b/utf,ucp,ascii_all
  1770. \x{c0}ABC\x{d0}
  1771. # POSIX
  1772. /^[[:digit:]]+$/utf,ucp
  1773. 123456
  1774. 123\x{660}456
  1775. /^[[:digit:]]+$/utf,ucp,ascii_digit
  1776. 123456
  1777. \= Expect no match
  1778. 123\x{660}456
  1779. /[[:digit:]]+/g,utf,ucp,ascii_digit
  1780. 123\x{660}456
  1781. /(?-aT)[[:digit:]](?aT)[[:digit:]]/utf,ucp,ascii_digit
  1782. 11
  1783. \x{ff11}1
  1784. \= Expect no match
  1785. 1\x{ff11}
  1786. /(?-aT:[[:digit:]])[[:digit:]]/utf,ucp,ascii_digit
  1787. 11
  1788. \x{ff11}1
  1789. \= Expect no match
  1790. 1\x{ff11}
  1791. /(?-aT:[[:digit:]])[[:digit:]]/utf,never_ucp,ascii_digit
  1792. 11
  1793. \= Expect no match
  1794. \x{ff11}1
  1795. 1\x{ff11}
  1796. /[[:digit:]]+/utf,ucp,ascii_posix
  1797. 123\x{660}456
  1798. /(?-aP)[[:digit:]](?aP)[[:digit:]]/utf,ucp,ascii_posix
  1799. 11
  1800. \x{ff11}1
  1801. \= Expect no match
  1802. 1\x{ff11}
  1803. /(?-aP:[[:digit:]])[[:digit:]]/utf,ucp,ascii_posix
  1804. 11
  1805. \x{ff11}1
  1806. \= Expect no match
  1807. 1\x{ff11}
  1808. /(?-a:[[:digit:]])[[:digit:]]/a,utf,ucp
  1809. 11
  1810. \x{ff11}1
  1811. \= Expect no match
  1812. 1\x{ff11}
  1813. /^[[:xdigit:]]+$/utf,ucp
  1814. f0
  1815. 1A
  1816. d\x{ff10}
  1817. \x{ff26}8
  1818. \= Expect no match
  1819. 8g\=no_jit
  1820. /^[[:xdigit:]]+$/utf,ucp,ascii_digit
  1821. f0
  1822. 1A
  1823. \= Expect no match
  1824. d\x{ff10}
  1825. \x{ff26}8
  1826. 8g
  1827. />[[:space:]]+</utf,ucp
  1828. >\x{a0} \x{a0}<
  1829. >\x{a0}\x{a0}\x{a0}<
  1830. />[[:space:]]+</utf,ucp,ascii_posix
  1831. \= Expect no match
  1832. >\x{a0} \x{a0}<
  1833. /(?aP)[[:alnum:]]+/i,ucp,utf
  1834. abcáxyz
  1835. abc\x{660}xyz
  1836. /(?aP)[[:alnum:]\d]+/i,ucp,utf
  1837. abc\x{660}xyz
  1838. /(*UCP)(*UTF)[[:alnum:]](?aP:[[:alnum:]])[[:alnum:]]/
  1839. \x{660}A\x{660}
  1840. \= Expect no match
  1841. \x{660}\x{660}\x{660}
  1842. # VARIOUS
  1843. /[\d\s\w]+/a,ucp,utf
  1844. 9 A\x{660}À
  1845. 9 AÀ\x{660}
  1846. # End PCRE2_EXTRA_ASCII_xxx tests
  1847. /(?<!(|l ))/utf
  1848. (?<!(|l ))
  1849. # End of testinput5