github的一些开源项目
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

5518 lines
114 KiB

  1. # This set of tests checks the API, internals, and non-Perl stuff for UTF
  2. # support, including Unicode properties. However, tests that give different
  3. # results in 8-bit, 16-bit, and 32-bit modes are excluded (see tests 10 and
  4. # 12).
  5. #newline_default lf any anycrlf
  6. # PCRE2 and Perl disagree about the characteristics of certain Unicode
  7. # characters. For example, 061C was considered by Perl to be Arabic, though
  8. # it was not listed as such in the Unicode Scripts.txt file for Unicode 8.
  9. # However, it *is* in that file for Unicode 10, but when I came to re-check,
  10. # Perl had changed in the meantime, with 5.026 not recognizing it as Arabic.
  11. # 2066-2069 are graphic and printable according to Perl, though they are
  12. # actually "isolate" control characters. That is why the following tests are
  13. # here rather than in test 4.
  14. /^[\p{Arabic}]/utf
  15. \x{061c}
  16. 0: \x{61c}
  17. /^[[:graph:]]+$/utf,ucp
  18. \= Expect no match
  19. \x{61c}
  20. No match
  21. \x{2066}
  22. No match
  23. \x{2067}
  24. No match
  25. \x{2068}
  26. No match
  27. \x{2069}
  28. No match
  29. /^[[:print:]]+$/utf,ucp
  30. \= Expect no match
  31. \x{61c}
  32. No match
  33. \x{2066}
  34. No match
  35. \x{2067}
  36. No match
  37. \x{2068}
  38. No match
  39. \x{2069}
  40. No match
  41. /^[[:^graph:]]+$/utf,ucp
  42. \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}
  43. 0: \x{09}\x{0a}\x{1d} \x{85}\x{a0}\x{61c}\x{1680}
  44. \x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069}
  45. 0: \x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069}
  46. /^[[:^print:]]+$/utf,ucp
  47. \x{09}\x{1D}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067}
  48. 0: \x{09}\x{1d}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067}
  49. \x{2068}\x{2069}
  50. 0: \x{2068}\x{2069}
  51. # Perl does not consider U+180e to be a space character. It is true that it
  52. # does not appear in the Unicode PropList.txt file as such, but in many other
  53. # sources it is listed as a space, and has been treated as such in PCRE for
  54. # a long time.
  55. /^>[[:blank:]]*/utf,ucp
  56. >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028}
  57. 0: > \x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{09}
  58. /^A\s+Z/utf,ucp
  59. A\x{85}\x{180e}\x{2005}Z
  60. 0: A\x{85}\x{180e}\x{2005}Z
  61. /^A[\s]+Z/utf,ucp
  62. A\x{2005}Z
  63. 0: A\x{2005}Z
  64. A\x{85}\x{2005}Z
  65. 0: A\x{85}\x{2005}Z
  66. /^[[:graph:]]+$/utf,ucp
  67. \= Expect no match
  68. \x{180e}
  69. No match
  70. /^[[:print:]]+$/utf,ucp
  71. \x{180e}
  72. 0: \x{180e}
  73. /^[[:^graph:]]+$/utf,ucp
  74. \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e}
  75. 0: \x{09}\x{0a}\x{1d} \x{85}\x{a0}\x{61c}\x{1680}\x{180e}
  76. /^[[:^print:]]+$/utf,ucp
  77. \= Expect no match
  78. \x{180e}
  79. No match
  80. # End of U+180E tests.
  81. # ---------------------------------------------------------------------
  82. /\x{110000}/IB,utf
  83. Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large
  84. /\o{4200000}/IB,utf
  85. Failed: error 134 at offset 10: character code point value in \x{} or \o{} is too large
  86. /\x{ffffffff}/utf
  87. Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
  88. /\o{37777777777}/utf
  89. Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
  90. /\x{100000000}/utf
  91. Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
  92. /\o{77777777777}/utf
  93. Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
  94. /\x{d800}/utf
  95. Failed: error 173 at offset 7: disallowed Unicode code point (>= 0xd800 && <= 0xdfff)
  96. /\o{154000}/utf
  97. Failed: error 173 at offset 9: disallowed Unicode code point (>= 0xd800 && <= 0xdfff)
  98. /\x{dfff}/utf
  99. Failed: error 173 at offset 7: disallowed Unicode code point (>= 0xd800 && <= 0xdfff)
  100. /\o{157777}/utf
  101. Failed: error 173 at offset 9: disallowed Unicode code point (>= 0xd800 && <= 0xdfff)
  102. /\x{d7ff}/utf
  103. /\o{153777}/utf
  104. /\x{e000}/utf
  105. /\o{170000}/utf
  106. /^\x{100}a\x{1234}/utf
  107. \x{100}a\x{1234}bcd
  108. 0: \x{100}a\x{1234}
  109. /\x{0041}\x{2262}\x{0391}\x{002e}/IB,utf
  110. ------------------------------------------------------------------
  111. Bra
  112. A\x{2262}\x{391}.
  113. Ket
  114. End
  115. ------------------------------------------------------------------
  116. Capture group count = 0
  117. Options: utf
  118. First code unit = 'A'
  119. Last code unit = '.'
  120. Subject length lower bound = 4
  121. \x{0041}\x{2262}\x{0391}\x{002e}
  122. 0: A\x{2262}\x{391}.
  123. /.{3,5}X/IB,utf
  124. ------------------------------------------------------------------
  125. Bra
  126. Any{3}
  127. Any{0,2}
  128. X
  129. Ket
  130. End
  131. ------------------------------------------------------------------
  132. Capture group count = 0
  133. Options: utf
  134. Last code unit = 'X'
  135. Subject length lower bound = 4
  136. \x{212ab}\x{212ab}\x{212ab}\x{861}X
  137. 0: \x{212ab}\x{212ab}\x{212ab}\x{861}X
  138. /.{3,5}?/IB,utf
  139. ------------------------------------------------------------------
  140. Bra
  141. Any{3}
  142. Any{0,2}?
  143. Ket
  144. End
  145. ------------------------------------------------------------------
  146. Capture group count = 0
  147. Options: utf
  148. Subject length lower bound = 3
  149. \x{212ab}\x{212ab}\x{212ab}\x{861}
  150. 0: \x{212ab}\x{212ab}\x{212ab}
  151. /^[ab]/IB,utf
  152. ------------------------------------------------------------------
  153. Bra
  154. ^
  155. [ab]
  156. Ket
  157. End
  158. ------------------------------------------------------------------
  159. Capture group count = 0
  160. Compile options: utf
  161. Overall options: anchored utf
  162. Starting code units: a b
  163. Subject length lower bound = 1
  164. bar
  165. 0: b
  166. \= Expect no match
  167. c
  168. No match
  169. \x{ff}
  170. No match
  171. \x{100}
  172. No match
  173. /\x{100}*(\d+|"(?1)")/utf
  174. 1234
  175. 0: 1234
  176. 1: 1234
  177. "1234"
  178. 0: "1234"
  179. 1: "1234"
  180. \x{100}1234
  181. 0: \x{100}1234
  182. 1: 1234
  183. "\x{100}1234"
  184. 0: \x{100}1234
  185. 1: 1234
  186. \x{100}\x{100}12ab
  187. 0: \x{100}\x{100}12
  188. 1: 12
  189. \x{100}\x{100}"12"
  190. 0: \x{100}\x{100}"12"
  191. 1: "12"
  192. \= Expect no match
  193. \x{100}\x{100}abcd
  194. No match
  195. /\x{100}*/IB,utf
  196. ------------------------------------------------------------------
  197. Bra
  198. \x{100}*+
  199. Ket
  200. End
  201. ------------------------------------------------------------------
  202. Capture group count = 0
  203. May match empty string
  204. Options: utf
  205. Subject length lower bound = 0
  206. /a\x{100}*/IB,utf
  207. ------------------------------------------------------------------
  208. Bra
  209. a
  210. \x{100}*+
  211. Ket
  212. End
  213. ------------------------------------------------------------------
  214. Capture group count = 0
  215. Options: utf
  216. First code unit = 'a'
  217. Subject length lower bound = 1
  218. /ab\x{100}*/IB,utf
  219. ------------------------------------------------------------------
  220. Bra
  221. ab
  222. \x{100}*+
  223. Ket
  224. End
  225. ------------------------------------------------------------------
  226. Capture group count = 0
  227. Options: utf
  228. First code unit = 'a'
  229. Last code unit = 'b'
  230. Subject length lower bound = 2
  231. /[\x{200}-\x{100}]/utf
  232. Failed: error 108 at offset 15: range out of order in character class
  233. /[Ā-Ą]/utf
  234. \x{100}
  235. 0: \x{100}
  236. \x{104}
  237. 0: \x{104}
  238. \= Expect no match
  239. \x{105}
  240. No match
  241. \x{ff}
  242. No match
  243. /[\xFF]/IB
  244. ------------------------------------------------------------------
  245. Bra
  246. \x{ff}
  247. Ket
  248. End
  249. ------------------------------------------------------------------
  250. Capture group count = 0
  251. First code unit = \xff
  252. Subject length lower bound = 1
  253. >\xff<
  254. 0: \xff
  255. /[^\xFF]/IB
  256. ------------------------------------------------------------------
  257. Bra
  258. [^\x{ff}]
  259. Ket
  260. End
  261. ------------------------------------------------------------------
  262. Capture group count = 0
  263. Subject length lower bound = 1
  264. /[Ä-Ü]/utf
  265. Ö # Matches without Study
  266. 0: \x{d6}
  267. \x{d6}
  268. 0: \x{d6}
  269. /[Ä-Ü]/utf
  270. Ö <-- Same with Study
  271. 0: \x{d6}
  272. \x{d6}
  273. 0: \x{d6}
  274. /[\x{c4}-\x{dc}]/utf
  275. Ö # Matches without Study
  276. 0: \x{d6}
  277. \x{d6}
  278. 0: \x{d6}
  279. /[\x{c4}-\x{dc}]/utf
  280. Ö <-- Same with Study
  281. 0: \x{d6}
  282. \x{d6}
  283. 0: \x{d6}
  284. /[^\x{100}]abc(xyz(?1))/IB,utf
  285. ------------------------------------------------------------------
  286. Bra
  287. [^\x{100}]
  288. abc
  289. CBra 1
  290. xyz
  291. Recurse
  292. Ket
  293. Ket
  294. End
  295. ------------------------------------------------------------------
  296. Capture group count = 1
  297. Options: utf
  298. Last code unit = 'z'
  299. Subject length lower bound = 7
  300. /(\x{100}(b(?2)c))?/IB,utf
  301. ------------------------------------------------------------------
  302. Bra
  303. Brazero
  304. CBra 1
  305. \x{100}
  306. CBra 2
  307. b
  308. Recurse
  309. c
  310. Ket
  311. Ket
  312. Ket
  313. End
  314. ------------------------------------------------------------------
  315. Capture group count = 2
  316. May match empty string
  317. Options: utf
  318. Subject length lower bound = 0
  319. /(\x{100}(b(?2)c)){0,2}/IB,utf
  320. ------------------------------------------------------------------
  321. Bra
  322. Brazero
  323. Bra
  324. CBra 1
  325. \x{100}
  326. CBra 2
  327. b
  328. Recurse
  329. c
  330. Ket
  331. Ket
  332. Brazero
  333. CBra 1
  334. \x{100}
  335. CBra 2
  336. b
  337. Recurse
  338. c
  339. Ket
  340. Ket
  341. Ket
  342. Ket
  343. End
  344. ------------------------------------------------------------------
  345. Capture group count = 2
  346. May match empty string
  347. Options: utf
  348. Subject length lower bound = 0
  349. /(\x{100}(b(?1)c))?/IB,utf
  350. ------------------------------------------------------------------
  351. Bra
  352. Brazero
  353. CBra 1
  354. \x{100}
  355. CBra 2
  356. b
  357. Recurse
  358. c
  359. Ket
  360. Ket
  361. Ket
  362. End
  363. ------------------------------------------------------------------
  364. Capture group count = 2
  365. May match empty string
  366. Options: utf
  367. Subject length lower bound = 0
  368. /(\x{100}(b(?1)c)){0,2}/IB,utf
  369. ------------------------------------------------------------------
  370. Bra
  371. Brazero
  372. Bra
  373. CBra 1
  374. \x{100}
  375. CBra 2
  376. b
  377. Recurse
  378. c
  379. Ket
  380. Ket
  381. Brazero
  382. CBra 1
  383. \x{100}
  384. CBra 2
  385. b
  386. Recurse
  387. c
  388. Ket
  389. Ket
  390. Ket
  391. Ket
  392. End
  393. ------------------------------------------------------------------
  394. Capture group count = 2
  395. May match empty string
  396. Options: utf
  397. Subject length lower bound = 0
  398. /\W/utf
  399. A.B
  400. 0: .
  401. A\x{100}B
  402. 0: \x{100}
  403. /\w/utf
  404. \x{100}X
  405. 0: X
  406. # Use no_start_optimize because the first code unit is different in 8-bit from
  407. # the wider modes.
  408. /^\ሴ/IB,utf,no_start_optimize
  409. ------------------------------------------------------------------
  410. Bra
  411. ^
  412. \x{1234}
  413. Ket
  414. End
  415. ------------------------------------------------------------------
  416. Capture group count = 0
  417. Compile options: no_start_optimize utf
  418. Overall options: anchored no_start_optimize utf
  419. /()()()()()()()()()()
  420. ()()()()()()()()()()
  421. ()()()()()()()()()()
  422. ()()()()()()()()()()
  423. A (x) (?41) B/x,utf
  424. AxxB
  425. Matched, but too many substrings
  426. 0: AxxB
  427. 1:
  428. 2:
  429. 3:
  430. 4:
  431. 5:
  432. 6:
  433. 7:
  434. 8:
  435. 9:
  436. 10:
  437. 11:
  438. 12:
  439. 13:
  440. 14:
  441. /^[\x{100}\E-\Q\E\x{150}]/B,utf
  442. ------------------------------------------------------------------
  443. Bra
  444. ^
  445. [\x{100}-\x{150}]
  446. Ket
  447. End
  448. ------------------------------------------------------------------
  449. /^[\QĀ\E-\QŐ\E]/B,utf
  450. ------------------------------------------------------------------
  451. Bra
  452. ^
  453. [\x{100}-\x{150}]
  454. Ket
  455. End
  456. ------------------------------------------------------------------
  457. /^abc./gmx,newline=any,utf
  458. abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK
  459. 0: abc1
  460. 0: abc2
  461. 0: abc3
  462. 0: abc4
  463. 0: abc5
  464. 0: abc6
  465. 0: abc7
  466. 0: abc8
  467. 0: abc9
  468. /abc.$/gmx,newline=any,utf
  469. abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x{0085} abc7\x{2028} abc8\x{2029} abc9
  470. 0: abc1
  471. 0: abc2
  472. 0: abc3
  473. 0: abc4
  474. 0: abc5
  475. 0: abc6
  476. 0: abc7
  477. 0: abc8
  478. 0: abc9
  479. /^a\Rb/bsr=unicode,utf
  480. a\nb
  481. 0: a\x{0a}b
  482. a\rb
  483. 0: a\x{0d}b
  484. a\r\nb
  485. 0: a\x{0d}\x{0a}b
  486. a\x0bb
  487. 0: a\x{0b}b
  488. a\x0cb
  489. 0: a\x{0c}b
  490. a\x{85}b
  491. 0: a\x{85}b
  492. a\x{2028}b
  493. 0: a\x{2028}b
  494. a\x{2029}b
  495. 0: a\x{2029}b
  496. \= Expect no match
  497. a\n\rb
  498. No match
  499. /^a\R*b/bsr=unicode,utf
  500. ab
  501. 0: ab
  502. a\nb
  503. 0: a\x{0a}b
  504. a\rb
  505. 0: a\x{0d}b
  506. a\r\nb
  507. 0: a\x{0d}\x{0a}b
  508. a\x0bb
  509. 0: a\x{0b}b
  510. a\x0c\x{2028}\x{2029}b
  511. 0: a\x{0c}\x{2028}\x{2029}b
  512. a\x{85}b
  513. 0: a\x{85}b
  514. a\n\rb
  515. 0: a\x{0a}\x{0d}b
  516. a\n\r\x{85}\x0cb
  517. 0: a\x{0a}\x{0d}\x{85}\x{0c}b
  518. /^a\R+b/bsr=unicode,utf
  519. a\nb
  520. 0: a\x{0a}b
  521. a\rb
  522. 0: a\x{0d}b
  523. a\r\nb
  524. 0: a\x{0d}\x{0a}b
  525. a\x0bb
  526. 0: a\x{0b}b
  527. a\x0c\x{2028}\x{2029}b
  528. 0: a\x{0c}\x{2028}\x{2029}b
  529. a\x{85}b
  530. 0: a\x{85}b
  531. a\n\rb
  532. 0: a\x{0a}\x{0d}b
  533. a\n\r\x{85}\x0cb
  534. 0: a\x{0a}\x{0d}\x{85}\x{0c}b
  535. \= Expect no match
  536. ab
  537. No match
  538. /^a\R{1,3}b/bsr=unicode,utf
  539. a\nb
  540. 0: a\x{0a}b
  541. a\n\rb
  542. 0: a\x{0a}\x{0d}b
  543. a\n\r\x{85}b
  544. 0: a\x{0a}\x{0d}\x{85}b
  545. a\r\n\r\nb
  546. 0: a\x{0d}\x{0a}\x{0d}\x{0a}b
  547. a\r\n\r\n\r\nb
  548. 0: a\x{0d}\x{0a}\x{0d}\x{0a}\x{0d}\x{0a}b
  549. a\n\r\n\rb
  550. 0: a\x{0a}\x{0d}\x{0a}\x{0d}b
  551. a\n\n\r\nb
  552. 0: a\x{0a}\x{0a}\x{0d}\x{0a}b
  553. \= Expect no match
  554. a\n\n\n\rb
  555. No match
  556. a\r
  557. No match
  558. /\H\h\V\v/utf
  559. X X\x0a
  560. 0: X X\x{0a}
  561. X\x09X\x0b
  562. 0: X\x{09}X\x{0b}
  563. \= Expect no match
  564. \x{a0} X\x0a
  565. No match
  566. /\H*\h+\V?\v{3,4}/utf
  567. \x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a
  568. 0: \x{09} \x{a0}X\x{0a}\x{0b}\x{0c}\x{0d}
  569. \x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a
  570. 0: \x{09} \x{a0}\x{0a}\x{0b}\x{0c}\x{0d}
  571. \x09\x20\x{a0}\x0a\x0b\x0c
  572. 0: \x{09} \x{a0}\x{0a}\x{0b}\x{0c}
  573. \= Expect no match
  574. \x09\x20\x{a0}\x0a\x0b
  575. No match
  576. /\H\h\V\v/utf
  577. \x{3001}\x{3000}\x{2030}\x{2028}
  578. 0: \x{3001}\x{3000}\x{2030}\x{2028}
  579. X\x{180e}X\x{85}
  580. 0: X\x{180e}X\x{85}
  581. \= Expect no match
  582. \x{2009} X\x0a
  583. No match
  584. /\H*\h+\V?\v{3,4}/utf
  585. \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a
  586. 0: \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x{0c}\x{0d}
  587. \x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a
  588. 0: \x{09}\x{205f}\x{a0}\x{0a}\x{2029}\x{0c}\x{2028}
  589. \x09\x20\x{202f}\x0a\x0b\x0c
  590. 0: \x{09} \x{202f}\x{0a}\x{0b}\x{0c}
  591. \= Expect no match
  592. \x09\x{200a}\x{a0}\x{2028}\x0b
  593. No match
  594. /[\h]/B,utf
  595. ------------------------------------------------------------------
  596. Bra
  597. [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
  598. Ket
  599. End
  600. ------------------------------------------------------------------
  601. >\x{1680}
  602. 0: \x{1680}
  603. /[\h]{3,}/B,utf
  604. ------------------------------------------------------------------
  605. Bra
  606. [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]{3,}+
  607. Ket
  608. End
  609. ------------------------------------------------------------------
  610. >\x{1680}\x{180e}\x{2000}\x{2003}\x{200a}\x{202f}\x{205f}\x{3000}<
  611. 0: \x{1680}\x{180e}\x{2000}\x{2003}\x{200a}\x{202f}\x{205f}\x{3000}
  612. /[\v]/B,utf
  613. ------------------------------------------------------------------
  614. Bra
  615. [\x0a-\x0d\x85\x{2028}-\x{2029}]
  616. Ket
  617. End
  618. ------------------------------------------------------------------
  619. /[\H]/B,utf
  620. ------------------------------------------------------------------
  621. Bra
  622. [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}]
  623. Ket
  624. End
  625. ------------------------------------------------------------------
  626. /[\V]/B,utf
  627. ------------------------------------------------------------------
  628. Bra
  629. [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{10ffff}]
  630. Ket
  631. End
  632. ------------------------------------------------------------------
  633. /.*$/newline=any,utf
  634. \x{1ec5}
  635. 0: \x{1ec5}
  636. /a\Rb/I,bsr=anycrlf,utf
  637. Capture group count = 0
  638. Options: utf
  639. \R matches CR, LF, or CRLF
  640. First code unit = 'a'
  641. Last code unit = 'b'
  642. Subject length lower bound = 3
  643. a\rb
  644. 0: a\x{0d}b
  645. a\nb
  646. 0: a\x{0a}b
  647. a\r\nb
  648. 0: a\x{0d}\x{0a}b
  649. \= Expect no match
  650. a\x{85}b
  651. No match
  652. a\x0bb
  653. No match
  654. /a\Rb/I,bsr=unicode,utf
  655. Capture group count = 0
  656. Options: utf
  657. \R matches any Unicode newline
  658. First code unit = 'a'
  659. Last code unit = 'b'
  660. Subject length lower bound = 3
  661. a\rb
  662. 0: a\x{0d}b
  663. a\nb
  664. 0: a\x{0a}b
  665. a\r\nb
  666. 0: a\x{0d}\x{0a}b
  667. a\x{85}b
  668. 0: a\x{85}b
  669. a\x0bb
  670. 0: a\x{0b}b
  671. /a\R?b/I,bsr=anycrlf,utf
  672. Capture group count = 0
  673. Options: utf
  674. \R matches CR, LF, or CRLF
  675. First code unit = 'a'
  676. Last code unit = 'b'
  677. Subject length lower bound = 2
  678. a\rb
  679. 0: a\x{0d}b
  680. a\nb
  681. 0: a\x{0a}b
  682. a\r\nb
  683. 0: a\x{0d}\x{0a}b
  684. \= Expect no match
  685. a\x{85}b
  686. No match
  687. a\x0bb
  688. No match
  689. /a\R?b/I,bsr=unicode,utf
  690. Capture group count = 0
  691. Options: utf
  692. \R matches any Unicode newline
  693. First code unit = 'a'
  694. Last code unit = 'b'
  695. Subject length lower bound = 2
  696. a\rb
  697. 0: a\x{0d}b
  698. a\nb
  699. 0: a\x{0a}b
  700. a\r\nb
  701. 0: a\x{0d}\x{0a}b
  702. a\x{85}b
  703. 0: a\x{85}b
  704. a\x0bb
  705. 0: a\x{0b}b
  706. /.*a.*=.b.*/utf,newline=any
  707. QQQ\x{2029}ABCaXYZ=!bPQR
  708. 0: ABCaXYZ=!bPQR
  709. \= Expect no match
  710. a\x{2029}b
  711. No match
  712. \x61\xe2\x80\xa9\x62
  713. No match
  714. /[[:a\x{100}b:]]/utf
  715. Failed: error 130 at offset 3: unknown POSIX class name
  716. /a[^]b/utf,allow_empty_class,match_unset_backref
  717. a\x{1234}b
  718. 0: a\x{1234}b
  719. a\nb
  720. 0: a\x{0a}b
  721. \= Expect no match
  722. ab
  723. No match
  724. /a[^]+b/utf,allow_empty_class,match_unset_backref
  725. aXb
  726. 0: aXb
  727. a\nX\nX\x{1234}b
  728. 0: a\x{0a}X\x{0a}X\x{1234}b
  729. \= Expect no match
  730. ab
  731. No match
  732. /(\x{de})\1/
  733. \x{de}\x{de}
  734. 0: \xde\xde
  735. 1: \xde
  736. /X/newline=any,utf,firstline
  737. A\x{1ec5}ABCXYZ
  738. 0: X
  739. /Xa{2,4}b/utf
  740. X\=ps
  741. Partial match: X
  742. Xa\=ps
  743. Partial match: Xa
  744. Xaa\=ps
  745. Partial match: Xaa
  746. Xaaa\=ps
  747. Partial match: Xaaa
  748. Xaaaa\=ps
  749. Partial match: Xaaaa
  750. /Xa{2,4}?b/utf
  751. X\=ps
  752. Partial match: X
  753. Xa\=ps
  754. Partial match: Xa
  755. Xaa\=ps
  756. Partial match: Xaa
  757. Xaaa\=ps
  758. Partial match: Xaaa
  759. Xaaaa\=ps
  760. Partial match: Xaaaa
  761. /Xa{2,4}+b/utf
  762. X\=ps
  763. Partial match: X
  764. Xa\=ps
  765. Partial match: Xa
  766. Xaa\=ps
  767. Partial match: Xaa
  768. Xaaa\=ps
  769. Partial match: Xaaa
  770. Xaaaa\=ps
  771. Partial match: Xaaaa
  772. /X\x{123}{2,4}b/utf
  773. X\=ps
  774. Partial match: X
  775. X\x{123}\=ps
  776. Partial match: X\x{123}
  777. X\x{123}\x{123}\=ps
  778. Partial match: X\x{123}\x{123}
  779. X\x{123}\x{123}\x{123}\=ps
  780. Partial match: X\x{123}\x{123}\x{123}
  781. X\x{123}\x{123}\x{123}\x{123}\=ps
  782. Partial match: X\x{123}\x{123}\x{123}\x{123}
  783. /X\x{123}{2,4}?b/utf
  784. X\=ps
  785. Partial match: X
  786. X\x{123}\=ps
  787. Partial match: X\x{123}
  788. X\x{123}\x{123}\=ps
  789. Partial match: X\x{123}\x{123}
  790. X\x{123}\x{123}\x{123}\=ps
  791. Partial match: X\x{123}\x{123}\x{123}
  792. X\x{123}\x{123}\x{123}\x{123}\=ps
  793. Partial match: X\x{123}\x{123}\x{123}\x{123}
  794. /X\x{123}{2,4}+b/utf
  795. X\=ps
  796. Partial match: X
  797. X\x{123}\=ps
  798. Partial match: X\x{123}
  799. X\x{123}\x{123}\=ps
  800. Partial match: X\x{123}\x{123}
  801. X\x{123}\x{123}\x{123}\=ps
  802. Partial match: X\x{123}\x{123}\x{123}
  803. X\x{123}\x{123}\x{123}\x{123}\=ps
  804. Partial match: X\x{123}\x{123}\x{123}\x{123}
  805. /X\x{123}{2,4}b/utf
  806. \= Expect no match
  807. Xx\=ps
  808. No match
  809. X\x{123}x\=ps
  810. No match
  811. X\x{123}\x{123}x\=ps
  812. No match
  813. X\x{123}\x{123}\x{123}x\=ps
  814. No match
  815. X\x{123}\x{123}\x{123}\x{123}x\=ps
  816. No match
  817. /X\x{123}{2,4}?b/utf
  818. \= Expect no match
  819. Xx\=ps
  820. No match
  821. X\x{123}x\=ps
  822. No match
  823. X\x{123}\x{123}x\=ps
  824. No match
  825. X\x{123}\x{123}\x{123}x\=ps
  826. No match
  827. X\x{123}\x{123}\x{123}\x{123}x\=ps
  828. No match
  829. /X\x{123}{2,4}+b/utf
  830. \= Expect no match
  831. Xx\=ps
  832. No match
  833. X\x{123}x\=ps
  834. No match
  835. X\x{123}\x{123}x\=ps
  836. No match
  837. X\x{123}\x{123}\x{123}x\=ps
  838. No match
  839. X\x{123}\x{123}\x{123}\x{123}x\=ps
  840. No match
  841. /X\d{2,4}b/utf
  842. X\=ps
  843. Partial match: X
  844. X3\=ps
  845. Partial match: X3
  846. X33\=ps
  847. Partial match: X33
  848. X333\=ps
  849. Partial match: X333
  850. X3333\=ps
  851. Partial match: X3333
  852. /X\d{2,4}?b/utf
  853. X\=ps
  854. Partial match: X
  855. X3\=ps
  856. Partial match: X3
  857. X33\=ps
  858. Partial match: X33
  859. X333\=ps
  860. Partial match: X333
  861. X3333\=ps
  862. Partial match: X3333
  863. /X\d{2,4}+b/utf
  864. X\=ps
  865. Partial match: X
  866. X3\=ps
  867. Partial match: X3
  868. X33\=ps
  869. Partial match: X33
  870. X333\=ps
  871. Partial match: X333
  872. X3333\=ps
  873. Partial match: X3333
  874. /X\D{2,4}b/utf
  875. X\=ps
  876. Partial match: X
  877. Xa\=ps
  878. Partial match: Xa
  879. Xaa\=ps
  880. Partial match: Xaa
  881. Xaaa\=ps
  882. Partial match: Xaaa
  883. Xaaaa\=ps
  884. Partial match: Xaaaa
  885. /X\D{2,4}?b/utf
  886. X\=ps
  887. Partial match: X
  888. Xa\=ps
  889. Partial match: Xa
  890. Xaa\=ps
  891. Partial match: Xaa
  892. Xaaa\=ps
  893. Partial match: Xaaa
  894. Xaaaa\=ps
  895. Partial match: Xaaaa
  896. /X\D{2,4}+b/utf
  897. X\=ps
  898. Partial match: X
  899. Xa\=ps
  900. Partial match: Xa
  901. Xaa\=ps
  902. Partial match: Xaa
  903. Xaaa\=ps
  904. Partial match: Xaaa
  905. Xaaaa\=ps
  906. Partial match: Xaaaa
  907. /X\D{2,4}b/utf
  908. X\=ps
  909. Partial match: X
  910. X\x{123}\=ps
  911. Partial match: X\x{123}
  912. X\x{123}\x{123}\=ps
  913. Partial match: X\x{123}\x{123}
  914. X\x{123}\x{123}\x{123}\=ps
  915. Partial match: X\x{123}\x{123}\x{123}
  916. X\x{123}\x{123}\x{123}\x{123}\=ps
  917. Partial match: X\x{123}\x{123}\x{123}\x{123}
  918. /X\D{2,4}?b/utf
  919. X\=ps
  920. Partial match: X
  921. X\x{123}\=ps
  922. Partial match: X\x{123}
  923. X\x{123}\x{123}\=ps
  924. Partial match: X\x{123}\x{123}
  925. X\x{123}\x{123}\x{123}\=ps
  926. Partial match: X\x{123}\x{123}\x{123}
  927. X\x{123}\x{123}\x{123}\x{123}\=ps
  928. Partial match: X\x{123}\x{123}\x{123}\x{123}
  929. /X\D{2,4}+b/utf
  930. X\=ps
  931. Partial match: X
  932. X\x{123}\=ps
  933. Partial match: X\x{123}
  934. X\x{123}\x{123}\=ps
  935. Partial match: X\x{123}\x{123}
  936. X\x{123}\x{123}\x{123}\=ps
  937. Partial match: X\x{123}\x{123}\x{123}
  938. X\x{123}\x{123}\x{123}\x{123}\=ps
  939. Partial match: X\x{123}\x{123}\x{123}\x{123}
  940. /X[abc]{2,4}b/utf
  941. X\=ps
  942. Partial match: X
  943. Xa\=ps
  944. Partial match: Xa
  945. Xaa\=ps
  946. Partial match: Xaa
  947. Xaaa\=ps
  948. Partial match: Xaaa
  949. Xaaaa\=ps
  950. Partial match: Xaaaa
  951. /X[abc]{2,4}?b/utf
  952. X\=ps
  953. Partial match: X
  954. Xa\=ps
  955. Partial match: Xa
  956. Xaa\=ps
  957. Partial match: Xaa
  958. Xaaa\=ps
  959. Partial match: Xaaa
  960. Xaaaa\=ps
  961. Partial match: Xaaaa
  962. /X[abc]{2,4}+b/utf
  963. X\=ps
  964. Partial match: X
  965. Xa\=ps
  966. Partial match: Xa
  967. Xaa\=ps
  968. Partial match: Xaa
  969. Xaaa\=ps
  970. Partial match: Xaaa
  971. Xaaaa\=ps
  972. Partial match: Xaaaa
  973. /X[abc\x{123}]{2,4}b/utf
  974. X\=ps
  975. Partial match: X
  976. X\x{123}\=ps
  977. Partial match: X\x{123}
  978. X\x{123}\x{123}\=ps
  979. Partial match: X\x{123}\x{123}
  980. X\x{123}\x{123}\x{123}\=ps
  981. Partial match: X\x{123}\x{123}\x{123}
  982. X\x{123}\x{123}\x{123}\x{123}\=ps
  983. Partial match: X\x{123}\x{123}\x{123}\x{123}
  984. /X[abc\x{123}]{2,4}?b/utf
  985. X\=ps
  986. Partial match: X
  987. X\x{123}\=ps
  988. Partial match: X\x{123}
  989. X\x{123}\x{123}\=ps
  990. Partial match: X\x{123}\x{123}
  991. X\x{123}\x{123}\x{123}\=ps
  992. Partial match: X\x{123}\x{123}\x{123}
  993. X\x{123}\x{123}\x{123}\x{123}\=ps
  994. Partial match: X\x{123}\x{123}\x{123}\x{123}
  995. /X[abc\x{123}]{2,4}+b/utf
  996. X\=ps
  997. Partial match: X
  998. X\x{123}\=ps
  999. Partial match: X\x{123}
  1000. X\x{123}\x{123}\=ps
  1001. Partial match: X\x{123}\x{123}
  1002. X\x{123}\x{123}\x{123}\=ps
  1003. Partial match: X\x{123}\x{123}\x{123}
  1004. X\x{123}\x{123}\x{123}\x{123}\=ps
  1005. Partial match: X\x{123}\x{123}\x{123}\x{123}
  1006. /X[^a]{2,4}b/utf
  1007. X\=ps
  1008. Partial match: X
  1009. Xz\=ps
  1010. Partial match: Xz
  1011. Xzz\=ps
  1012. Partial match: Xzz
  1013. Xzzz\=ps
  1014. Partial match: Xzzz
  1015. Xzzzz\=ps
  1016. Partial match: Xzzzz
  1017. /X[^a]{2,4}?b/utf
  1018. X\=ps
  1019. Partial match: X
  1020. Xz\=ps
  1021. Partial match: Xz
  1022. Xzz\=ps
  1023. Partial match: Xzz
  1024. Xzzz\=ps
  1025. Partial match: Xzzz
  1026. Xzzzz\=ps
  1027. Partial match: Xzzzz
  1028. /X[^a]{2,4}+b/utf
  1029. X\=ps
  1030. Partial match: X
  1031. Xz\=ps
  1032. Partial match: Xz
  1033. Xzz\=ps
  1034. Partial match: Xzz
  1035. Xzzz\=ps
  1036. Partial match: Xzzz
  1037. Xzzzz\=ps
  1038. Partial match: Xzzzz
  1039. /X[^a]{2,4}b/utf
  1040. X\=ps
  1041. Partial match: X
  1042. X\x{123}\=ps
  1043. Partial match: X\x{123}
  1044. X\x{123}\x{123}\=ps
  1045. Partial match: X\x{123}\x{123}
  1046. X\x{123}\x{123}\x{123}\=ps
  1047. Partial match: X\x{123}\x{123}\x{123}
  1048. X\x{123}\x{123}\x{123}\x{123}\=ps
  1049. Partial match: X\x{123}\x{123}\x{123}\x{123}
  1050. /X[^a]{2,4}?b/utf
  1051. X\=ps
  1052. Partial match: X
  1053. X\x{123}\=ps
  1054. Partial match: X\x{123}
  1055. X\x{123}\x{123}\=ps
  1056. Partial match: X\x{123}\x{123}
  1057. X\x{123}\x{123}\x{123}\=ps
  1058. Partial match: X\x{123}\x{123}\x{123}
  1059. X\x{123}\x{123}\x{123}\x{123}\=ps
  1060. Partial match: X\x{123}\x{123}\x{123}\x{123}
  1061. /X[^a]{2,4}+b/utf
  1062. X\=ps
  1063. Partial match: X
  1064. X\x{123}\=ps
  1065. Partial match: X\x{123}
  1066. X\x{123}\x{123}\=ps
  1067. Partial match: X\x{123}\x{123}
  1068. X\x{123}\x{123}\x{123}\=ps
  1069. Partial match: X\x{123}\x{123}\x{123}
  1070. X\x{123}\x{123}\x{123}\x{123}\=ps
  1071. Partial match: X\x{123}\x{123}\x{123}\x{123}
  1072. /(Y)X\1{2,4}b/utf
  1073. YX\=ps
  1074. Partial match: YX
  1075. YXY\=ps
  1076. Partial match: YXY
  1077. YXYY\=ps
  1078. Partial match: YXYY
  1079. YXYYY\=ps
  1080. Partial match: YXYYY
  1081. YXYYYY\=ps
  1082. Partial match: YXYYYY
  1083. /(Y)X\1{2,4}?b/utf
  1084. YX\=ps
  1085. Partial match: YX
  1086. YXY\=ps
  1087. Partial match: YXY
  1088. YXYY\=ps
  1089. Partial match: YXYY
  1090. YXYYY\=ps
  1091. Partial match: YXYYY
  1092. YXYYYY\=ps
  1093. Partial match: YXYYYY
  1094. /(Y)X\1{2,4}+b/utf
  1095. YX\=ps
  1096. Partial match: YX
  1097. YXY\=ps
  1098. Partial match: YXY
  1099. YXYY\=ps
  1100. Partial match: YXYY
  1101. YXYYY\=ps
  1102. Partial match: YXYYY
  1103. YXYYYY\=ps
  1104. Partial match: YXYYYY
  1105. /(\x{123})X\1{2,4}b/utf
  1106. \x{123}X\=ps
  1107. Partial match: \x{123}X
  1108. \x{123}X\x{123}\=ps
  1109. Partial match: \x{123}X\x{123}
  1110. \x{123}X\x{123}\x{123}\=ps
  1111. Partial match: \x{123}X\x{123}\x{123}
  1112. \x{123}X\x{123}\x{123}\x{123}\=ps
  1113. Partial match: \x{123}X\x{123}\x{123}\x{123}
  1114. \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
  1115. Partial match: \x{123}X\x{123}\x{123}\x{123}\x{123}
  1116. /(\x{123})X\1{2,4}?b/utf
  1117. \x{123}X\=ps
  1118. Partial match: \x{123}X
  1119. \x{123}X\x{123}\=ps
  1120. Partial match: \x{123}X\x{123}
  1121. \x{123}X\x{123}\x{123}\=ps
  1122. Partial match: \x{123}X\x{123}\x{123}
  1123. \x{123}X\x{123}\x{123}\x{123}\=ps
  1124. Partial match: \x{123}X\x{123}\x{123}\x{123}
  1125. \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
  1126. Partial match: \x{123}X\x{123}\x{123}\x{123}\x{123}
  1127. /(\x{123})X\1{2,4}+b/utf
  1128. \x{123}X\=ps
  1129. Partial match: \x{123}X
  1130. \x{123}X\x{123}\=ps
  1131. Partial match: \x{123}X\x{123}
  1132. \x{123}X\x{123}\x{123}\=ps
  1133. Partial match: \x{123}X\x{123}\x{123}
  1134. \x{123}X\x{123}\x{123}\x{123}\=ps
  1135. Partial match: \x{123}X\x{123}\x{123}\x{123}
  1136. \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
  1137. Partial match: \x{123}X\x{123}\x{123}\x{123}\x{123}
  1138. /\bthe cat\b/utf
  1139. the cat\=ps
  1140. 0: the cat
  1141. the cat\=ph
  1142. Partial match: the cat
  1143. /abcd*/utf
  1144. xxxxabcd\=ps
  1145. 0: abcd
  1146. xxxxabcd\=ph
  1147. Partial match: abcd
  1148. /abcd*/i,utf
  1149. xxxxabcd\=ps
  1150. 0: abcd
  1151. xxxxabcd\=ph
  1152. Partial match: abcd
  1153. XXXXABCD\=ps
  1154. 0: ABCD
  1155. XXXXABCD\=ph
  1156. Partial match: ABCD
  1157. /abc\d*/utf
  1158. xxxxabc1\=ps
  1159. 0: abc1
  1160. xxxxabc1\=ph
  1161. Partial match: abc1
  1162. /(a)bc\1*/utf
  1163. xxxxabca\=ps
  1164. 0: abca
  1165. 1: a
  1166. xxxxabca\=ph
  1167. Partial match: abca
  1168. /abc[de]*/utf
  1169. xxxxabcde\=ps
  1170. 0: abcde
  1171. xxxxabcde\=ph
  1172. Partial match: abcde
  1173. /X\W{3}X/utf
  1174. X\=ps
  1175. Partial match: X
  1176. /\sxxx\s/utf,tables=2
  1177. AB\x{85}xxx\x{a0}XYZ
  1178. 0: \x{85}xxx\x{a0}
  1179. AB\x{a0}xxx\x{85}XYZ
  1180. 0: \x{a0}xxx\x{85}
  1181. /\S \S/utf,tables=2
  1182. \x{a2} \x{84}
  1183. 0: \x{a2} \x{84}
  1184. 'A#хц'Bx,newline=any,utf
  1185. ------------------------------------------------------------------
  1186. Bra
  1187. A
  1188. Ket
  1189. End
  1190. ------------------------------------------------------------------
  1191. 'A#хц
  1192. PQ'Bx,newline=any,utf
  1193. ------------------------------------------------------------------
  1194. Bra
  1195. APQ
  1196. Ket
  1197. End
  1198. ------------------------------------------------------------------
  1199. /a+#хaa
  1200. z#XX?/Bx,newline=any,utf
  1201. ------------------------------------------------------------------
  1202. Bra
  1203. a++
  1204. z
  1205. Ket
  1206. End
  1207. ------------------------------------------------------------------
  1208. /a+#хaa
  1209. z#х?/Bx,newline=any,utf
  1210. ------------------------------------------------------------------
  1211. Bra
  1212. a++
  1213. z
  1214. Ket
  1215. End
  1216. ------------------------------------------------------------------
  1217. /\g{A}xxx#bXX(?'A'123) (?'A'456)/Bx,newline=any,utf
  1218. ------------------------------------------------------------------
  1219. Bra
  1220. \1
  1221. xxx
  1222. CBra 1
  1223. 456
  1224. Ket
  1225. Ket
  1226. End
  1227. ------------------------------------------------------------------
  1228. /\g{A}xxx#bх(?'A'123) (?'A'456)/Bx,newline=any,utf
  1229. ------------------------------------------------------------------
  1230. Bra
  1231. \1
  1232. xxx
  1233. CBra 1
  1234. 456
  1235. Ket
  1236. Ket
  1237. End
  1238. ------------------------------------------------------------------
  1239. /^\cģ/utf
  1240. Failed: error 168 at offset 3: \c must be followed by a printable ASCII character
  1241. /(\R*)(.)/s,utf
  1242. \r\n
  1243. 0: \x{0d}
  1244. 1:
  1245. 2: \x{0d}
  1246. \r\r\n\n\r
  1247. 0: \x{0d}\x{0d}\x{0a}\x{0a}\x{0d}
  1248. 1: \x{0d}\x{0d}\x{0a}\x{0a}
  1249. 2: \x{0d}
  1250. \r\r\n\n\r\n
  1251. 0: \x{0d}\x{0d}\x{0a}\x{0a}\x{0d}
  1252. 1: \x{0d}\x{0d}\x{0a}\x{0a}
  1253. 2: \x{0d}
  1254. /(\R)*(.)/s,utf
  1255. \r\n
  1256. 0: \x{0d}
  1257. 1: <unset>
  1258. 2: \x{0d}
  1259. \r\r\n\n\r
  1260. 0: \x{0d}\x{0d}\x{0a}\x{0a}\x{0d}
  1261. 1: \x{0a}
  1262. 2: \x{0d}
  1263. \r\r\n\n\r\n
  1264. 0: \x{0d}\x{0d}\x{0a}\x{0a}\x{0d}
  1265. 1: \x{0a}
  1266. 2: \x{0d}
  1267. /[^\x{1234}]+/Ii,utf
  1268. Capture group count = 0
  1269. Options: caseless utf
  1270. Subject length lower bound = 1
  1271. /[^\x{1234}]+?/Ii,utf
  1272. Capture group count = 0
  1273. Options: caseless utf
  1274. Subject length lower bound = 1
  1275. /[^\x{1234}]++/Ii,utf
  1276. Capture group count = 0
  1277. Options: caseless utf
  1278. Subject length lower bound = 1
  1279. /[^\x{1234}]{2}/Ii,utf
  1280. Capture group count = 0
  1281. Options: caseless utf
  1282. Subject length lower bound = 2
  1283. /f.*/
  1284. for\=ph
  1285. Partial match: for
  1286. /f.*/s
  1287. for\=ph
  1288. Partial match: for
  1289. /f.*/utf
  1290. for\=ph
  1291. Partial match: for
  1292. /f.*/s,utf
  1293. for\=ph
  1294. Partial match: for
  1295. /\x{d7ff}\x{e000}/utf
  1296. /\x{d800}/utf
  1297. Failed: error 173 at offset 7: disallowed Unicode code point (>= 0xd800 && <= 0xdfff)
  1298. /\x{dfff}/utf
  1299. Failed: error 173 at offset 7: disallowed Unicode code point (>= 0xd800 && <= 0xdfff)
  1300. /\h+/utf
  1301. \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
  1302. 0: \x{1680}\x{2000}\x{202f}\x{3000}
  1303. \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000}
  1304. 0: \x{200a}\x{a0}\x{2000}
  1305. /[\h\x{e000}]+/B,utf
  1306. ------------------------------------------------------------------
  1307. Bra
  1308. [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}\x{e000}]++
  1309. Ket
  1310. End
  1311. ------------------------------------------------------------------
  1312. \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
  1313. 0: \x{1680}\x{2000}\x{202f}\x{3000}
  1314. \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000}
  1315. 0: \x{200a}\x{a0}\x{2000}
  1316. /\H+/utf
  1317. \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
  1318. 0: \x{167f}\x{1681}\x{180d}\x{180f}
  1319. \x{2000}\x{200a}\x{1fff}\x{200b}
  1320. 0: \x{1fff}\x{200b}
  1321. \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
  1322. 0: \x{202e}\x{2030}\x{205e}\x{2060}
  1323. \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001}
  1324. 0: \x{9f}\x{a1}\x{2fff}\x{3001}
  1325. /[\H\x{d7ff}]+/B,utf
  1326. ------------------------------------------------------------------
  1327. Bra
  1328. [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}\x{d7ff}]++
  1329. Ket
  1330. End
  1331. ------------------------------------------------------------------
  1332. \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
  1333. 0: \x{167f}\x{1681}\x{180d}\x{180f}
  1334. \x{2000}\x{200a}\x{1fff}\x{200b}
  1335. 0: \x{1fff}\x{200b}
  1336. \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
  1337. 0: \x{202e}\x{2030}\x{205e}\x{2060}
  1338. \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001}
  1339. 0: \x{9f}\x{a1}\x{2fff}\x{3001}
  1340. /\v+/utf
  1341. \x{2027}\x{2030}\x{2028}\x{2029}
  1342. 0: \x{2028}\x{2029}
  1343. \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
  1344. 0: \x{85}\x{0a}\x{0b}\x{0c}\x{0d}
  1345. /[\v\x{e000}]+/B,utf
  1346. ------------------------------------------------------------------
  1347. Bra
  1348. [\x0a-\x0d\x85\x{2028}-\x{2029}\x{e000}]++
  1349. Ket
  1350. End
  1351. ------------------------------------------------------------------
  1352. \x{2027}\x{2030}\x{2028}\x{2029}
  1353. 0: \x{2028}\x{2029}
  1354. \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
  1355. 0: \x{85}\x{0a}\x{0b}\x{0c}\x{0d}
  1356. /\V+/utf
  1357. \x{2028}\x{2029}\x{2027}\x{2030}
  1358. 0: \x{2027}\x{2030}
  1359. \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86}
  1360. 0: \x{09}\x{0e}\x{84}\x{86}
  1361. /[\V\x{d7ff}]+/B,utf
  1362. ------------------------------------------------------------------
  1363. Bra
  1364. [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{10ffff}\x{d7ff}]++
  1365. Ket
  1366. End
  1367. ------------------------------------------------------------------
  1368. \x{2028}\x{2029}\x{2027}\x{2030}
  1369. 0: \x{2027}\x{2030}
  1370. \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86}
  1371. 0: \x{09}\x{0e}\x{84}\x{86}
  1372. /\R+/bsr=unicode,utf
  1373. \x{2027}\x{2030}\x{2028}\x{2029}
  1374. 0: \x{2028}\x{2029}
  1375. \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
  1376. 0: \x{85}\x{0a}\x{0b}\x{0c}\x{0d}
  1377. /(..)\1/utf
  1378. ab\=ps
  1379. Partial match: ab
  1380. aba\=ps
  1381. Partial match: aba
  1382. abab\=ps
  1383. 0: abab
  1384. 1: ab
  1385. /(..)\1/i,utf
  1386. ab\=ps
  1387. Partial match: ab
  1388. abA\=ps
  1389. Partial match: abA
  1390. aBAb\=ps
  1391. 0: aBAb
  1392. 1: aB
  1393. /(..)\1{2,}/utf
  1394. ab\=ps
  1395. Partial match: ab
  1396. aba\=ps
  1397. Partial match: aba
  1398. abab\=ps
  1399. Partial match: abab
  1400. ababa\=ps
  1401. Partial match: ababa
  1402. ababab\=ps
  1403. 0: ababab
  1404. 1: ab
  1405. ababab\=ph
  1406. Partial match: ababab
  1407. abababa\=ps
  1408. 0: ababab
  1409. 1: ab
  1410. abababa\=ph
  1411. Partial match: abababa
  1412. /(..)\1{2,}/i,utf
  1413. ab\=ps
  1414. Partial match: ab
  1415. aBa\=ps
  1416. Partial match: aBa
  1417. aBAb\=ps
  1418. Partial match: aBAb
  1419. AbaBA\=ps
  1420. Partial match: AbaBA
  1421. abABAb\=ps
  1422. 0: abABAb
  1423. 1: ab
  1424. aBAbaB\=ph
  1425. Partial match: aBAbaB
  1426. abABabA\=ps
  1427. 0: abABab
  1428. 1: ab
  1429. abaBABa\=ph
  1430. Partial match: abaBABa
  1431. /(..)\1{2,}?x/i,utf
  1432. ab\=ps
  1433. Partial match: ab
  1434. abA\=ps
  1435. Partial match: abA
  1436. aBAb\=ps
  1437. Partial match: aBAb
  1438. abaBA\=ps
  1439. Partial match: abaBA
  1440. abAbaB\=ps
  1441. Partial match: abAbaB
  1442. abaBabA\=ps
  1443. Partial match: abaBabA
  1444. abAbABaBx\=ps
  1445. 0: abAbABaBx
  1446. 1: ab
  1447. /./utf,newline=crlf
  1448. \r\=ps
  1449. 0: \x{0d}
  1450. \r\=ph
  1451. Partial match: \x{0d}
  1452. /.{2,3}/utf,newline=crlf
  1453. \r\=ps
  1454. Partial match: \x{0d}
  1455. \r\=ph
  1456. Partial match: \x{0d}
  1457. \r\r\=ps
  1458. 0: \x{0d}\x{0d}
  1459. \r\r\=ph
  1460. Partial match: \x{0d}\x{0d}
  1461. \r\r\r\=ps
  1462. 0: \x{0d}\x{0d}\x{0d}
  1463. \r\r\r\=ph
  1464. Partial match: \x{0d}\x{0d}\x{0d}
  1465. /.{2,3}?/utf,newline=crlf
  1466. \r\=ps
  1467. Partial match: \x{0d}
  1468. \r\=ph
  1469. Partial match: \x{0d}
  1470. \r\r\=ps
  1471. 0: \x{0d}\x{0d}
  1472. \r\r\=ph
  1473. Partial match: \x{0d}\x{0d}
  1474. \r\r\r\=ps
  1475. 0: \x{0d}\x{0d}
  1476. \r\r\r\=ph
  1477. 0: \x{0d}\x{0d}
  1478. /[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/B,utf
  1479. ------------------------------------------------------------------
  1480. Bra
  1481. [^\x{100}]
  1482. [^\x{1234}]
  1483. [^\x{ffff}]
  1484. [^\x{10000}]
  1485. [^\x{10ffff}]
  1486. Ket
  1487. End
  1488. ------------------------------------------------------------------
  1489. /[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/Bi,utf
  1490. ------------------------------------------------------------------
  1491. Bra
  1492. /i [^\x{100}]
  1493. /i [^\x{1234}]
  1494. /i [^\x{ffff}]
  1495. /i [^\x{10000}]
  1496. /i [^\x{10ffff}]
  1497. Ket
  1498. End
  1499. ------------------------------------------------------------------
  1500. /[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/B,utf
  1501. ------------------------------------------------------------------
  1502. Bra
  1503. [^\x{100}]*
  1504. [^\x{10000}]+
  1505. [^\x{10ffff}]??
  1506. [^\x{8000}]{4}
  1507. [^\x{8000}]*
  1508. [^\x{7fff}]{2}
  1509. [^\x{7fff}]{0,7}?
  1510. [^\x{fffff}]{5}
  1511. [^\x{fffff}]?+
  1512. Ket
  1513. End
  1514. ------------------------------------------------------------------
  1515. /[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/Bi,utf
  1516. ------------------------------------------------------------------
  1517. Bra
  1518. /i [^\x{100}]*
  1519. /i [^\x{10000}]+
  1520. /i [^\x{10ffff}]??
  1521. /i [^\x{8000}]{4}
  1522. /i [^\x{8000}]*
  1523. /i [^\x{7fff}]{2}
  1524. /i [^\x{7fff}]{0,7}?
  1525. /i [^\x{fffff}]{5}
  1526. /i [^\x{fffff}]?+
  1527. Ket
  1528. End
  1529. ------------------------------------------------------------------
  1530. /(?<=\x{1234}\x{1234})\bxy/I,utf
  1531. Capture group count = 0
  1532. Max lookbehind = 2
  1533. Options: utf
  1534. First code unit = 'x'
  1535. Last code unit = 'y'
  1536. Subject length lower bound = 2
  1537. /(?<!^)ETA/utf
  1538. \= Expect no match
  1539. ETA
  1540. No match
  1541. /\u0100/B,utf,alt_bsux,allow_empty_class,match_unset_backref
  1542. ------------------------------------------------------------------
  1543. Bra
  1544. \x{100}
  1545. Ket
  1546. End
  1547. ------------------------------------------------------------------
  1548. /[\u0100-\u0200]/B,utf,alt_bsux,allow_empty_class,match_unset_backref
  1549. ------------------------------------------------------------------
  1550. Bra
  1551. [\x{100}-\x{200}]
  1552. Ket
  1553. End
  1554. ------------------------------------------------------------------
  1555. /\ud800/utf,alt_bsux,allow_empty_class,match_unset_backref
  1556. Failed: error 173 at offset 6: disallowed Unicode code point (>= 0xd800 && <= 0xdfff)
  1557. /^\u{0000000000010ffff}/utf,extra_alt_bsux
  1558. \x{10ffff}
  1559. 0: \x{10ffff}
  1560. /\u{ 1bb1}/utf,extra_alt_bsux
  1561. u{ 1bb1}
  1562. 0: u{ 1bb1}
  1563. \= Expect no match
  1564. \x{1bb1}
  1565. No match
  1566. /\u/utf,alt_bsux
  1567. \\u
  1568. 0: u
  1569. /^a+[a\x{200}]/B,utf
  1570. ------------------------------------------------------------------
  1571. Bra
  1572. ^
  1573. a+
  1574. [a\x{200}]
  1575. Ket
  1576. End
  1577. ------------------------------------------------------------------
  1578. aa
  1579. 0: aa
  1580. /[b-d\x{200}-\x{250}]*[ae-h]?#[\x{200}-\x{250}]{0,8}[\x00-\xff]*#[\x{200}-\x{250}]+[a-z]/B,utf
  1581. ------------------------------------------------------------------
  1582. Bra
  1583. [b-d\x{200}-\x{250}]*+
  1584. [ae-h]?+
  1585. #
  1586. [\x{200}-\x{250}]{0,8}+
  1587. [\x00-\xff]*
  1588. #
  1589. [\x{200}-\x{250}]++
  1590. [a-z]
  1591. Ket
  1592. End
  1593. ------------------------------------------------------------------
  1594. /[\p{L}]/IB
  1595. ------------------------------------------------------------------
  1596. Bra
  1597. [\p{L}]
  1598. Ket
  1599. End
  1600. ------------------------------------------------------------------
  1601. Capture group count = 0
  1602. Subject length lower bound = 1
  1603. /[\p{^L}]/IB
  1604. ------------------------------------------------------------------
  1605. Bra
  1606. [\P{L}]
  1607. Ket
  1608. End
  1609. ------------------------------------------------------------------
  1610. Capture group count = 0
  1611. Subject length lower bound = 1
  1612. /[\P{L}]/IB
  1613. ------------------------------------------------------------------
  1614. Bra
  1615. [\P{L}]
  1616. Ket
  1617. End
  1618. ------------------------------------------------------------------
  1619. Capture group count = 0
  1620. Subject length lower bound = 1
  1621. /[\P{^L}]/IB
  1622. ------------------------------------------------------------------
  1623. Bra
  1624. [\p{L}]
  1625. Ket
  1626. End
  1627. ------------------------------------------------------------------
  1628. Capture group count = 0
  1629. Subject length lower bound = 1
  1630. /[abc\p{L}\x{0660}]/IB,utf
  1631. ------------------------------------------------------------------
  1632. Bra
  1633. [a-c\p{L}\x{660}]
  1634. Ket
  1635. End
  1636. ------------------------------------------------------------------
  1637. Capture group count = 0
  1638. Options: utf
  1639. Subject length lower bound = 1
  1640. /[\p{Nd}]/IB,utf
  1641. ------------------------------------------------------------------
  1642. Bra
  1643. [\p{Nd}]
  1644. Ket
  1645. End
  1646. ------------------------------------------------------------------
  1647. Capture group count = 0
  1648. Options: utf
  1649. Subject length lower bound = 1
  1650. 1234
  1651. 0: 1
  1652. /[\p{Nd}+-]+/IB,utf
  1653. ------------------------------------------------------------------
  1654. Bra
  1655. [+\-\p{Nd}]++
  1656. Ket
  1657. End
  1658. ------------------------------------------------------------------
  1659. Capture group count = 0
  1660. Options: utf
  1661. Subject length lower bound = 1
  1662. 1234
  1663. 0: 1234
  1664. 12-34
  1665. 0: 12-34
  1666. 12+\x{661}-34
  1667. 0: 12+\x{661}-34
  1668. \= Expect no match
  1669. abcd
  1670. No match
  1671. /(?:[\PPa*]*){8,}/
  1672. /[\P{Any}]/B
  1673. ------------------------------------------------------------------
  1674. Bra
  1675. [\P{Any}]
  1676. Ket
  1677. End
  1678. ------------------------------------------------------------------
  1679. /[\P{Any}\E]/B
  1680. ------------------------------------------------------------------
  1681. Bra
  1682. [\P{Any}]
  1683. Ket
  1684. End
  1685. ------------------------------------------------------------------
  1686. /(\P{Yi}+\277)/
  1687. /(\P{Yi}+\277)?/
  1688. /(?<=\P{Yi}{3}A)X/
  1689. /\p{Yi}+(\P{Yi}+)(?1)/
  1690. /(\P{Yi}{2}\277)?/
  1691. /[\P{Yi}A]/
  1692. /[\P{Yi}\P{Yi}\P{Yi}A]/
  1693. /[^\P{Yi}A]/
  1694. /[^\P{Yi}\P{Yi}\P{Yi}A]/
  1695. /(\P{Yi}*\277)*/
  1696. /(\P{Yi}*?\277)*/
  1697. /(\p{Yi}*+\277)*/
  1698. /(\P{Yi}?\277)*/
  1699. /(\P{Yi}??\277)*/
  1700. /(\p{Yi}?+\277)*/
  1701. /(\P{Yi}{0,3}\277)*/
  1702. /(\P{Yi}{0,3}?\277)*/
  1703. /(\p{Yi}{0,3}+\277)*/
  1704. /\p{Zl}{2,3}+/B,utf
  1705. ------------------------------------------------------------------
  1706. Bra
  1707. prop Zl {2}
  1708. prop Zl ?+
  1709. Ket
  1710. End
  1711. ------------------------------------------------------------------
  1712. 


  1713. 0: \x{2028}\x{2028}
  1714. \x{2028}\x{2028}\x{2028}
  1715. 0: \x{2028}\x{2028}\x{2028}
  1716. /\p{Zl}/B,utf
  1717. ------------------------------------------------------------------
  1718. Bra
  1719. prop Zl
  1720. Ket
  1721. End
  1722. ------------------------------------------------------------------
  1723. /\p{Lu}{3}+/B,utf
  1724. ------------------------------------------------------------------
  1725. Bra
  1726. prop Lu {3}
  1727. Ket
  1728. End
  1729. ------------------------------------------------------------------
  1730. /\pL{2}+/B,utf
  1731. ------------------------------------------------------------------
  1732. Bra
  1733. prop L {2}
  1734. Ket
  1735. End
  1736. ------------------------------------------------------------------
  1737. /\p{Cc}{2}+/B,utf
  1738. ------------------------------------------------------------------
  1739. Bra
  1740. prop Cc {2}
  1741. Ket
  1742. End
  1743. ------------------------------------------------------------------
  1744. /^\p{Cf}/utf
  1745. \x{180e}
  1746. 0: \x{180e}
  1747. \x{061c}
  1748. 0: \x{61c}
  1749. \x{2066}
  1750. 0: \x{2066}
  1751. \x{2067}
  1752. 0: \x{2067}
  1753. \x{2068}
  1754. 0: \x{2068}
  1755. \x{2069}
  1756. 0: \x{2069}
  1757. /^\p{Cs}/utf
  1758. \x{dfff}\=no_utf_check
  1759. 0: \x{dfff}
  1760. \= Expect no match
  1761. \x{09f}
  1762. No match
  1763. /^\p{Mn}/utf
  1764. \x{1a1b}
  1765. 0: \x{1a1b}
  1766. /^\p{Pe}/utf
  1767. \x{2309}
  1768. 0: \x{2309}
  1769. \x{230b}
  1770. 0: \x{230b}
  1771. /^\p{Ps}/utf
  1772. \x{2308}
  1773. 0: \x{2308}
  1774. \x{230a}
  1775. 0: \x{230a}
  1776. /^\p{Sc}+/utf
  1777. $\x{a2}\x{a3}\x{a4}\x{a5}\x{a6}
  1778. 0: $\x{a2}\x{a3}\x{a4}\x{a5}
  1779. \x{9f2}
  1780. 0: \x{9f2}
  1781. \= Expect no match
  1782. X
  1783. No match
  1784. \x{2c2}
  1785. No match
  1786. /^\p{Zs}/utf
  1787. \ \
  1788. 0:
  1789. \x{a0}
  1790. 0: \x{a0}
  1791. \x{1680}
  1792. 0: \x{1680}
  1793. \x{2000}
  1794. 0: \x{2000}
  1795. \x{2001}
  1796. 0: \x{2001}
  1797. \= Expect no match
  1798. \x{2028}
  1799. No match
  1800. \x{200d}
  1801. No match
  1802. # These are here because Perl has problems with the negative versions of the
  1803. # properties and has changed how it behaves for caseless matching.
  1804. /\p{^Lu}/i,utf
  1805. 1234
  1806. 0: 1
  1807. \= Expect no match
  1808. ABC
  1809. No match
  1810. /\P{Lu}/i,utf
  1811. 1234
  1812. 0: 1
  1813. \= Expect no match
  1814. ABC
  1815. No match
  1816. /\p{Ll}/i,utf
  1817. a
  1818. 0: a
  1819. Az
  1820. 0: z
  1821. \= Expect no match
  1822. ABC
  1823. No match
  1824. /\p{Lu}/i,utf
  1825. A
  1826. 0: A
  1827. a\x{10a0}B
  1828. 0: \x{10a0}
  1829. \= Expect no match
  1830. a
  1831. No match
  1832. \x{1d00}
  1833. No match
  1834. /\p{Lu}/i,utf
  1835. A
  1836. 0: A
  1837. aZ
  1838. 0: Z
  1839. \= Expect no match
  1840. abc
  1841. No match
  1842. /[\x{c0}\x{391}]/i,utf
  1843. \x{c0}
  1844. 0: \x{c0}
  1845. \x{e0}
  1846. 0: \x{e0}
  1847. # The next two are special cases where the lengths of the different cases of
  1848. # the same character differ. The first went wrong with heap frame storage; the
  1849. # second was broken in all cases.
  1850. /^\x{023a}+?(\x{0130}+)/i,utf
  1851. \x{023a}\x{2c65}\x{0130}
  1852. 0: \x{23a}\x{2c65}\x{130}
  1853. 1: \x{130}
  1854. /^\x{023a}+([^X])/i,utf
  1855. \x{023a}\x{2c65}X
  1856. 0: \x{23a}\x{2c65}
  1857. 1: \x{2c65}
  1858. /\x{c0}+\x{116}+/i,utf
  1859. \x{c0}\x{e0}\x{116}\x{117}
  1860. 0: \x{c0}\x{e0}\x{116}\x{117}
  1861. /[\x{c0}\x{116}]+/i,utf
  1862. \x{c0}\x{e0}\x{116}\x{117}
  1863. 0: \x{c0}\x{e0}\x{116}\x{117}
  1864. /(\x{de})\1/i,utf
  1865. \x{de}\x{de}
  1866. 0: \x{de}\x{de}
  1867. 1: \x{de}
  1868. \x{de}\x{fe}
  1869. 0: \x{de}\x{fe}
  1870. 1: \x{de}
  1871. \x{fe}\x{fe}
  1872. 0: \x{fe}\x{fe}
  1873. 1: \x{fe}
  1874. \x{fe}\x{de}
  1875. 0: \x{fe}\x{de}
  1876. 1: \x{fe}
  1877. /^\x{c0}$/i,utf
  1878. \x{c0}
  1879. 0: \x{c0}
  1880. \x{e0}
  1881. 0: \x{e0}
  1882. /^\x{e0}$/i,utf
  1883. \x{c0}
  1884. 0: \x{c0}
  1885. \x{e0}
  1886. 0: \x{e0}
  1887. # The next two should be Perl-compatible, but Perl fails to match \x{e0}. PCRE
  1888. # will match it only with UCP support, because without that PCRE has no notion
  1889. # of case for anything other than the ASCII letters.
  1890. /((?i)[\x{c0}])/utf
  1891. \x{c0}
  1892. 0: \x{c0}
  1893. 1: \x{c0}
  1894. \x{e0}
  1895. 0: \x{e0}
  1896. 1: \x{e0}
  1897. /(?i:[\x{c0}])/utf
  1898. \x{c0}
  1899. 0: \x{c0}
  1900. \x{e0}
  1901. 0: \x{e0}
  1902. # These are PCRE's extra properties to help with Unicodizing \d etc.
  1903. /^\p{Xan}/utf
  1904. ABCD
  1905. 0: A
  1906. 1234
  1907. 0: 1
  1908. \x{6ca}
  1909. 0: \x{6ca}
  1910. \x{a6c}
  1911. 0: \x{a6c}
  1912. \x{10a7}
  1913. 0: \x{10a7}
  1914. \= Expect no match
  1915. _ABC
  1916. No match
  1917. /^\p{Xan}+/utf
  1918. ABCD1234\x{6ca}\x{a6c}\x{10a7}_
  1919. 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}
  1920. \= Expect no match
  1921. _ABC
  1922. No match
  1923. /^\p{Xan}+?/utf
  1924. \x{6ca}\x{a6c}\x{10a7}_
  1925. 0: \x{6ca}
  1926. /^\p{Xan}*/utf
  1927. ABCD1234\x{6ca}\x{a6c}\x{10a7}_
  1928. 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}
  1929. /^\p{Xan}{2,9}/utf
  1930. ABCD1234\x{6ca}\x{a6c}\x{10a7}_
  1931. 0: ABCD1234\x{6ca}
  1932. /^\p{Xan}{2,9}?/utf
  1933. \x{6ca}\x{a6c}\x{10a7}_
  1934. 0: \x{6ca}\x{a6c}
  1935. /^[\p{Xan}]/utf
  1936. ABCD1234_
  1937. 0: A
  1938. 1234abcd_
  1939. 0: 1
  1940. \x{6ca}
  1941. 0: \x{6ca}
  1942. \x{a6c}
  1943. 0: \x{a6c}
  1944. \x{10a7}
  1945. 0: \x{10a7}
  1946. \= Expect no match
  1947. _ABC
  1948. No match
  1949. /^[\p{Xan}]+/utf
  1950. ABCD1234\x{6ca}\x{a6c}\x{10a7}_
  1951. 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}
  1952. \= Expect no match
  1953. _ABC
  1954. No match
  1955. /^>\p{Xsp}/utf
  1956. >\x{1680}\x{2028}\x{0b}
  1957. 0: >\x{1680}
  1958. >\x{a0}
  1959. 0: >\x{a0}
  1960. \= Expect no match
  1961. \x{0b}
  1962. No match
  1963. /^>\p{Xsp}+/utf
  1964. > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
  1965. 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
  1966. /^>\p{Xsp}+?/utf
  1967. >\x{1680}\x{2028}\x{0b}
  1968. 0: >\x{1680}
  1969. /^>\p{Xsp}*/utf
  1970. > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
  1971. 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
  1972. /^>\p{Xsp}{2,9}/utf
  1973. > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
  1974. 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
  1975. /^>\p{Xsp}{2,9}?/utf
  1976. > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
  1977. 0: > \x{09}
  1978. /^>[\p{Xsp}]/utf
  1979. >\x{2028}\x{0b}
  1980. 0: >\x{2028}
  1981. /^>[\p{Xsp}]+/utf
  1982. > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
  1983. 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
  1984. /^>\p{Xps}/utf
  1985. >\x{1680}\x{2028}\x{0b}
  1986. 0: >\x{1680}
  1987. >\x{a0}
  1988. 0: >\x{a0}
  1989. \= Expect no match
  1990. \x{0b}
  1991. No match
  1992. /^>\p{Xps}+/utf
  1993. > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
  1994. 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
  1995. /^>\p{Xps}+?/utf
  1996. >\x{1680}\x{2028}\x{0b}
  1997. 0: >\x{1680}
  1998. /^>\p{Xps}*/utf
  1999. > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
  2000. 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
  2001. /^>\p{Xps}{2,9}/utf
  2002. > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
  2003. 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
  2004. /^>\p{Xps}{2,9}?/utf
  2005. > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
  2006. 0: > \x{09}
  2007. /^>[\p{Xps}]/utf
  2008. >\x{2028}\x{0b}
  2009. 0: >\x{2028}
  2010. /^>[\p{Xps}]+/utf
  2011. > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
  2012. 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
  2013. /^\p{Xwd}/utf
  2014. ABCD
  2015. 0: A
  2016. 1234
  2017. 0: 1
  2018. \x{6ca}
  2019. 0: \x{6ca}
  2020. \x{a6c}
  2021. 0: \x{a6c}
  2022. \x{10a7}
  2023. 0: \x{10a7}
  2024. _ABC
  2025. 0: _
  2026. \= Expect no match
  2027. []
  2028. No match
  2029. /^\p{Xwd}+/utf
  2030. ABCD1234\x{6ca}\x{a6c}\x{10a7}_
  2031. 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
  2032. /^\p{Xwd}+?/utf
  2033. \x{6ca}\x{a6c}\x{10a7}_
  2034. 0: \x{6ca}
  2035. /^\p{Xwd}*/utf
  2036. ABCD1234\x{6ca}\x{a6c}\x{10a7}_
  2037. 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
  2038. /^\p{Xwd}{2,9}/utf
  2039. A_B12\x{6ca}\x{a6c}\x{10a7}
  2040. 0: A_B12\x{6ca}\x{a6c}\x{10a7}
  2041. /^\p{Xwd}{2,9}?/utf
  2042. \x{6ca}\x{a6c}\x{10a7}_
  2043. 0: \x{6ca}\x{a6c}
  2044. /^[\p{Xwd}]/utf
  2045. ABCD1234_
  2046. 0: A
  2047. 1234abcd_
  2048. 0: 1
  2049. \x{6ca}
  2050. 0: \x{6ca}
  2051. \x{a6c}
  2052. 0: \x{a6c}
  2053. \x{10a7}
  2054. 0: \x{10a7}
  2055. _ABC
  2056. 0: _
  2057. \= Expect no match
  2058. []
  2059. No match
  2060. /^[\p{Xwd}]+/utf
  2061. ABCD1234\x{6ca}\x{a6c}\x{10a7}_
  2062. 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
  2063. # A check not in UTF-8 mode
  2064. /^[\p{Xwd}]+/
  2065. ABCD1234_
  2066. 0: ABCD1234_
  2067. # Some negative checks
  2068. /^[\P{Xwd}]+/utf
  2069. !.+\x{019}\x{482}AB
  2070. 0: !.+\x{19}\x{482}
  2071. /^[\p{^Xwd}]+/utf
  2072. !.+\x{019}\x{589}AB
  2073. 0: !.+\x{19}\x{589}
  2074. /[\D]/B,utf,ucp
  2075. ------------------------------------------------------------------
  2076. Bra
  2077. [\P{Nd}]
  2078. Ket
  2079. End
  2080. ------------------------------------------------------------------
  2081. 1\x{3c8}2
  2082. 0: \x{3c8}
  2083. /[\d]/B,utf,ucp
  2084. ------------------------------------------------------------------
  2085. Bra
  2086. [\p{Nd}]
  2087. Ket
  2088. End
  2089. ------------------------------------------------------------------
  2090. >\x{6f4}<
  2091. 0: \x{6f4}
  2092. /[\S]/B,utf,ucp
  2093. ------------------------------------------------------------------
  2094. Bra
  2095. [\P{Xsp}]
  2096. Ket
  2097. End
  2098. ------------------------------------------------------------------
  2099. \x{1680}\x{6f4}\x{1680}
  2100. 0: \x{6f4}
  2101. /[\s]/B,utf,ucp
  2102. ------------------------------------------------------------------
  2103. Bra
  2104. [\p{Xsp}]
  2105. Ket
  2106. End
  2107. ------------------------------------------------------------------
  2108. >\x{1680}<
  2109. 0: \x{1680}
  2110. /[\W]/B,utf,ucp
  2111. ------------------------------------------------------------------
  2112. Bra
  2113. [\P{Xwd}]
  2114. Ket
  2115. End
  2116. ------------------------------------------------------------------
  2117. A\x{1735}B
  2118. 0: \x{1735}
  2119. /[\w]/B,utf,ucp
  2120. ------------------------------------------------------------------
  2121. Bra
  2122. [\p{Xwd}]
  2123. Ket
  2124. End
  2125. ------------------------------------------------------------------
  2126. >\x{1723}<
  2127. 0: \x{1723}
  2128. /\D/B,utf,ucp
  2129. ------------------------------------------------------------------
  2130. Bra
  2131. notprop Nd
  2132. Ket
  2133. End
  2134. ------------------------------------------------------------------
  2135. 1\x{3c8}2
  2136. 0: \x{3c8}
  2137. /\d/B,utf,ucp
  2138. ------------------------------------------------------------------
  2139. Bra
  2140. prop Nd
  2141. Ket
  2142. End
  2143. ------------------------------------------------------------------
  2144. >\x{6f4}<
  2145. 0: \x{6f4}
  2146. /\S/B,utf,ucp
  2147. ------------------------------------------------------------------
  2148. Bra
  2149. notprop Xsp
  2150. Ket
  2151. End
  2152. ------------------------------------------------------------------
  2153. \x{1680}\x{6f4}\x{1680}
  2154. 0: \x{6f4}
  2155. /\s/B,utf,ucp
  2156. ------------------------------------------------------------------
  2157. Bra
  2158. prop Xsp
  2159. Ket
  2160. End
  2161. ------------------------------------------------------------------
  2162. >\x{1680}>
  2163. 0: \x{1680}
  2164. /\W/B,utf,ucp
  2165. ------------------------------------------------------------------
  2166. Bra
  2167. notprop Xwd
  2168. Ket
  2169. End
  2170. ------------------------------------------------------------------
  2171. A\x{1735}B
  2172. 0: \x{1735}
  2173. /\w/B,utf,ucp
  2174. ------------------------------------------------------------------
  2175. Bra
  2176. prop Xwd
  2177. Ket
  2178. End
  2179. ------------------------------------------------------------------
  2180. >\x{1723}<
  2181. 0: \x{1723}
  2182. /[[:alpha:]]/B,ucp
  2183. ------------------------------------------------------------------
  2184. Bra
  2185. [\p{L}]
  2186. Ket
  2187. End
  2188. ------------------------------------------------------------------
  2189. /[[:lower:]]/B,ucp
  2190. ------------------------------------------------------------------
  2191. Bra
  2192. [\p{Ll}]
  2193. Ket
  2194. End
  2195. ------------------------------------------------------------------
  2196. /[[:upper:]]/B,ucp
  2197. ------------------------------------------------------------------
  2198. Bra
  2199. [\p{Lu}]
  2200. Ket
  2201. End
  2202. ------------------------------------------------------------------
  2203. /[[:alnum:]]/B,ucp
  2204. ------------------------------------------------------------------
  2205. Bra
  2206. [\p{Xan}]
  2207. Ket
  2208. End
  2209. ------------------------------------------------------------------
  2210. /[[:ascii:]]/B,ucp
  2211. ------------------------------------------------------------------
  2212. Bra
  2213. [\x00-\x7f]
  2214. Ket
  2215. End
  2216. ------------------------------------------------------------------
  2217. /[[:cntrl:]]/B,ucp
  2218. ------------------------------------------------------------------
  2219. Bra
  2220. [\p{Cc}]
  2221. Ket
  2222. End
  2223. ------------------------------------------------------------------
  2224. /[[:digit:]]/B,ucp
  2225. ------------------------------------------------------------------
  2226. Bra
  2227. [\p{Nd}]
  2228. Ket
  2229. End
  2230. ------------------------------------------------------------------
  2231. /[[:digit:]]/B,ucp,ascii_digit
  2232. ------------------------------------------------------------------
  2233. Bra
  2234. [0-9]
  2235. Ket
  2236. End
  2237. ------------------------------------------------------------------
  2238. /[[:graph:]]/B,ucp
  2239. ------------------------------------------------------------------
  2240. Bra
  2241. [[:graph:]]
  2242. Ket
  2243. End
  2244. ------------------------------------------------------------------
  2245. /[[:print:]]/B,ucp
  2246. ------------------------------------------------------------------
  2247. Bra
  2248. [[:print:]]
  2249. Ket
  2250. End
  2251. ------------------------------------------------------------------
  2252. /[[:punct:]]/B,ucp
  2253. ------------------------------------------------------------------
  2254. Bra
  2255. [[:punct:]]
  2256. Ket
  2257. End
  2258. ------------------------------------------------------------------
  2259. /[[:space:]]/B,ucp
  2260. ------------------------------------------------------------------
  2261. Bra
  2262. [\p{Xps}]
  2263. Ket
  2264. End
  2265. ------------------------------------------------------------------
  2266. /[[:word:]]/B,ucp
  2267. ------------------------------------------------------------------
  2268. Bra
  2269. [\p{Xwd}]
  2270. Ket
  2271. End
  2272. ------------------------------------------------------------------
  2273. /[[:xdigit:]]/B,ucp
  2274. ------------------------------------------------------------------
  2275. Bra
  2276. [[:xdigit:]]
  2277. Ket
  2278. End
  2279. ------------------------------------------------------------------
  2280. /[[:xdigit:]]/B,ucp,ascii_digit
  2281. ------------------------------------------------------------------
  2282. Bra
  2283. [0-9A-Fa-f]
  2284. Ket
  2285. End
  2286. ------------------------------------------------------------------
  2287. # Unicode properties for \b and \B
  2288. /\b...\B/utf,ucp
  2289. abc_
  2290. 0: abc
  2291. \x{37e}abc\x{376}
  2292. 0: abc
  2293. \x{37e}\x{376}\x{371}\x{393}\x{394}
  2294. 0: \x{376}\x{371}\x{393}
  2295. !\x{c0}++\x{c1}\x{c2}
  2296. 0: ++\x{c1}
  2297. !\x{c0}+++++
  2298. 0: \x{c0}++
  2299. # Without PCRE_UCP, non-ASCII always fail, even if < 256
  2300. /\b...\B/utf
  2301. abc_
  2302. 0: abc
  2303. \= Expect no match
  2304. \x{37e}abc\x{376}
  2305. No match
  2306. \x{37e}\x{376}\x{371}\x{393}\x{394}
  2307. No match
  2308. !\x{c0}++\x{c1}\x{c2}
  2309. No match
  2310. !\x{c0}+++++
  2311. No match
  2312. # With PCRE_UCP, non-UTF8 chars that are < 256 still check properties
  2313. /\b...\B/ucp
  2314. abc_
  2315. 0: abc
  2316. !\x{c0}++\x{c1}\x{c2}
  2317. 0: ++\xc1
  2318. !\x{c0}+++++
  2319. 0: \xc0++
  2320. # Some of these are silly, but they check various combinations
  2321. /[[:^alpha:][:^cntrl:]]+/B,utf,ucp
  2322. ------------------------------------------------------------------
  2323. Bra
  2324. [\P{L}\P{Cc}]++
  2325. Ket
  2326. End
  2327. ------------------------------------------------------------------
  2328. 123
  2329. 0: 123
  2330. abc
  2331. 0: abc
  2332. /[[:^cntrl:][:^alpha:]]+/B,utf,ucp
  2333. ------------------------------------------------------------------
  2334. Bra
  2335. [\P{Cc}\P{L}]++
  2336. Ket
  2337. End
  2338. ------------------------------------------------------------------
  2339. 123
  2340. 0: 123
  2341. abc
  2342. 0: abc
  2343. /[[:alpha:]]+/B,utf,ucp
  2344. ------------------------------------------------------------------
  2345. Bra
  2346. [\p{L}]++
  2347. Ket
  2348. End
  2349. ------------------------------------------------------------------
  2350. abc
  2351. 0: abc
  2352. /[[:^alpha:]\S]+/B,utf,ucp
  2353. ------------------------------------------------------------------
  2354. Bra
  2355. [\P{L}\P{Xsp}]++
  2356. Ket
  2357. End
  2358. ------------------------------------------------------------------
  2359. 123
  2360. 0: 123
  2361. abc
  2362. 0: abc
  2363. /[^\d]+/B,utf,ucp
  2364. ------------------------------------------------------------------
  2365. Bra
  2366. [^\p{Nd}]++
  2367. Ket
  2368. End
  2369. ------------------------------------------------------------------
  2370. abc123
  2371. 0: abc
  2372. abc\x{123}
  2373. 0: abc\x{123}
  2374. \x{660}abc
  2375. 0: abc
  2376. /\p{Lu}+9\p{Lu}+B\p{Lu}+b/B
  2377. ------------------------------------------------------------------
  2378. Bra
  2379. prop Lu ++
  2380. 9
  2381. prop Lu +
  2382. B
  2383. prop Lu ++
  2384. b
  2385. Ket
  2386. End
  2387. ------------------------------------------------------------------
  2388. /\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/B
  2389. ------------------------------------------------------------------
  2390. Bra
  2391. notprop Lu +
  2392. 9
  2393. notprop Lu ++
  2394. B
  2395. notprop Lu +
  2396. b
  2397. Ket
  2398. End
  2399. ------------------------------------------------------------------
  2400. /\P{Lu}+9\P{Lu}+B\P{Lu}+b/B
  2401. ------------------------------------------------------------------
  2402. Bra
  2403. notprop Lu +
  2404. 9
  2405. notprop Lu ++
  2406. B
  2407. notprop Lu +
  2408. b
  2409. Ket
  2410. End
  2411. ------------------------------------------------------------------
  2412. /\p{Han}+X\p{Greek}+\x{370}/B,utf
  2413. ------------------------------------------------------------------
  2414. Bra
  2415. prop Han ++
  2416. X
  2417. prop Greek +
  2418. \x{370}
  2419. Ket
  2420. End
  2421. ------------------------------------------------------------------
  2422. /\p{Xan}+!\p{Xan}+A/B
  2423. ------------------------------------------------------------------
  2424. Bra
  2425. prop Xan ++
  2426. !
  2427. prop Xan +
  2428. A
  2429. Ket
  2430. End
  2431. ------------------------------------------------------------------
  2432. /\p{Xsp}+!\p{Xsp}\t/B
  2433. ------------------------------------------------------------------
  2434. Bra
  2435. prop Xsp ++
  2436. !
  2437. prop Xsp
  2438. \x09
  2439. Ket
  2440. End
  2441. ------------------------------------------------------------------
  2442. /\p{Xps}+!\p{Xps}\t/B
  2443. ------------------------------------------------------------------
  2444. Bra
  2445. prop Xps ++
  2446. !
  2447. prop Xps
  2448. \x09
  2449. Ket
  2450. End
  2451. ------------------------------------------------------------------
  2452. /\p{Xwd}+!\p{Xwd}_/B
  2453. ------------------------------------------------------------------
  2454. Bra
  2455. prop Xwd ++
  2456. !
  2457. prop Xwd
  2458. _
  2459. Ket
  2460. End
  2461. ------------------------------------------------------------------
  2462. /A+\p{N}A+\dB+\p{N}*B+\d*/B,ucp
  2463. ------------------------------------------------------------------
  2464. Bra
  2465. A++
  2466. prop N
  2467. A++
  2468. prop Nd
  2469. B+
  2470. prop N *+
  2471. B++
  2472. prop Nd *+
  2473. Ket
  2474. End
  2475. ------------------------------------------------------------------
  2476. # These behaved oddly in Perl, so they are kept in this test
  2477. /(\x{23a}\x{23a}\x{23a})?\1/i,utf
  2478. \= Expect no match
  2479. \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
  2480. No match
  2481. /(ȺȺȺ)?\1/i,utf
  2482. \= Expect no match
  2483. ȺȺȺⱥⱥ
  2484. No match
  2485. /(\x{23a}\x{23a}\x{23a})?\1/i,utf
  2486. \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
  2487. 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
  2488. 1: \x{23a}\x{23a}\x{23a}
  2489. /(ȺȺȺ)?\1/i,utf
  2490. ȺȺȺⱥⱥⱥ
  2491. 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
  2492. 1: \x{23a}\x{23a}\x{23a}
  2493. /(\x{23a}\x{23a}\x{23a})\1/i,utf
  2494. \= Expect no match
  2495. \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
  2496. No match
  2497. /(ȺȺȺ)\1/i,utf
  2498. \= Expect no match
  2499. ȺȺȺⱥⱥ
  2500. No match
  2501. /(\x{23a}\x{23a}\x{23a})\1/i,utf
  2502. \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
  2503. 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
  2504. 1: \x{23a}\x{23a}\x{23a}
  2505. /(ȺȺȺ)\1/i,utf
  2506. ȺȺȺⱥⱥⱥ
  2507. 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
  2508. 1: \x{23a}\x{23a}\x{23a}
  2509. /(\x{2c65}\x{2c65})\1/i,utf
  2510. \x{2c65}\x{2c65}\x{23a}\x{23a}
  2511. 0: \x{2c65}\x{2c65}\x{23a}\x{23a}
  2512. 1: \x{2c65}\x{2c65}
  2513. /(ⱥⱥ)\1/i,utf
  2514. ⱥⱥȺȺ
  2515. 0: \x{2c65}\x{2c65}\x{23a}\x{23a}
  2516. 1: \x{2c65}\x{2c65}
  2517. /(\x{23a}\x{23a}\x{23a})\1Y/i,utf
  2518. X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ
  2519. 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}Y
  2520. 1: \x{23a}\x{23a}\x{23a}
  2521. /(\x{2c65}\x{2c65})\1Y/i,utf
  2522. X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ
  2523. 0: \x{2c65}\x{2c65}\x{23a}\x{23a}Y
  2524. 1: \x{2c65}\x{2c65}
  2525. # These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE
  2526. /^[\p{Batak}]/utf
  2527. \x{1bc0}
  2528. 0: \x{1bc0}
  2529. \x{1bff}
  2530. 0: \x{1bff}
  2531. \= Expect no match
  2532. \x{1bf4}
  2533. No match
  2534. /^[\p{Brahmi}]/utf
  2535. \x{11000}
  2536. 0: \x{11000}
  2537. \x{1106f}
  2538. 0: \x{1106f}
  2539. \= Expect no match
  2540. \x{1104e}
  2541. No match
  2542. /^[\p{Mandaic}]/utf
  2543. \x{840}
  2544. 0: \x{840}
  2545. \x{85e}
  2546. 0: \x{85e}
  2547. \= Expect no match
  2548. \x{85c}
  2549. No match
  2550. \x{85d}
  2551. No match
  2552. /(\X*)(.)/s,utf
  2553. A\x{300}
  2554. 0: A
  2555. 1:
  2556. 2: A
  2557. /^S(\X*)e(\X*)$/utf
  2558. Stéréo
  2559. 0: Ste\x{301}re\x{301}o
  2560. 1: te\x{301}r
  2561. 2: \x{301}o
  2562. /^\X/utf
  2563. ́réo
  2564. 0: \x{301}
  2565. /^a\X41z/alt_bsux,allow_empty_class,match_unset_backref,dupnames
  2566. aX41z
  2567. 0: aX41z
  2568. \= Expect no match
  2569. aAz
  2570. No match
  2571. /\X/
  2572. a\=ps
  2573. 0: a
  2574. a\=ph
  2575. Partial match: a
  2576. /\Xa/
  2577. aa\=ps
  2578. 0: aa
  2579. aa\=ph
  2580. 0: aa
  2581. /\X{2}/
  2582. aa\=ps
  2583. 0: aa
  2584. aa\=ph
  2585. Partial match: aa
  2586. /\X+a/
  2587. a\=ps
  2588. Partial match: a
  2589. aa\=ps
  2590. 0: aa
  2591. aa\=ph
  2592. Partial match: aa
  2593. /\X+?a/
  2594. a\=ps
  2595. Partial match: a
  2596. ab\=ps
  2597. Partial match: ab
  2598. aa\=ps
  2599. 0: aa
  2600. aa\=ph
  2601. 0: aa
  2602. aba\=ps
  2603. 0: aba
  2604. # These Unicode 6.1.0 scripts are not known to Perl.
  2605. /\p{Chakma}\d/utf,ucp
  2606. \x{11100}\x{1113c}
  2607. 0: \x{11100}\x{1113c}
  2608. /\p{Takri}\d/utf,ucp
  2609. \x{11680}\x{116c0}
  2610. 0: \x{11680}\x{116c0}
  2611. /^\X/utf
  2612. A\=ps
  2613. 0: A
  2614. A\=ph
  2615. Partial match: A
  2616. A\x{300}\x{301}\=ps
  2617. 0: A\x{300}\x{301}
  2618. A\x{300}\x{301}\=ph
  2619. Partial match: A\x{300}\x{301}
  2620. A\x{301}\=ps
  2621. 0: A\x{301}
  2622. A\x{301}\=ph
  2623. Partial match: A\x{301}
  2624. /^\X{2,3}/utf
  2625. A\=ps
  2626. Partial match: A
  2627. A\=ph
  2628. Partial match: A
  2629. AA\=ps
  2630. 0: AA
  2631. AA\=ph
  2632. Partial match: AA
  2633. A\x{300}\x{301}\=ps
  2634. Partial match: A\x{300}\x{301}
  2635. A\x{300}\x{301}\=ph
  2636. Partial match: A\x{300}\x{301}
  2637. A\x{300}\x{301}A\x{300}\x{301}\=ps
  2638. 0: A\x{300}\x{301}A\x{300}\x{301}
  2639. A\x{300}\x{301}A\x{300}\x{301}\=ph
  2640. Partial match: A\x{300}\x{301}A\x{300}\x{301}
  2641. /^\X{2}/utf
  2642. AA\=ps
  2643. 0: AA
  2644. AA\=ph
  2645. Partial match: AA
  2646. A\x{300}\x{301}A\x{300}\x{301}\=ps
  2647. 0: A\x{300}\x{301}A\x{300}\x{301}
  2648. A\x{300}\x{301}A\x{300}\x{301}\=ph
  2649. Partial match: A\x{300}\x{301}A\x{300}\x{301}
  2650. /^\X+/utf
  2651. AA\=ps
  2652. 0: AA
  2653. AA\=ph
  2654. Partial match: AA
  2655. /^\X+?Z/utf
  2656. AA\=ps
  2657. Partial match: AA
  2658. AA\=ph
  2659. Partial match: AA
  2660. /A\x{3a3}B/IBi,utf
  2661. ------------------------------------------------------------------
  2662. Bra
  2663. /i A
  2664. clist 03a3 03c2 03c3
  2665. /i B
  2666. Ket
  2667. End
  2668. ------------------------------------------------------------------
  2669. Capture group count = 0
  2670. Options: caseless utf
  2671. First code unit = 'A' (caseless)
  2672. Last code unit = 'B' (caseless)
  2673. Subject length lower bound = 3
  2674. /[\x{3a3}]/Bi,utf
  2675. ------------------------------------------------------------------
  2676. Bra
  2677. clist 03a3 03c2 03c3
  2678. Ket
  2679. End
  2680. ------------------------------------------------------------------
  2681. /[^\x{3a3}]/Bi,utf
  2682. ------------------------------------------------------------------
  2683. Bra
  2684. not clist 03a3 03c2 03c3
  2685. Ket
  2686. End
  2687. ------------------------------------------------------------------
  2688. /[\x{3a3}]+/Bi,utf
  2689. ------------------------------------------------------------------
  2690. Bra
  2691. clist 03a3 03c2 03c3 ++
  2692. Ket
  2693. End
  2694. ------------------------------------------------------------------
  2695. /[^\x{3a3}]+/Bi,utf
  2696. ------------------------------------------------------------------
  2697. Bra
  2698. not clist 03a3 03c2 03c3 ++
  2699. Ket
  2700. End
  2701. ------------------------------------------------------------------
  2702. /a*\x{3a3}/Bi,utf
  2703. ------------------------------------------------------------------
  2704. Bra
  2705. /i a*+
  2706. clist 03a3 03c2 03c3
  2707. Ket
  2708. End
  2709. ------------------------------------------------------------------
  2710. /\x{3a3}+a/Bi,utf
  2711. ------------------------------------------------------------------
  2712. Bra
  2713. clist 03a3 03c2 03c3 ++
  2714. /i a
  2715. Ket
  2716. End
  2717. ------------------------------------------------------------------
  2718. /\x{3a3}*\x{3c2}/Bi,utf
  2719. ------------------------------------------------------------------
  2720. Bra
  2721. clist 03a3 03c2 03c3 *
  2722. clist 03a3 03c2 03c3
  2723. Ket
  2724. End
  2725. ------------------------------------------------------------------
  2726. /\x{3a3}{3}/i,utf,aftertext
  2727. \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
  2728. 0: \x{3a3}\x{3c3}\x{3c2}
  2729. 0+ \x{3a3}\x{3c3}\x{3c2}
  2730. /\x{3a3}{2,4}/i,utf,aftertext
  2731. \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
  2732. 0: \x{3a3}\x{3c3}\x{3c2}\x{3a3}
  2733. 0+ \x{3c3}\x{3c2}
  2734. /\x{3a3}{2,4}?/i,utf,aftertext
  2735. \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
  2736. 0: \x{3a3}\x{3c3}
  2737. 0+ \x{3c2}\x{3a3}\x{3c3}\x{3c2}
  2738. /\x{3a3}+./i,utf,aftertext
  2739. \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
  2740. 0: \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
  2741. 0+
  2742. /\x{3a3}++./i,utf,aftertext
  2743. \= Expect no match
  2744. \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
  2745. No match
  2746. /\x{3a3}*\x{3c2}/Bi,utf
  2747. ------------------------------------------------------------------
  2748. Bra
  2749. clist 03a3 03c2 03c3 *
  2750. clist 03a3 03c2 03c3
  2751. Ket
  2752. End
  2753. ------------------------------------------------------------------
  2754. /[^\x{3a3}]*\x{3c2}/Bi,utf
  2755. ------------------------------------------------------------------
  2756. Bra
  2757. not clist 03a3 03c2 03c3 *+
  2758. clist 03a3 03c2 03c3
  2759. Ket
  2760. End
  2761. ------------------------------------------------------------------
  2762. /[^a]*\x{3c2}/Bi,utf
  2763. ------------------------------------------------------------------
  2764. Bra
  2765. /i [^a]*
  2766. clist 03a3 03c2 03c3
  2767. Ket
  2768. End
  2769. ------------------------------------------------------------------
  2770. /ist/Bi,utf
  2771. ------------------------------------------------------------------
  2772. Bra
  2773. /i i
  2774. clist 0053 0073 017f
  2775. /i t
  2776. Ket
  2777. End
  2778. ------------------------------------------------------------------
  2779. \= Expect no match
  2780. ikt
  2781. No match
  2782. /is+t/i,utf
  2783. iSs\x{17f}t
  2784. 0: iSs\x{17f}t
  2785. \= Expect no match
  2786. ikt
  2787. No match
  2788. /is+?t/i,utf
  2789. \= Expect no match
  2790. ikt
  2791. No match
  2792. /is?t/i,utf
  2793. \= Expect no match
  2794. ikt
  2795. No match
  2796. /is{2}t/i,utf
  2797. \= Expect no match
  2798. iskt
  2799. No match
  2800. # This property is a PCRE special
  2801. /^\p{Xuc}/utf
  2802. $abc
  2803. 0: $
  2804. @abc
  2805. 0: @
  2806. `abc
  2807. 0: `
  2808. \x{1234}abc
  2809. 0: \x{1234}
  2810. \= Expect no match
  2811. abc
  2812. No match
  2813. /^\p{Xuc}+/utf
  2814. $@`\x{a0}\x{1234}\x{e000}**
  2815. 0: $@`\x{a0}\x{1234}\x{e000}
  2816. \= Expect no match
  2817. \x{9f}
  2818. No match
  2819. /^\p{Xuc}+?/utf
  2820. $@`\x{a0}\x{1234}\x{e000}**
  2821. 0: $
  2822. \= Expect no match
  2823. \x{9f}
  2824. No match
  2825. /^\p{Xuc}+?\*/utf
  2826. $@`\x{a0}\x{1234}\x{e000}**
  2827. 0: $@`\x{a0}\x{1234}\x{e000}*
  2828. \= Expect no match
  2829. \x{9f}
  2830. No match
  2831. /^\p{Xuc}++/utf
  2832. $@`\x{a0}\x{1234}\x{e000}**
  2833. 0: $@`\x{a0}\x{1234}\x{e000}
  2834. \= Expect no match
  2835. \x{9f}
  2836. No match
  2837. /^\p{Xuc}{3,5}/utf
  2838. $@`\x{a0}\x{1234}\x{e000}**
  2839. 0: $@`\x{a0}\x{1234}
  2840. \= Expect no match
  2841. \x{9f}
  2842. No match
  2843. /^\p{Xuc}{3,5}?/utf
  2844. $@`\x{a0}\x{1234}\x{e000}**
  2845. 0: $@`
  2846. \= Expect no match
  2847. \x{9f}
  2848. No match
  2849. /^[\p{Xuc}]/utf
  2850. $@`\x{a0}\x{1234}\x{e000}**
  2851. 0: $
  2852. \= Expect no match
  2853. \x{9f}
  2854. No match
  2855. /^[\p{Xuc}]+/utf
  2856. $@`\x{a0}\x{1234}\x{e000}**
  2857. 0: $@`\x{a0}\x{1234}\x{e000}
  2858. \= Expect no match
  2859. \x{9f}
  2860. No match
  2861. /^\P{Xuc}/utf
  2862. abc
  2863. 0: a
  2864. \= Expect no match
  2865. $abc
  2866. No match
  2867. @abc
  2868. No match
  2869. `abc
  2870. No match
  2871. \x{1234}abc
  2872. No match
  2873. /^[\P{Xuc}]/utf
  2874. abc
  2875. 0: a
  2876. \= Expect no match
  2877. $abc
  2878. No match
  2879. @abc
  2880. No match
  2881. `abc
  2882. No match
  2883. \x{1234}abc
  2884. No match
  2885. # Some auto-possessification tests
  2886. /\pN+\z/B
  2887. ------------------------------------------------------------------
  2888. Bra
  2889. prop N ++
  2890. \z
  2891. Ket
  2892. End
  2893. ------------------------------------------------------------------
  2894. /\PN+\z/B
  2895. ------------------------------------------------------------------
  2896. Bra
  2897. notprop N ++
  2898. \z
  2899. Ket
  2900. End
  2901. ------------------------------------------------------------------
  2902. /\pN+/B
  2903. ------------------------------------------------------------------
  2904. Bra
  2905. prop N ++
  2906. Ket
  2907. End
  2908. ------------------------------------------------------------------
  2909. /\PN+/B
  2910. ------------------------------------------------------------------
  2911. Bra
  2912. notprop N ++
  2913. Ket
  2914. End
  2915. ------------------------------------------------------------------
  2916. /\p{Any}+\p{Any} \p{Any}+\P{Any} \p{Any}+\p{L&} \p{Any}+\p{L} \p{Any}+\p{Lu} \p{Any}+\p{Han} \p{Any}+\p{Xan} \p{Any}+\p{Xsp} \p{Any}+\p{Xps} \p{Xwd}+\p{Any} \p{Any}+\p{Xuc}/Bx,ucp
  2917. ------------------------------------------------------------------
  2918. Bra
  2919. AllAny+
  2920. AllAny
  2921. AllAny+
  2922. notprop Any
  2923. AllAny+
  2924. prop Lc
  2925. AllAny+
  2926. prop L
  2927. AllAny+
  2928. prop Lu
  2929. AllAny+
  2930. prop Han
  2931. AllAny+
  2932. prop Xan
  2933. AllAny+
  2934. prop Xsp
  2935. AllAny+
  2936. prop Xps
  2937. prop Xwd +
  2938. AllAny
  2939. AllAny+
  2940. prop Xuc
  2941. Ket
  2942. End
  2943. ------------------------------------------------------------------
  2944. /\p{L&}+\p{Any} \p{L&}+\p{L&} \P{L&}+\p{L&} \p{L&}+\p{L} \p{L&}+\p{Lu} \p{L&}+\p{Han} \p{L&}+\p{Xan} \p{L&}+\P{Xan} \p{L&}+\p{Xsp} \p{L&}+\p{Xps} \p{Xwd}+\p{L&} \p{L&}+\p{Xuc}/Bx,ucp
  2945. ------------------------------------------------------------------
  2946. Bra
  2947. prop Lc +
  2948. AllAny
  2949. prop Lc +
  2950. prop Lc
  2951. notprop Lc ++
  2952. prop Lc
  2953. prop Lc +
  2954. prop L
  2955. prop Lc +
  2956. prop Lu
  2957. prop Lc +
  2958. prop Han
  2959. prop Lc +
  2960. prop Xan
  2961. prop Lc ++
  2962. notprop Xan
  2963. prop Lc ++
  2964. prop Xsp
  2965. prop Lc ++
  2966. prop Xps
  2967. prop Xwd +
  2968. prop Lc
  2969. prop Lc +
  2970. prop Xuc
  2971. Ket
  2972. End
  2973. ------------------------------------------------------------------
  2974. /\p{N}+\p{Any} \p{N}+\p{L&} \p{N}+\p{L} \p{N}+\P{L} \p{N}+\P{N} \p{N}+\p{Lu} \p{N}+\p{Han} \p{N}+\p{Xan} \p{N}+\p{Xsp} \p{N}+\p{Xps} \p{Xwd}+\p{N} \p{N}+\p{Xuc}/Bx,ucp
  2975. ------------------------------------------------------------------
  2976. Bra
  2977. prop N +
  2978. AllAny
  2979. prop N +
  2980. prop Lc
  2981. prop N ++
  2982. prop L
  2983. prop N +
  2984. notprop L
  2985. prop N ++
  2986. notprop N
  2987. prop N ++
  2988. prop Lu
  2989. prop N +
  2990. prop Han
  2991. prop N +
  2992. prop Xan
  2993. prop N ++
  2994. prop Xsp
  2995. prop N ++
  2996. prop Xps
  2997. prop Xwd +
  2998. prop N
  2999. prop N +
  3000. prop Xuc
  3001. Ket
  3002. End
  3003. ------------------------------------------------------------------
  3004. /\p{Lu}+\p{Any} \p{Lu}+\p{L&} \p{Lu}+\p{L} \p{Lu}+\p{Lu} \P{Lu}+\p{Lu} \p{Lu}+\p{Nd} \p{Lu}+\P{Nd} \p{Lu}+\p{Han} \p{Lu}+\p{Xan} \p{Lu}+\p{Xsp} \p{Lu}+\p{Xps} \p{Xwd}+\p{Lu} \p{Lu}+\p{Xuc}/Bx,ucp
  3005. ------------------------------------------------------------------
  3006. Bra
  3007. prop Lu +
  3008. AllAny
  3009. prop Lu +
  3010. prop Lc
  3011. prop Lu +
  3012. prop L
  3013. prop Lu +
  3014. prop Lu
  3015. notprop Lu ++
  3016. prop Lu
  3017. prop Lu ++
  3018. prop Nd
  3019. prop Lu +
  3020. notprop Nd
  3021. prop Lu +
  3022. prop Han
  3023. prop Lu +
  3024. prop Xan
  3025. prop Lu ++
  3026. prop Xsp
  3027. prop Lu ++
  3028. prop Xps
  3029. prop Xwd +
  3030. prop Lu
  3031. prop Lu +
  3032. prop Xuc
  3033. Ket
  3034. End
  3035. ------------------------------------------------------------------
  3036. /\p{Han}+\p{Lu} \p{Han}+\p{L&} \p{Han}+\p{L} \p{Han}+\p{Lu} \p{Han}+\p{Arabic} \p{Arabic}+\p{Arabic} \p{Han}+\p{Xan} \p{Han}+\p{Xsp} \p{Han}+\p{Xps} \p{Xwd}+\p{Han} \p{Han}+\p{Xuc}/Bx,ucp
  3037. ------------------------------------------------------------------
  3038. Bra
  3039. prop Han +
  3040. prop Lu
  3041. prop Han +
  3042. prop Lc
  3043. prop Han +
  3044. prop L
  3045. prop Han +
  3046. prop Lu
  3047. prop Han ++
  3048. prop Arabic
  3049. prop Arabic +
  3050. prop Arabic
  3051. prop Han +
  3052. prop Xan
  3053. prop Han +
  3054. prop Xsp
  3055. prop Han +
  3056. prop Xps
  3057. prop Xwd +
  3058. prop Han
  3059. prop Han +
  3060. prop Xuc
  3061. Ket
  3062. End
  3063. ------------------------------------------------------------------
  3064. /\p{Xan}+\p{Any} \p{Xan}+\p{L&} \P{Xan}+\p{L&} \p{Xan}+\p{L} \p{Xan}+\p{Lu} \p{Xan}+\p{Han} \p{Xan}+\p{Xan} \p{Xan}+\P{Xan} \p{Xan}+\p{Xsp} \p{Xan}+\p{Xps} \p{Xwd}+\p{Xan} \p{Xan}+\p{Xuc}/Bx,ucp
  3065. ------------------------------------------------------------------
  3066. Bra
  3067. prop Xan +
  3068. AllAny
  3069. prop Xan +
  3070. prop Lc
  3071. notprop Xan ++
  3072. prop Lc
  3073. prop Xan +
  3074. prop L
  3075. prop Xan +
  3076. prop Lu
  3077. prop Xan +
  3078. prop Han
  3079. prop Xan +
  3080. prop Xan
  3081. prop Xan ++
  3082. notprop Xan
  3083. prop Xan ++
  3084. prop Xsp
  3085. prop Xan ++
  3086. prop Xps
  3087. prop Xwd +
  3088. prop Xan
  3089. prop Xan +
  3090. prop Xuc
  3091. Ket
  3092. End
  3093. ------------------------------------------------------------------
  3094. /\p{Xsp}+\p{Any} \p{Xsp}+\p{L&} \p{Xsp}+\p{L} \p{Xsp}+\p{Lu} \p{Xsp}+\p{Han} \p{Xsp}+\p{Xan} \p{Xsp}+\p{Xsp} \P{Xsp}+\p{Xsp} \p{Xsp}+\p{Xps} \p{Xwd}+\p{Xsp} \p{Xsp}+\p{Xuc}/Bx,ucp
  3095. ------------------------------------------------------------------
  3096. Bra
  3097. prop Xsp +
  3098. AllAny
  3099. prop Xsp ++
  3100. prop Lc
  3101. prop Xsp ++
  3102. prop L
  3103. prop Xsp ++
  3104. prop Lu
  3105. prop Xsp +
  3106. prop Han
  3107. prop Xsp ++
  3108. prop Xan
  3109. prop Xsp +
  3110. prop Xsp
  3111. notprop Xsp ++
  3112. prop Xsp
  3113. prop Xsp +
  3114. prop Xps
  3115. prop Xwd ++
  3116. prop Xsp
  3117. prop Xsp +
  3118. prop Xuc
  3119. Ket
  3120. End
  3121. ------------------------------------------------------------------
  3122. /\p{Xwd}+\p{Any} \p{Xwd}+\p{L&} \p{Xwd}+\p{L} \p{Xwd}+\p{Lu} \p{Xwd}+\p{Han} \p{Xwd}+\p{Xan} \p{Xwd}+\p{Xsp} \p{Xwd}+\p{Xps} \p{Xwd}+\p{Xwd} \p{Xwd}+\P{Xwd} \p{Xwd}+\p{Xuc}/Bx,ucp
  3123. ------------------------------------------------------------------
  3124. Bra
  3125. prop Xwd +
  3126. AllAny
  3127. prop Xwd +
  3128. prop Lc
  3129. prop Xwd +
  3130. prop L
  3131. prop Xwd +
  3132. prop Lu
  3133. prop Xwd +
  3134. prop Han
  3135. prop Xwd +
  3136. prop Xan
  3137. prop Xwd ++
  3138. prop Xsp
  3139. prop Xwd ++
  3140. prop Xps
  3141. prop Xwd +
  3142. prop Xwd
  3143. prop Xwd ++
  3144. notprop Xwd
  3145. prop Xwd +
  3146. prop Xuc
  3147. Ket
  3148. End
  3149. ------------------------------------------------------------------
  3150. /\p{Xuc}+\p{Any} \p{Xuc}+\p{L&} \p{Xuc}+\p{L} \p{Xuc}+\p{Lu} \p{Xuc}+\p{Han} \p{Xuc}+\p{Xan} \p{Xuc}+\p{Xsp} \p{Xuc}+\p{Xps} \p{Xwd}+\p{Xuc} \p{Xuc}+\p{Xuc} \p{Xuc}+\P{Xuc}/Bx,ucp
  3151. ------------------------------------------------------------------
  3152. Bra
  3153. prop Xuc +
  3154. AllAny
  3155. prop Xuc +
  3156. prop Lc
  3157. prop Xuc +
  3158. prop L
  3159. prop Xuc +
  3160. prop Lu
  3161. prop Xuc +
  3162. prop Han
  3163. prop Xuc +
  3164. prop Xan
  3165. prop Xuc +
  3166. prop Xsp
  3167. prop Xuc +
  3168. prop Xps
  3169. prop Xwd +
  3170. prop Xuc
  3171. prop Xuc +
  3172. prop Xuc
  3173. prop Xuc ++
  3174. notprop Xuc
  3175. Ket
  3176. End
  3177. ------------------------------------------------------------------
  3178. /\p{N}+\p{Ll} \p{N}+\p{Nd} \p{N}+\P{Nd}/Bx,ucp
  3179. ------------------------------------------------------------------
  3180. Bra
  3181. prop N ++
  3182. prop Ll
  3183. prop N +
  3184. prop Nd
  3185. prop N +
  3186. notprop Nd
  3187. Ket
  3188. End
  3189. ------------------------------------------------------------------
  3190. /\p{Xan}+\p{L} \p{Xan}+\p{N} \p{Xan}+\p{C} \p{Xan}+\P{L} \P{Xan}+\p{N} \p{Xan}+\P{C}/Bx,ucp
  3191. ------------------------------------------------------------------
  3192. Bra
  3193. prop Xan +
  3194. prop L
  3195. prop Xan +
  3196. prop N
  3197. prop Xan ++
  3198. prop C
  3199. prop Xan +
  3200. notprop L
  3201. notprop Xan ++
  3202. prop N
  3203. prop Xan +
  3204. notprop C
  3205. Ket
  3206. End
  3207. ------------------------------------------------------------------
  3208. /\p{L}+\p{Xan} \p{N}+\p{Xan} \p{C}+\p{Xan} \P{L}+\p{Xan} \p{N}+\p{Xan} \P{C}+\p{Xan} \p{L}+\P{Xan}/Bx,ucp
  3209. ------------------------------------------------------------------
  3210. Bra
  3211. prop L +
  3212. prop Xan
  3213. prop N +
  3214. prop Xan
  3215. prop C ++
  3216. prop Xan
  3217. notprop L +
  3218. prop Xan
  3219. prop N +
  3220. prop Xan
  3221. notprop C +
  3222. prop Xan
  3223. prop L ++
  3224. notprop Xan
  3225. Ket
  3226. End
  3227. ------------------------------------------------------------------
  3228. /\p{Xan}+\p{Lu} \p{Xan}+\p{Nd} \p{Xan}+\p{Cc} \p{Xan}+\P{Ll} \P{Xan}+\p{No} \p{Xan}+\P{Cf}/Bx,ucp
  3229. ------------------------------------------------------------------
  3230. Bra
  3231. prop Xan +
  3232. prop Lu
  3233. prop Xan +
  3234. prop Nd
  3235. prop Xan ++
  3236. prop Cc
  3237. prop Xan +
  3238. notprop Ll
  3239. notprop Xan ++
  3240. prop No
  3241. prop Xan +
  3242. notprop Cf
  3243. Ket
  3244. End
  3245. ------------------------------------------------------------------
  3246. /\p{Lu}+\p{Xan} \p{Nd}+\p{Xan} \p{Cs}+\p{Xan} \P{Lt}+\p{Xan} \p{Nl}+\p{Xan} \P{Cc}+\p{Xan} \p{Lt}+\P{Xan}/Bx,ucp
  3247. ------------------------------------------------------------------
  3248. Bra
  3249. prop Lu +
  3250. prop Xan
  3251. prop Nd +
  3252. prop Xan
  3253. prop Cs ++
  3254. prop Xan
  3255. notprop Lt +
  3256. prop Xan
  3257. prop Nl +
  3258. prop Xan
  3259. notprop Cc +
  3260. prop Xan
  3261. prop Lt ++
  3262. notprop Xan
  3263. Ket
  3264. End
  3265. ------------------------------------------------------------------
  3266. /\w+\p{P} \w+\p{Po} \w+\s \p{Xan}+\s \s+\p{Xan} \s+\w/Bx,ucp
  3267. ------------------------------------------------------------------
  3268. Bra
  3269. prop Xwd +
  3270. prop P
  3271. prop Xwd +
  3272. prop Po
  3273. prop Xwd ++
  3274. prop Xsp
  3275. prop Xan ++
  3276. prop Xsp
  3277. prop Xsp ++
  3278. prop Xan
  3279. prop Xsp ++
  3280. prop Xwd
  3281. Ket
  3282. End
  3283. ------------------------------------------------------------------
  3284. /\w+\P{P} \W+\p{Po} \w+\S \P{Xan}+\s \s+\P{Xan} \s+\W/Bx,ucp
  3285. ------------------------------------------------------------------
  3286. Bra
  3287. prop Xwd +
  3288. notprop P
  3289. notprop Xwd +
  3290. prop Po
  3291. prop Xwd +
  3292. notprop Xsp
  3293. notprop Xan +
  3294. prop Xsp
  3295. prop Xsp +
  3296. notprop Xan
  3297. prop Xsp +
  3298. notprop Xwd
  3299. Ket
  3300. End
  3301. ------------------------------------------------------------------
  3302. /\w+\p{Po} \w+\p{Pc} \W+\p{Po} \W+\p{Pc} \w+\P{Po} \w+\P{Pc}/Bx,ucp
  3303. ------------------------------------------------------------------
  3304. Bra
  3305. prop Xwd +
  3306. prop Po
  3307. prop Xwd ++
  3308. prop Pc
  3309. notprop Xwd +
  3310. prop Po
  3311. notprop Xwd +
  3312. prop Pc
  3313. prop Xwd +
  3314. notprop Po
  3315. prop Xwd +
  3316. notprop Pc
  3317. Ket
  3318. End
  3319. ------------------------------------------------------------------
  3320. /\p{Nl}+\p{Xan} \P{Nl}+\p{Xan} \p{Nl}+\P{Xan} \P{Nl}+\P{Xan}/Bx,ucp
  3321. ------------------------------------------------------------------
  3322. Bra
  3323. prop Nl +
  3324. prop Xan
  3325. notprop Nl +
  3326. prop Xan
  3327. prop Nl ++
  3328. notprop Xan
  3329. notprop Nl +
  3330. notprop Xan
  3331. Ket
  3332. End
  3333. ------------------------------------------------------------------
  3334. /\p{Xan}+\p{Nl} \P{Xan}+\p{Nl} \p{Xan}+\P{Nl} \P{Xan}+\P{Nl}/Bx,ucp
  3335. ------------------------------------------------------------------
  3336. Bra
  3337. prop Xan +
  3338. prop Nl
  3339. notprop Xan ++
  3340. prop Nl
  3341. prop Xan +
  3342. notprop Nl
  3343. notprop Xan +
  3344. notprop Nl
  3345. Ket
  3346. End
  3347. ------------------------------------------------------------------
  3348. /\p{Xan}+\p{Nd} \P{Xan}+\p{Nd} \p{Xan}+\P{Nd} \P{Xan}+\P{Nd}/Bx,ucp
  3349. ------------------------------------------------------------------
  3350. Bra
  3351. prop Xan +
  3352. prop Nd
  3353. notprop Xan ++
  3354. prop Nd
  3355. prop Xan +
  3356. notprop Nd
  3357. notprop Xan +
  3358. notprop Nd
  3359. Ket
  3360. End
  3361. ------------------------------------------------------------------
  3362. # End auto-possessification tests
  3363. /\w+/B,utf,ucp,auto_callout
  3364. ------------------------------------------------------------------
  3365. Bra
  3366. Callout 255 0 3
  3367. prop Xwd ++
  3368. Callout 255 3 0
  3369. Ket
  3370. End
  3371. ------------------------------------------------------------------
  3372. abcd
  3373. --->abcd
  3374. +0 ^ \w+
  3375. +3 ^ ^ End of pattern
  3376. 0: abcd
  3377. /[\p{N}]?+/B,no_auto_possess
  3378. ------------------------------------------------------------------
  3379. Bra
  3380. [\p{N}]?+
  3381. Ket
  3382. End
  3383. ------------------------------------------------------------------
  3384. /[\p{L}ab]{2,3}+/B,no_auto_possess
  3385. ------------------------------------------------------------------
  3386. Bra
  3387. [ab\p{L}]{2,3}+
  3388. Ket
  3389. End
  3390. ------------------------------------------------------------------
  3391. /\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx
  3392. ------------------------------------------------------------------
  3393. Bra
  3394. \D+
  3395. extuni
  3396. \d+
  3397. extuni
  3398. \S+
  3399. extuni
  3400. \s+
  3401. extuni
  3402. \W+
  3403. extuni
  3404. \w+
  3405. extuni
  3406. \R+
  3407. extuni
  3408. \H+
  3409. extuni
  3410. \h+
  3411. extuni
  3412. \V+
  3413. extuni
  3414. \v+
  3415. extuni
  3416. a+
  3417. extuni
  3418. \x0a+
  3419. extuni
  3420. Any+
  3421. extuni
  3422. Ket
  3423. End
  3424. ------------------------------------------------------------------
  3425. /.+\X/Bsx
  3426. ------------------------------------------------------------------
  3427. Bra
  3428. AllAny+
  3429. extuni
  3430. Ket
  3431. End
  3432. ------------------------------------------------------------------
  3433. /\X+$/Bmx
  3434. ------------------------------------------------------------------
  3435. Bra
  3436. extuni+
  3437. /m $
  3438. Ket
  3439. End
  3440. ------------------------------------------------------------------
  3441. /\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx
  3442. ------------------------------------------------------------------
  3443. Bra
  3444. extuni+
  3445. \D
  3446. extuni+
  3447. \d
  3448. extuni+
  3449. \S
  3450. extuni+
  3451. \s
  3452. extuni+
  3453. \W
  3454. extuni+
  3455. \w
  3456. extuni+
  3457. Any
  3458. extuni+
  3459. \R
  3460. extuni+
  3461. \H
  3462. extuni+
  3463. \h
  3464. extuni+
  3465. \V
  3466. extuni+
  3467. \v
  3468. extuni+
  3469. extuni
  3470. extuni+
  3471. \Z
  3472. extuni++
  3473. \z
  3474. extuni+
  3475. $
  3476. Ket
  3477. End
  3478. ------------------------------------------------------------------
  3479. /\d+\s{0,5}=\s*\S?=\w{0,4}\W*/B,utf,ucp
  3480. ------------------------------------------------------------------
  3481. Bra
  3482. prop Nd ++
  3483. prop Xsp {0,5}+
  3484. =
  3485. prop Xsp *+
  3486. notprop Xsp ?
  3487. =
  3488. prop Xwd {0,4}+
  3489. notprop Xwd *+
  3490. Ket
  3491. End
  3492. ------------------------------------------------------------------
  3493. /[RST]+/Bi,utf,ucp
  3494. ------------------------------------------------------------------
  3495. Bra
  3496. [R-Tr-t\x{17f}]++
  3497. Ket
  3498. End
  3499. ------------------------------------------------------------------
  3500. /[R-T]+/Bi,utf,ucp
  3501. ------------------------------------------------------------------
  3502. Bra
  3503. [R-Tr-t\x{17f}]++
  3504. Ket
  3505. End
  3506. ------------------------------------------------------------------
  3507. /[Q-U]+/Bi,utf,ucp
  3508. ------------------------------------------------------------------
  3509. Bra
  3510. [Q-Uq-u\x{17f}]++
  3511. Ket
  3512. End
  3513. ------------------------------------------------------------------
  3514. /^s?c/Iim,utf
  3515. Capture group count = 0
  3516. Options: caseless multiline utf
  3517. First code unit at start or follows newline
  3518. Last code unit = 'c' (caseless)
  3519. Subject length lower bound = 1
  3520. scat
  3521. 0: sc
  3522. /\X?abc/utf,no_start_optimize
  3523. \xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06
  3524. 0: A\x{300}abc
  3525. /\x{100}\x{200}\K\x{300}/utf,startchar
  3526. \x{100}\x{200}\x{300}
  3527. 0: \x{100}\x{200}\x{300}
  3528. ^^^^^^^^^^^^^^
  3529. # Test UTF characters in a substitution
  3530. /ábc/utf,replace=XሴZ
  3531. 123ábc123
  3532. 1: 123X\x{1234}Z123
  3533. /(?<=abc)(|def)/g,utf,replace=<$0>
  3534. 123abcáyzabcdef789abcሴqr
  3535. 4: 123abc<>\x{e1}yzabc<><def>789abc<>\x{1234}qr
  3536. /[A-`]/iB,utf
  3537. ------------------------------------------------------------------
  3538. Bra
  3539. [A-z\x{212a}\x{17f}]
  3540. Ket
  3541. End
  3542. ------------------------------------------------------------------
  3543. abcdefghijklmno
  3544. 0: a
  3545. /(?<=\K\x{17f})/g,utf,aftertext,allow_lookaround_bsk
  3546. \x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
  3547. 0: \x{17f}
  3548. 0+ \x{17f}\x{17f}\x{17f}\x{17f}
  3549. 0: \x{17f}
  3550. 0+ \x{17f}\x{17f}\x{17f}
  3551. 0: \x{17f}
  3552. 0+ \x{17f}\x{17f}
  3553. 0: \x{17f}
  3554. 0+ \x{17f}
  3555. 0: \x{17f}
  3556. 0+
  3557. /(?<=\K\x{17f})/altglobal,utf,aftertext,allow_lookaround_bsk
  3558. \x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
  3559. 0: \x{17f}
  3560. 0+ \x{17f}\x{17f}\x{17f}\x{17f}
  3561. 0: \x{17f}
  3562. 0+ \x{17f}\x{17f}\x{17f}
  3563. 0: \x{17f}
  3564. 0+ \x{17f}\x{17f}
  3565. 0: \x{17f}
  3566. 0+ \x{17f}
  3567. 0: \x{17f}
  3568. 0+
  3569. "\xa\xf<(.\pZ*\P{Xwd}+^\xa8\3'3yq.::?(?J:()\xd1+!~:3'(8?:)':(?'d'(?'d'^u]!.+.+\\A\Ah(n+?9){7}+\K;(?'X'u'(?'c'(?'z'(?<y>\xb::\xf0'|\xd3(\xae?'w(z\x8?P>l)\x8?P>a)'\H\R\xd1+!!~:3'(?:h$N{26875}\W+?\\=D{2}\x89(?i:Uy0\N({2\xa(\v\x85*){y*\A(()\p{L}+?\P{^Xan}'+?\xff\+pS\?|).{;y*\A(()\p{L}+?\8}\d?1(|)(/1){7}.+[Lp{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(\xbf(R))\x8a\X*?\x8a\xb\xd1^9\3*+(\xc1,\k'R'\xb4)\xcc(z\z(?J)(?'X'\x1b(\xb\xd1^9\?'3*+P{^Xan}+?\xff\+(\xc1.]k+\xb'Pm'\xb4)\xcc4f\xa7'\xd1V(?i:U,{2,2})'(?'X'))?-%--\x95$9*\4'|\xd1(\x9c''%\x94$9)#(?'R')3\x7?('P\xed7'\xa8\xb1^u\xeaw\1\0\0\(|(?1){7}.+[\p{Me}].\s\xdcC*^\x14?(?(<y>))(?<!^)$C((;*?(R*?))+(?(R)\x8a\X*?\x8a\xb\xd1^9\3*+|(\xc1,\k'R'\xb4)\xcc! z)\z(?JJ)(?'X';(\xb\xd1^9\?'3*+(\xc1.]k+\xb'Pm'\xb4))':(?'d')(?'RD'(d')|)|$)'|(?<x>\g{d});\g{x}\x11\g{d}\x81\|$((?'X'\'X'(?'W''\x92()'9'\x83*))\xba*\!?^ <){)':;\xcc4'\xd1'(?'X'28))?-%--\x95$9*\4'|\xd1((''e\x94*$9:)*#(?'R')3)\x7?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+0!~:(?)'d'E:yD!\s(?'R'\x1e;\x10:U))|'\x9g!\xb0*){)\\x16:;()\x1e\x10\x87*:(?<y>)\xd1+!~:(?)'}'\d'E:yD!\s(?'R'\x1e;\x10:U))|'))|)g!\xb0*R+9{29+)#(?'P'})*?pS\{3,}\x85,{0,}l{*UTF)(\xe{7}){3722,{9,}d{2,?|))|{)\(A?&d}}{\xa,}2}){3,}7,l{)22}(,}l:7{2,4}}29\x19+)#?'P'})*v?))\x5"
  3570. Failed: error 122 at offset 1227: unmatched closing parenthesis
  3571. /$(&.+[\p{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(?(R)){0,6}?|){12\x8a\X*?\x8a\x0b\xd1^9\3*+(\xc1,\k'P'\xb4)\xcc(z\z(?JJ)(?'X'8};(\x0b\xd1^9\?'3*+(\xc1.]k+\x0b'Pm'\xb4\xcc4'\xd1'(?'X'))?-%--\x95$9*\4'|\xd1(''%\x95*$9)#(?'R')3\x07?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+!~:(?)''(d'E:yD!\s(?'R'\x1e;\x10:U))|')g!\xb0*){29+))#(?'P'})*?/
  3572. "(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'"
  3573. Failed: error 162 at offset 113: subpattern name expected
  3574. /[\pS#moq]/
  3575. =
  3576. 0: =
  3577. /(*:a\x{12345}b\t(d\)c)xxx/utf,alt_verbnames,mark
  3578. cxxxz
  3579. 0: xxx
  3580. MK: a\x{12345}b\x{09}(d)c
  3581. /abcd/utf,replace=x\x{824}y\o{3333}z(\Q12\$34$$\x34\E5$$),substitute_extended
  3582. abcd
  3583. 1: x\x{824}y\x{6db}z(12\$34$$\x345$)
  3584. /a(\x{e0}\x{101})(\x{c0}\x{102})/utf,replace=a\u$1\U$1\E$1\l$2\L$2\Eab\U\x{e0}\x{101}\L\x{d0}\x{160}\EDone,substitute_extended
  3585. a\x{e0}\x{101}\x{c0}\x{102}
  3586. 1: a\x{c0}\x{101}\x{c0}\x{100}\x{e0}\x{101}\x{e0}\x{102}\x{e0}\x{103}ab\x{c0}\x{100}\x{f0}\x{161}Done
  3587. /((?<digit>\d)|(?<letter>\p{L}))/g,substitute_extended,replace=<${digit:+digit; :not digit; }${letter:+letter:not a letter}>
  3588. ab12cde
  3589. 7: <not digit; letter><not digit; letter><digit; not a letter><digit; not a letter><not digit; letter><not digit; letter><not digit; letter>
  3590. /(*UCP)(*UTF)[[:>:]]X/B
  3591. ------------------------------------------------------------------
  3592. Bra
  3593. \b (ucp)
  3594. Assert back
  3595. Reverse
  3596. prop Xwd
  3597. Ket
  3598. X
  3599. Ket
  3600. End
  3601. ------------------------------------------------------------------
  3602. /abc/utf,replace=xyz
  3603. abc\=zero_terminate
  3604. 1: xyz
  3605. /a[[:punct:]b]/ucp,bincode
  3606. ------------------------------------------------------------------
  3607. Bra
  3608. a
  3609. [b[:punct:]]
  3610. Ket
  3611. End
  3612. ------------------------------------------------------------------
  3613. /a[[:punct:]b]/utf,ucp,bincode
  3614. ------------------------------------------------------------------
  3615. Bra
  3616. a
  3617. [b[:punct:]]
  3618. Ket
  3619. End
  3620. ------------------------------------------------------------------
  3621. /a[b[:punct:]]/utf,ucp,bincode
  3622. ------------------------------------------------------------------
  3623. Bra
  3624. a
  3625. [b[:punct:]]
  3626. Ket
  3627. End
  3628. ------------------------------------------------------------------
  3629. /[[:^ascii:]]/utf,ucp,bincode
  3630. ------------------------------------------------------------------
  3631. Bra
  3632. [\x80-\xff] (neg)
  3633. Ket
  3634. End
  3635. ------------------------------------------------------------------
  3636. /[[:^ascii:]\w]/utf,ucp,bincode
  3637. ------------------------------------------------------------------
  3638. Bra
  3639. [\x80-\xff\p{Xwd}\x{100}-\x{10ffff}]
  3640. Ket
  3641. End
  3642. ------------------------------------------------------------------
  3643. /[\w[:^ascii:]]/utf,ucp,bincode
  3644. ------------------------------------------------------------------
  3645. Bra
  3646. [\x80-\xff\p{Xwd}\x{100}-\x{10ffff}]
  3647. Ket
  3648. End
  3649. ------------------------------------------------------------------
  3650. /[^[:ascii:]\W]/utf,ucp,bincode
  3651. ------------------------------------------------------------------
  3652. Bra
  3653. [^\x00-\x7f\P{Xwd}]
  3654. Ket
  3655. End
  3656. ------------------------------------------------------------------
  3657. \x{de}
  3658. 0: \x{de}
  3659. \x{200}
  3660. 0: \x{200}
  3661. \= Expect no match
  3662. \x{589}
  3663. No match
  3664. \x{37e}
  3665. No match
  3666. /[[:^ascii:]a]/utf,ucp,bincode
  3667. ------------------------------------------------------------------
  3668. Bra
  3669. [a\x80-\xff] (neg)
  3670. Ket
  3671. End
  3672. ------------------------------------------------------------------
  3673. /L(?#(|++<!(2)?/B,utf,no_auto_possess,auto_callout
  3674. ------------------------------------------------------------------
  3675. Bra
  3676. Callout 255 0 14
  3677. L?
  3678. Callout 255 14 0
  3679. Ket
  3680. End
  3681. ------------------------------------------------------------------
  3682. /L(?#(|++<!(2)?/B,utf,ucp,auto_callout
  3683. ------------------------------------------------------------------
  3684. Bra
  3685. Callout 255 0 14
  3686. L?+
  3687. Callout 255 14 0
  3688. Ket
  3689. End
  3690. ------------------------------------------------------------------
  3691. /(*UTF)C\x09((?<!'(?x)!*H? #\xcc\x9a[^$]/
  3692. Failed: error 114 at offset 39: missing closing parenthesis
  3693. /[\D]/utf
  3694. \x{1d7cf}
  3695. 0: \x{1d7cf}
  3696. /[\D\P{Nd}]/utf
  3697. \x{1d7cf}
  3698. 0: \x{1d7cf}
  3699. /[^\D]/utf
  3700. a9b
  3701. 0: 9
  3702. \= Expect no match
  3703. \x{1d7cf}
  3704. No match
  3705. /[^\D\P{Nd}]/utf
  3706. a9b
  3707. 0: 9
  3708. \x{1d7cf}
  3709. 0: \x{1d7cf}
  3710. \= Expect no match
  3711. \x{10000}
  3712. No match
  3713. # Hex uses pattern length, not zero-terminated. This tests for overrunning
  3714. # the given length of a pattern.
  3715. /'(*UTF)'/hex
  3716. /'#('/hex,extended,utf
  3717. /a(?<=A\XB)/utf
  3718. Failed: error 125 at offset 1: length of lookbehind assertion is not limited
  3719. /../utf,auto_callout
  3720. \n\x{123}\x{123}\x{123}\x{123}
  3721. --->\x{0a}\x{123}\x{123}\x{123}\x{123}
  3722. +0 ^ .
  3723. +0 ^ .
  3724. +1 ^ ^ .
  3725. +2 ^ ^ End of pattern
  3726. 0: \x{123}\x{123}
  3727. # This tests processing wide characters in extended mode.
  3728. /XȀ/x,utf
  3729. # These three test a bug fix that was not clearing up after a locale setting
  3730. # when the test or a subsequent one matched a wide character.
  3731. //locale=C
  3732. /[\P{Yi}]/utf
  3733. \x{2f000}
  3734. 0: \x{2f000}
  3735. /[\P{Yi}]/utf,locale=C
  3736. \x{2f000}
  3737. 0: \x{2f000}
  3738. /^(?<!(?=􃡜))/B,utf
  3739. ------------------------------------------------------------------
  3740. Bra
  3741. ^
  3742. Assert back not
  3743. Assert
  3744. \x{10385c}
  3745. Ket
  3746. Ket
  3747. Ket
  3748. End
  3749. ------------------------------------------------------------------
  3750. # Horizontal and vertical space lists ignore caseless
  3751. /[\HH]/Bi,utf
  3752. ------------------------------------------------------------------
  3753. Bra
  3754. [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}]
  3755. Ket
  3756. End
  3757. ------------------------------------------------------------------
  3758. /[^\HH]/Bi,utf
  3759. ------------------------------------------------------------------
  3760. Bra
  3761. [^\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}]
  3762. Ket
  3763. End
  3764. ------------------------------------------------------------------
  3765. //g,utf
  3766. \=zero_terminate
  3767. 0:
  3768. /^(?1)\p{Nd}{3}(a)/
  3769. a123a
  3770. 0: a123a
  3771. 1: a
  3772. /\p{Nd}{0,3}[\pL](*:abc)(?C1)xxx/callout_info
  3773. Callout 1 x
  3774. # ---------------------------------------------------------------------------
  3775. # A bunch of tests that hit lines of code that others do not (at least when
  3776. # these were created).
  3777. /^[^a]{3,}?x/i,utf,no_start_optimize,no_auto_possess
  3778. \= Expect no match
  3779. bbb
  3780. No match
  3781. cc
  3782. No match
  3783. /^[ac]{3,}?x/i,utf,no_start_optimize,no_auto_possess
  3784. \= Expect no match
  3785. aaa\x{100}
  3786. No match
  3787. /^X\X/no_start_optimize,no_auto_possess
  3788. \= Expect no match
  3789. X
  3790. No match
  3791. /^X\p{L&}+?/no_start_optimize,no_auto_possess
  3792. \= Expect no match
  3793. X
  3794. No match
  3795. /^X\p{L}+?/no_start_optimize,no_auto_possess
  3796. \= Expect no match
  3797. X
  3798. No match
  3799. /^X\p{Lu}+?/no_start_optimize,no_auto_possess
  3800. \= Expect no match
  3801. X
  3802. No match
  3803. /^X\p{Arabic}+?/no_start_optimize,no_auto_possess
  3804. \= Expect no match
  3805. X
  3806. No match
  3807. /^X\p{Xan}+?/ucp,no_start_optimize,no_auto_possess
  3808. \= Expect no match
  3809. X
  3810. No match
  3811. /^X\s+?/ucp,no_start_optimize,no_auto_possess
  3812. \= Expect no match
  3813. X
  3814. No match
  3815. XX
  3816. No match
  3817. /^X\S+?/ucp,no_start_optimize,no_auto_possess
  3818. XX
  3819. 0: XX
  3820. \= Expect no match
  3821. X
  3822. No match
  3823. /^X\w+?/ucp,no_start_optimize,no_auto_possess
  3824. \= Expect no match
  3825. X
  3826. No match
  3827. /^X[^\x{b5}]+?/i,utf,no_start_optimize,no_auto_possess
  3828. \= Expect no match
  3829. X
  3830. No match
  3831. /^X[\x{b5}]+?/i,utf,no_start_optimize,no_auto_possess
  3832. \= Expect no match
  3833. X
  3834. No match
  3835. /^X\p{Xuc}+?/utf,no_start_optimize,no_auto_possess
  3836. \= Expect no match
  3837. X
  3838. No match
  3839. /^X.+?Z/s,utf,no_start_optimize,no_auto_possess
  3840. \= Expect no match
  3841. X
  3842. No match
  3843. /^X\R+?/utf,no_start_optimize,no_auto_possess
  3844. \= Expect no match
  3845. X
  3846. No match
  3847. /^X\H+?/utf,no_start_optimize,no_auto_possess
  3848. \= Expect no match
  3849. X
  3850. No match
  3851. /^X\V+?/utf,no_start_optimize,no_auto_possess
  3852. \= Expect no match
  3853. X
  3854. No match
  3855. /^X\s+?/utf,no_start_optimize,no_auto_possess
  3856. \= Expect no match
  3857. X
  3858. No match
  3859. XX
  3860. No match
  3861. /^X\S+?/utf,no_start_optimize,no_auto_possess
  3862. \= Expect no match
  3863. X
  3864. No match
  3865. /^X\p{Any}{1,3}?Z/s,no_start_optimize,no_auto_possess
  3866. XYYYZ
  3867. 0: XYYYZ
  3868. \= Expect no match
  3869. XY
  3870. No match
  3871. XYY
  3872. No match
  3873. XYYY
  3874. No match
  3875. XYYYYZ
  3876. No match
  3877. /^X\p{L&}{1,3}?Z/s,no_start_optimize,no_auto_possess
  3878. \= Expect no match
  3879. XY
  3880. No match
  3881. XY!
  3882. No match
  3883. /^X\p{L}{1,3}?Z/s,no_start_optimize,no_auto_possess
  3884. \= Expect no match
  3885. XY
  3886. No match
  3887. XY!
  3888. No match
  3889. /^X\p{Lu}{1,3}?Z/s,no_start_optimize,no_auto_possess
  3890. \= Expect no match
  3891. XY
  3892. No match
  3893. XY!
  3894. No match
  3895. /^X\P{Han}{1,3}?Z/s,utf,no_start_optimize,no_auto_possess
  3896. \= Expect no match
  3897. XY
  3898. No match
  3899. XY!
  3900. No match
  3901. XY\x{2f00}!
  3902. No match
  3903. /^X\p{Xan}{1,3}?Z/s,no_start_optimize,no_auto_possess
  3904. \= Expect no match
  3905. XY
  3906. No match
  3907. XY!
  3908. No match
  3909. /^X\p{Xsp}{1,3}?Z/s,no_start_optimize,no_auto_possess
  3910. \= Expect no match
  3911. X\n
  3912. No match
  3913. X\n!
  3914. No match
  3915. X\n\n!
  3916. No match
  3917. /^X\P{Xsp}{1,3}?Z/s,no_start_optimize,no_auto_possess
  3918. \= Expect no match
  3919. XYY\n
  3920. No match
  3921. /^X\p{Xwd}{1,3}?Z/s,no_start_optimize,no_auto_possess
  3922. \= Expect no match
  3923. XY
  3924. No match
  3925. XY!
  3926. No match
  3927. XYY!
  3928. No match
  3929. /^X\x{b5}+?Z/i,utf,no_start_optimize,no_auto_possess
  3930. \= Expect no match
  3931. X
  3932. No match
  3933. X\x{b5}
  3934. No match
  3935. X\x{b5}\x{b5}Y
  3936. No match
  3937. /^X\p{Xuc}+?Z/utf,no_start_optimize,no_auto_possess
  3938. \= Expect no match
  3939. X
  3940. No match
  3941. X$
  3942. No match
  3943. X@@Y
  3944. No match
  3945. /(*CRLF)^X.+?Z/utf,no_start_optimize,no_auto_possess
  3946. \= Expect partial match
  3947. XYY\r\=ph
  3948. Partial match: XYY\x{0d}
  3949. \= Expect no match
  3950. X
  3951. No match
  3952. /^X.+?Z/s,utf,no_start_optimize,no_auto_possess
  3953. \= Expect no match
  3954. X
  3955. No match
  3956. XYY
  3957. No match
  3958. /^X\R+?Z/utf,no_start_optimize,no_auto_possess
  3959. \= Expect no match
  3960. X\nX
  3961. No match
  3962. X\n\rX
  3963. No match
  3964. X\n\r\nX
  3965. No match
  3966. X\n\n
  3967. No match
  3968. X\n\x{0c}
  3969. No match
  3970. /(*BSR_ANYCRLF)^X\R+?Z/utf,no_start_optimize,no_auto_possess
  3971. \= Expect no match
  3972. X\nX
  3973. No match
  3974. X\n\rX
  3975. No match
  3976. X\n\r\nX
  3977. No match
  3978. X\n\n
  3979. No match
  3980. X\n\x{0c}
  3981. No match
  3982. /^X\H+?Z/utf,no_start_optimize,no_auto_possess
  3983. \= Expect no match
  3984. XY\t
  3985. No match
  3986. XYY
  3987. No match
  3988. /^X\h+?Z/utf,no_start_optimize,no_auto_possess
  3989. \= Expect no match
  3990. X\t\t
  3991. No match
  3992. X\tY
  3993. No match
  3994. /^X\V+?Z/utf,no_start_optimize,no_auto_possess
  3995. \= Expect no match
  3996. XY\n
  3997. No match
  3998. XYY
  3999. No match
  4000. /^X\v+?Z/utf,no_start_optimize,no_auto_possess
  4001. \= Expect no match
  4002. X\n\n
  4003. No match
  4004. X\nY
  4005. No match
  4006. /^X\D+?Z/utf,no_start_optimize,no_auto_possess
  4007. \= Expect no match
  4008. XY9
  4009. No match
  4010. XYY
  4011. No match
  4012. /^X\d+?Z/utf,no_start_optimize,no_auto_possess
  4013. \= Expect no match
  4014. X99
  4015. No match
  4016. X9Y
  4017. No match
  4018. /^X\S+?Z/utf,no_start_optimize,no_auto_possess
  4019. \= Expect no match
  4020. XY\n
  4021. No match
  4022. XYY
  4023. No match
  4024. /^X\s+?Z/utf,no_start_optimize,no_auto_possess
  4025. \= Expect no match
  4026. X\n\n
  4027. No match
  4028. X\nY
  4029. No match
  4030. /^X\W+?Z/utf,no_start_optimize,no_auto_possess
  4031. \= Expect no match
  4032. X.A
  4033. No match
  4034. X++
  4035. No match
  4036. /^X\p{L&}{1,3}Z/no_start_optimize,no_auto_possess
  4037. \= Expect no match
  4038. XY
  4039. No match
  4040. XY!
  4041. No match
  4042. /^X\p{L}{1,3}Z/no_start_optimize,no_auto_possess
  4043. \= Expect no match
  4044. XY
  4045. No match
  4046. /^X\p{Xan}{1,3}Z/no_start_optimize,no_auto_possess
  4047. \= Expect no match
  4048. XY
  4049. No match
  4050. /^X\P{Xsp}{1,3}Z/no_start_optimize,no_auto_possess
  4051. \= Expect no match
  4052. XYY
  4053. No match
  4054. /^X\p{Xuc}+Z/utf,no_start_optimize,no_auto_possess
  4055. \= Expect no match
  4056. X$
  4057. No match
  4058. # ----------------------------------------------------------------------
  4059. # These test the dangerous PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL option.
  4060. /\x{d800}/B,utf,bad_escape_is_literal
  4061. ------------------------------------------------------------------
  4062. Bra
  4063. x{d800}
  4064. Ket
  4065. End
  4066. ------------------------------------------------------------------
  4067. /\ud800/B,utf,alt_bsux,bad_escape_is_literal
  4068. ------------------------------------------------------------------
  4069. Bra
  4070. ud800
  4071. Ket
  4072. End
  4073. ------------------------------------------------------------------
  4074. # ----------------------------------------------------------------------
  4075. /Aሴ+B/literal,utf,no_utf_check
  4076. Aሴ+B
  4077. 0: A\x{1234}+B
  4078. # These are here because I upgraded to Unicode 10.0.0 before Perl did, so it
  4079. # doesn't recognize all these scripts. In time these three tests can be moved
  4080. # to test 4.
  4081. /^(\p{Adlam}+)(\p{Bhaiksuki}+)(\p{Marchen}+)(\p{Newa}+)(\p{Osage}+)
  4082. (\p{Tangut}+)(\p{Masaram_Gondi}+)(\p{Nushu}+)(\p{Soyombo}+)
  4083. (\p{Zanabazar_Square}+)/x,utf
  4084. \x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47}
  4085. 0: \x{1e900}\x{1e924}\x{1e953}\x{11c00}\x{11c2d}\x{11c3e}\x{11c70}\x{11c77}\x{11cab}\x{11400}\x{1142f}\x{11455}\x{104b0}\x{104d8}\x{104fb}\x{16fe0}\x{18800}\x{18af2}\x{11d00}\x{11d3a}\x{11d59}\x{16fe1}\x{1b170}\x{1b2fb}\x{11a50}\x{11a58}\x{11aa2}\x{11a00}\x{11a07}\x{11a47}
  4086. 1: \x{1e900}\x{1e924}\x{1e953}
  4087. 2: \x{11c00}\x{11c2d}\x{11c3e}
  4088. 3: \x{11c70}\x{11c77}\x{11cab}
  4089. 4: \x{11400}\x{1142f}\x{11455}
  4090. 5: \x{104b0}\x{104d8}\x{104fb}
  4091. 6: \x{16fe0}\x{18800}\x{18af2}
  4092. 7: \x{11d00}\x{11d3a}\x{11d59}
  4093. 8: \x{16fe1}\x{1b170}\x{1b2fb}
  4094. 9: \x{11a50}\x{11a58}\x{11aa2}
  4095. 10: \x{11a00}\x{11a07}\x{11a47}
  4096. /^\x{1E900}\x{104B0}/i,utf
  4097. \x{1E900}\x{104B0}
  4098. 0: \x{1e900}\x{104b0}
  4099. \x{1E922}\x{104D8}
  4100. 0: \x{1e922}\x{104d8}
  4101. /^(?:(\X)(?C))+$/utf
  4102. \x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47}\=callout_capture,callout_no_where
  4103. Callout 0: last capture = 1
  4104. 1: \x{1e900}
  4105. Callout 0: last capture = 1
  4106. 1: \x{1e924}
  4107. Callout 0: last capture = 1
  4108. 1: \x{1e953}
  4109. Callout 0: last capture = 1
  4110. 1: \x{11c00}
  4111. Callout 0: last capture = 1
  4112. 1: \x{11c2d}\x{11c3e}
  4113. Callout 0: last capture = 1
  4114. 1: \x{11c70}
  4115. Callout 0: last capture = 1
  4116. 1: \x{11c77}\x{11cab}
  4117. Callout 0: last capture = 1
  4118. 1: \x{11400}
  4119. Callout 0: last capture = 1
  4120. 1: \x{1142f}
  4121. Callout 0: last capture = 1
  4122. 1: \x{11455}
  4123. Callout 0: last capture = 1
  4124. 1: \x{104b0}
  4125. Callout 0: last capture = 1
  4126. 1: \x{104d8}
  4127. Callout 0: last capture = 1
  4128. 1: \x{104fb}
  4129. Callout 0: last capture = 1
  4130. 1: \x{16fe0}
  4131. Callout 0: last capture = 1
  4132. 1: \x{18800}
  4133. Callout 0: last capture = 1
  4134. 1: \x{18af2}
  4135. Callout 0: last capture = 1
  4136. 1: \x{11d00}\x{11d3a}
  4137. Callout 0: last capture = 1
  4138. 1: \x{11d59}
  4139. Callout 0: last capture = 1
  4140. 1: \x{16fe1}
  4141. Callout 0: last capture = 1
  4142. 1: \x{1b170}
  4143. Callout 0: last capture = 1
  4144. 1: \x{1b2fb}
  4145. Callout 0: last capture = 1
  4146. 1: \x{11a50}\x{11a58}
  4147. Callout 0: last capture = 1
  4148. 1: \x{11aa2}
  4149. Callout 0: last capture = 1
  4150. 1: \x{11a00}\x{11a07}\x{11a47}
  4151. 0: \x{1e900}\x{1e924}\x{1e953}\x{11c00}\x{11c2d}\x{11c3e}\x{11c70}\x{11c77}\x{11cab}\x{11400}\x{1142f}\x{11455}\x{104b0}\x{104d8}\x{104fb}\x{16fe0}\x{18800}\x{18af2}\x{11d00}\x{11d3a}\x{11d59}\x{16fe1}\x{1b170}\x{1b2fb}\x{11a50}\x{11a58}\x{11aa2}\x{11a00}\x{11a07}\x{11a47}
  4152. 1: \x{11a00}\x{11a07}\x{11a47}
  4153. # Similarly for Unicode 11.0.0
  4154. /^(\p{Dogra}+)(\p{Gunjala_Gondi}+)(\p{Hanifi_Rohingya}+)(\p{Makasar}+)
  4155. (\p{Medefaidrin}+)(\p{Old_Sogdian}+)(\p{Sogdian}+)/x,utf
  4156. \x{11800}\x{11da9}\x{10d27}\x{11ee0}\x{16e48}\x{10f27}\x{10f30}
  4157. 0: \x{11800}\x{11da9}\x{10d27}\x{11ee0}\x{16e48}\x{10f27}\x{10f30}
  4158. 1: \x{11800}
  4159. 2: \x{11da9}
  4160. 3: \x{10d27}
  4161. 4: \x{11ee0}
  4162. 5: \x{16e48}
  4163. 6: \x{10f27}
  4164. 7: \x{10f30}
  4165. # Regional indicators
  4166. /^(\X)(\X)/utf,aftertext
  4167. \x{1F1E6}\x{1F1E7}\x{1F1E7}B
  4168. 0: \x{1f1e6}\x{1f1e7}\x{1f1e7}
  4169. 0+ B
  4170. 1: \x{1f1e6}\x{1f1e7}
  4171. 2: \x{1f1e7}
  4172. \x{1F1E6}\x{1F1E7}\x{1F1E7}\x{1F1E6}B
  4173. 0: \x{1f1e6}\x{1f1e7}\x{1f1e7}\x{1f1e6}
  4174. 0+ B
  4175. 1: \x{1f1e6}\x{1f1e7}
  4176. 2: \x{1f1e7}\x{1f1e6}
  4177. # More differences from Perl
  4178. /^\p{Common}/utf
  4179. \x{60c}
  4180. 0: \x{60c}
  4181. \x{61f}
  4182. 0: \x{61f}
  4183. \x{964}
  4184. 0: \x{964}
  4185. \x{965}
  4186. 0: \x{965}
  4187. /^\p{Inherited}/utf
  4188. \x{64b}
  4189. 0: \x{64b}
  4190. \x{654}
  4191. 0: \x{654}
  4192. \x{655}
  4193. 0: \x{655}
  4194. \x{1D1AA}
  4195. 0: \x{1d1aa}
  4196. /\N{U+}/
  4197. Failed: error 193 at offset 2: \N{U+dddd} is supported only in Unicode (UTF) mode
  4198. /\N{U+}/utf
  4199. Failed: error 178 at offset 5: digits missing in \x{} or \o{} or \N{U+}
  4200. /\N{U}/
  4201. Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u
  4202. # This tests the non-UTF Unicode NEL pattern whitespace character, only
  4203. # recognized by PCRE2 with /x when there is Unicode support.
  4204. /A
  4205. �B/x
  4206. AB
  4207. 0: AB
  4208. # This tests Unicode Pattern White Space characters in verb names when they
  4209. # are being processed with PCRE2_EXTENDED. Note: there are UTF-8 characters
  4210. # with code points greater than 255 between A, B, and C in the pattern.
  4211. /(*: A‎B
C)abc/x,utf,mark,alt_verbnames
  4212. abc
  4213. 0: abc
  4214. MK: ABC
  4215. # Script run tests: auto-possessification
  4216. /^(*sr:.*)/B,utf
  4217. ------------------------------------------------------------------
  4218. Bra
  4219. ^
  4220. Script run
  4221. Any*
  4222. Ket
  4223. Ket
  4224. End
  4225. ------------------------------------------------------------------
  4226. paypаl.com A classic example of why script run checks are a good thing
  4227. 0: payp
  4228. /^(*sr:.*(*ACCEPT))/utf
  4229. paypаl.com But *ACCEPT breaks things
  4230. 0: payp\x{430}l.com But *ACCEPT breaks things
  4231. /^(*sr:\x{2e80}*)/B,utf
  4232. ------------------------------------------------------------------
  4233. Bra
  4234. ^
  4235. Script run
  4236. \x{2e80}*+
  4237. Ket
  4238. Ket
  4239. End
  4240. ------------------------------------------------------------------
  4241. /^(*sr:\x{2e80}*)\x{2e80}/B,utf
  4242. ------------------------------------------------------------------
  4243. Bra
  4244. ^
  4245. Script run
  4246. \x{2e80}*
  4247. Ket
  4248. \x{2e80}
  4249. Ket
  4250. End
  4251. ------------------------------------------------------------------
  4252. /(?<!)(*sr:)/B
  4253. ------------------------------------------------------------------
  4254. Bra
  4255. Assert back not
  4256. Ket
  4257. Script run
  4258. Ket
  4259. Ket
  4260. End
  4261. ------------------------------------------------------------------
  4262. /(?<=abc(?=X(*sr:BXY)CCC)XBXYCCC)./B
  4263. ------------------------------------------------------------------
  4264. Bra
  4265. Assert back
  4266. Reverse
  4267. abc
  4268. Assert
  4269. X
  4270. Script run
  4271. BXY
  4272. Ket
  4273. CCC
  4274. Ket
  4275. XBXYCCC
  4276. Ket
  4277. Any
  4278. Ket
  4279. End
  4280. ------------------------------------------------------------------
  4281. abcXBXYCCC!
  4282. 0: !
  4283. # Some script run patterns are broken in Perl 5.28.0. These can be moved into
  4284. # test 4 when a mended version of Perl is released.
  4285. /^(*sr:.{4})/utf
  4286. \x{0980}12\x{0993} Bengali Common-digits Bengali
  4287. 0: \x{980}12\x{993}
  4288. \x{0780}12\x{07b1} Thaana Common-digits Thaana
  4289. 0: \x{780}12\x{7b1}
  4290. \x{0e01}12\x{0e5b} Thai Common-digits Thai
  4291. 0: \x{e01}12\x{e5b}
  4292. \x{1780}12\x{19ff} Khmer Common-digits Khmer
  4293. 0: \x{1780}12\x{19ff}
  4294. \x{0904}12\x{0939} Devanagari Common-digits Devanagari
  4295. 0: \x{904}12\x{939}
  4296. A\x{ff10}\x{ff19}B Latin Common-notascii-digits Latin
  4297. 0: A\x{ff10}\x{ff19}B
  4298. A\x{1d7ce}\x{1d7cf}B Latin fancy-common-digits Latin
  4299. 0: A\x{1d7ce}\x{1d7cf}B
  4300. # These ones involve non-ASCII but nevertheless Common digits. As of October
  4301. # 2018 even blead Perl wasn't handling all of these - but is going to.
  4302. /^(*sr:.{4})/utf
  4303. A\x{ff10}\x{ff19}B Latin Common-notascii-digits Latin
  4304. 0: A\x{ff10}\x{ff19}B
  4305. \x{ff10}\x{ff19}.. Common-notascii-digits Common Common
  4306. 0: \x{ff10}\x{ff19}..
  4307. A\x{ff10}BC Latin Common-notascii-digit Latin Latin
  4308. 0: A\x{ff10}BC
  4309. A\x{1d7ce}\x{1d7cf}B Latin fancy-common-digits Latin
  4310. 0: A\x{1d7ce}\x{1d7cf}B
  4311. \x{1d7ce}\x{1d7cf},, fancy-common-digits Common Common
  4312. 0: \x{1d7ce}\x{1d7cf},,
  4313. A\x{1d7ce}BC Latin fancy-common-digit Latin Latin
  4314. 0: A\x{1d7ce}BC
  4315. # Some Unicode 12.1.0 new script characters
  4316. /\p{Elymaic}\p{Nandinagari}\p{Nyiakeng_Puachue_Hmong}\p{Wancho}/utf
  4317. \x{10fe5}\x{119AC}\x{1E10E}\x{1E2D1}
  4318. 0: \x{10fe5}\x{119ac}\x{1e10e}\x{1e2d1}
  4319. # Some Unicode 13.0.0 new script characters
  4320. /\p{Chorasmian}\p{Dives_Akuru}\p{Khitan_Small_Script}\p{Yezidi}/utf
  4321. \x{10FB0}\x{11900}\x{18B00}\x{10E80}
  4322. 0: \x{10fb0}\x{11900}\x{18b00}\x{10e80}
  4323. # -------
  4324. # Test reference and errors in non-ASCII characters in group names
  4325. /(?'𑠅ABC'...)/I,utf
  4326. Capture group count = 1
  4327. Named capture groups:
  4328. 𑠅ABC 1
  4329. Options: utf
  4330. Subject length lower bound = 3
  4331. abcde\=copy=𑠅ABC
  4332. 0: abc
  4333. 1: abc
  4334. C abc (3) 𑠅ABC (group 1)
  4335. # Bad ones
  4336. /(?'AB၌C'...)\g{AB၌C}/utf
  4337. Failed: error 142 at offset 5: syntax error in subpattern name (missing terminator?)
  4338. /(?'٠ABC'...)/utf
  4339. Failed: error 144 at offset 3: subpattern name must start with a non-digit
  4340. /(?'²ABC'...)/utf
  4341. Failed: error 162 at offset 3: subpattern name expected
  4342. /(?'X²ABC'...)/utf
  4343. Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?)
  4344. # -------
  4345. /\p{Any}*xyz/I
  4346. Capture group count = 0
  4347. Compile options: <none>
  4348. Overall options: anchored
  4349. Last code unit = 'z'
  4350. Subject length lower bound = 3
  4351. /(|�)7/caseless,ucp
  4352. /(\xc1)\1/i,ucp
  4353. \xc1\xe1\=no_jit
  4354. 0: \xc1\xe1
  4355. 1: \xc1
  4356. /\p{L&}+\p{bidi_control}/B
  4357. ------------------------------------------------------------------
  4358. Bra
  4359. prop Lc +
  4360. prop Bidicontrol
  4361. Ket
  4362. End
  4363. ------------------------------------------------------------------
  4364. /\p{bidi_control}+\p{L&}/B
  4365. ------------------------------------------------------------------
  4366. Bra
  4367. prop Bidicontrol +
  4368. prop Lc
  4369. Ket
  4370. End
  4371. ------------------------------------------------------------------
  4372. /\p{han}/B
  4373. ------------------------------------------------------------------
  4374. Bra
  4375. prop Han
  4376. Ket
  4377. End
  4378. ------------------------------------------------------------------
  4379. /\p{script:han}/B
  4380. ------------------------------------------------------------------
  4381. Bra
  4382. prop script:Han
  4383. Ket
  4384. End
  4385. ------------------------------------------------------------------
  4386. /\p{sc:han}/B
  4387. ------------------------------------------------------------------
  4388. Bra
  4389. prop script:Han
  4390. Ket
  4391. End
  4392. ------------------------------------------------------------------
  4393. /\p{script extensions:han}/B
  4394. ------------------------------------------------------------------
  4395. Bra
  4396. prop Han
  4397. Ket
  4398. End
  4399. ------------------------------------------------------------------
  4400. /\p{scx:han}/B
  4401. ------------------------------------------------------------------
  4402. Bra
  4403. prop Han
  4404. Ket
  4405. End
  4406. ------------------------------------------------------------------
  4407. # Test error - invalid script name
  4408. /\p{sc:L}/
  4409. Failed: error 147 at offset 8: unknown property after \P or \p
  4410. # Some Boolean property tests that differ from Perl
  4411. /\p{emojimodifierbase}\p{ebase}/g,utf
  4412. >AN<>\x{261d}\x{1faf6}<>yz<
  4413. 0: \x{261d}\x{1faf6}
  4414. /\p{graphemelink}\p{grlink}/g,utf
  4415. >AN<>\x{11d97}\x{94d}<>yz<
  4416. 0: \x{11d97}\x{94d}
  4417. /\p{soft dotted}\p{sd}/g,utf
  4418. >AF23<>\x{1df1a}\x{69}<>yz<
  4419. 0: \x{1df1a}i
  4420. # ------------------------------------------------
  4421. /\p{\2b[:x�igi:t:_/
  4422. Failed: error 146 at offset 17: malformed \P or \p sequence
  4423. # Tests for PCRE2_EXTRA_CASELESS_RESTRICT. Compare each test with and without
  4424. # the restriction.
  4425. /AskZ/i,utf,caseless_restrict
  4426. AskZ
  4427. 0: AskZ
  4428. aSKz
  4429. 0: aSKz
  4430. \= Expect no match
  4431. A\x{17f}kZ
  4432. No match
  4433. As\x{212a}Z
  4434. No match
  4435. /AskZ/i,utf
  4436. AskZ
  4437. 0: AskZ
  4438. aSKz
  4439. 0: aSKz
  4440. A\x{17f}kZ
  4441. 0: A\x{17f}kZ
  4442. As\x{212a}Z
  4443. 0: As\x{212a}Z
  4444. /A\x{17f}\x{212a}Z/ir,utf
  4445. \= Expect no match
  4446. AskZ
  4447. No match
  4448. /A\x{17f}\x{212a}Z/i,utf
  4449. AskZ
  4450. 0: AskZ
  4451. /[AskZ]+/i,utf,caseless_restrict
  4452. AskZ
  4453. 0: AskZ
  4454. aSKz
  4455. 0: aSKz
  4456. A\x{17f}kZ
  4457. 0: A
  4458. As\x{212a}Z
  4459. 0: As
  4460. /[AskZ]+/i,utf
  4461. AskZ
  4462. 0: AskZ
  4463. aSKz
  4464. 0: aSKz
  4465. A\x{17f}kZ
  4466. 0: A\x{17f}kZ
  4467. As\x{212a}Z
  4468. 0: As\x{212a}Z
  4469. /[\x{17f}\x{212a}]+/ir,utf
  4470. \= Expect no match
  4471. AskZ
  4472. No match
  4473. /[\x{17f}\x{212a}]+/i,utf
  4474. AskZ
  4475. 0: sk
  4476. /[^s]+/ir,utf
  4477. A\x{17f}Z
  4478. 0: A\x{17f}Z
  4479. /[^s]+/i,utf
  4480. A\x{17f}Z
  4481. 0: A
  4482. /[^k]+/ir,utf
  4483. A\x{212a}Z
  4484. 0: A\x{212a}Z
  4485. /[^k]+/i,utf
  4486. A\x{212a}Z
  4487. 0: A
  4488. /[^sk]+/ir,utf
  4489. A\x{17f}\x{212a}Z
  4490. 0: A\x{17f}\x{212a}Z
  4491. /[^sk]+/i,utf
  4492. A\x{17f}\x{212a}Z
  4493. 0: A
  4494. /[^\x{17f}]+/ir,utf
  4495. AsSZ
  4496. 0: AsSZ
  4497. /[^\x{17f}]+/i,utf
  4498. AsSZ
  4499. 0: A
  4500. /[Ss]+/irB,utf
  4501. ------------------------------------------------------------------
  4502. Bra
  4503. /i S++
  4504. Ket
  4505. End
  4506. ------------------------------------------------------------------
  4507. Sss\x{17f}ss
  4508. 0: Sss
  4509. /[Ss]+/iB,utf
  4510. ------------------------------------------------------------------
  4511. Bra
  4512. [Ss\x{17f}\x{17f}]++
  4513. Ket
  4514. End
  4515. ------------------------------------------------------------------
  4516. Sss\x{17f}ss
  4517. 0: Sss\x{17f}ss
  4518. /[S\x{17f}]/irB,utf
  4519. ------------------------------------------------------------------
  4520. Bra
  4521. [Ss\x{17f}]
  4522. Ket
  4523. End
  4524. ------------------------------------------------------------------
  4525. /[S\x{17f}]/iB,utf
  4526. ------------------------------------------------------------------
  4527. Bra
  4528. [Ss\x{17f}\x{17f}]
  4529. Ket
  4530. End
  4531. ------------------------------------------------------------------
  4532. /[\x{17f}s]/irB,utf
  4533. ------------------------------------------------------------------
  4534. Bra
  4535. [Ss\x{17f}]
  4536. Ket
  4537. End
  4538. ------------------------------------------------------------------
  4539. /[\x{17f}s]/iB,utf
  4540. ------------------------------------------------------------------
  4541. Bra
  4542. [Ss\x{17f}\x{17f}]
  4543. Ket
  4544. End
  4545. ------------------------------------------------------------------
  4546. /[\x{4b}\x{6b}]/irB,utf
  4547. ------------------------------------------------------------------
  4548. Bra
  4549. /i K
  4550. Ket
  4551. End
  4552. ------------------------------------------------------------------
  4553. /[\x{4b}\x{6b}]/iB,utf
  4554. ------------------------------------------------------------------
  4555. Bra
  4556. [Kk\x{212a}\x{212a}]
  4557. Ket
  4558. End
  4559. ------------------------------------------------------------------
  4560. /s(?r)s(?-r)s(?r:s)s/i,utf
  4561. \x{17f}S\x{17f}S\x{17f}
  4562. 0: \x{17f}S\x{17f}S\x{17f}
  4563. \= Expect no match
  4564. \x{17f}\x{17f}\x{17f}S\x{17f}
  4565. No match
  4566. \x{17f}S\x{17f}\x{17f}\x{17f}
  4567. No match
  4568. /k(?^i)k/ir,utf
  4569. K\x{212a}
  4570. 0: K\x{212a}
  4571. \= Expect no match
  4572. \x{212a}\x{212a}
  4573. No match
  4574. # End caseless restrict tests
  4575. # TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without.
  4576. # DIGITS
  4577. /\d+/i,utf
  4578. 123\x{660}456
  4579. 0: 123
  4580. /\d+/i,utf,ucp
  4581. 123\x{660}456
  4582. 0: 123\x{660}456
  4583. /\d+/i,utf,ucp,ascii_bsd
  4584. 123\x{660}456
  4585. 0: 123
  4586. /[\d]+/i,utf
  4587. 123\x{660}456
  4588. 0: 123
  4589. /[\d]+/i,utf,ucp
  4590. 123\x{660}456
  4591. 0: 123\x{660}456
  4592. /[\d]+/i,utf,ucp,ascii_bsd
  4593. 123\x{660}456
  4594. 0: 123
  4595. /\d(?aD)\d(?-aD)\d/utf,ucp
  4596. \x{660}9\x{660}
  4597. 0: \x{660}9\x{660}
  4598. \= Expect no match
  4599. \x{660}\x{660}\x{660}
  4600. No match
  4601. /\d(?-aD)\d(?aD)\d/utf,ucp,ascii_bsd
  4602. 999
  4603. 0: 999
  4604. 9\x{660}9
  4605. 0: 9\x{660}9
  4606. /\d(?a)\d(?-a)\d/utf,ucp
  4607. \x{660}9\x{660}
  4608. 0: \x{660}9\x{660}
  4609. \= Expect no match
  4610. \x{660}\x{660}\x{660}
  4611. No match
  4612. /\d(?-aD)\d(?aD)\d/utf,ucp,ascii_bsd
  4613. 999
  4614. 0: 999
  4615. 9\x{660}9
  4616. 0: 9\x{660}9
  4617. # SPACES
  4618. />\s+</i,utf
  4619. > <
  4620. 0: > <
  4621. \= Expect no match
  4622. >\x{a0} <
  4623. No match
  4624. />\s+</i,utf,ucp
  4625. > <
  4626. 0: > <
  4627. >\x{a0} <
  4628. 0: >\x{a0} <
  4629. />\s+</i,utf,ucp,ascii_bss
  4630. > <
  4631. 0: > <
  4632. \= Expect no match
  4633. >\x{a0} <
  4634. No match
  4635. />[\s]+</i,utf
  4636. > <
  4637. 0: > <
  4638. \= Expect no match
  4639. >\x{a0} <
  4640. No match
  4641. />[\s]+</i,utf,ucp
  4642. > <
  4643. 0: > <
  4644. >\x{a0} <
  4645. 0: >\x{a0} <
  4646. />[\s]+</i,utf,ucp,ascii_bss
  4647. > <
  4648. 0: > <
  4649. \= Expect no match
  4650. >\x{a0} <
  4651. No match
  4652. />\s(?aS)\s(?-aS)\s</utf,ucp
  4653. >\x{a0} \x{a0}<
  4654. 0: >\x{a0} \x{a0}<
  4655. \= Expect no match
  4656. >\x{a0}\x{a0}\x{a0}<
  4657. No match
  4658. />\s(?a)\s(?-a)\s</utf,ucp
  4659. >\x{a0} \x{a0}<
  4660. 0: >\x{a0} \x{a0}<
  4661. \= Expect no match
  4662. >\x{a0}\x{a0}\x{a0}<
  4663. No match
  4664. # WORDS
  4665. /\w+/i,utf
  4666. 123\x{660}abc
  4667. 0: 123
  4668. /\w+/i,utf,ucp
  4669. 123\x{660}abc
  4670. 0: 123\x{660}abc
  4671. /\w+/i,utf,ucp,ascii_bsw
  4672. 123\x{660}abc
  4673. 0: 123
  4674. /[\w]+/i,utf
  4675. 123\x{660}abc
  4676. 0: 123
  4677. /[\w]+/i,utf,ucp
  4678. 123\x{660}abc
  4679. 0: 123\x{660}abc
  4680. /[\w]+/i,utf,ucp,ascii_bsw
  4681. 123\x{660}abc
  4682. 0: 123
  4683. /\w(?aW)\w(?-aW)\w/utf,ucp
  4684. \x{660}A\x{c0}
  4685. 0: \x{660}A\x{c0}
  4686. \= Expect no match
  4687. \x{660}\x{c0}\x{c0}
  4688. No match
  4689. /\w(?a)\w(?-a)\w/utf,ucp
  4690. \x{660}A\x{c0}
  4691. 0: \x{660}A\x{c0}
  4692. \= Expect no match
  4693. \x{660}\x{c0}\x{c0}
  4694. No match
  4695. # WORD BOUNDARY
  4696. /\bABC\b/utf
  4697. \x{c0}ABC\x{d0}
  4698. 0: ABC
  4699. /\bABC\b/utf,ucp
  4700. \= Expect no match
  4701. \x{c0}ABC\x{d0}
  4702. No match
  4703. /\bABC\b/utf,ucp,ascii_bsw
  4704. \x{c0}ABC\x{d0}
  4705. 0: ABC
  4706. /\bABC\b/utf,ucp,ascii_all
  4707. \x{c0}ABC\x{d0}
  4708. 0: ABC
  4709. # POSIX
  4710. /^[[:digit:]]+$/utf,ucp
  4711. 123456
  4712. 0: 123456
  4713. 123\x{660}456
  4714. 0: 123\x{660}456
  4715. /^[[:digit:]]+$/utf,ucp,ascii_digit
  4716. 123456
  4717. 0: 123456
  4718. \= Expect no match
  4719. 123\x{660}456
  4720. No match
  4721. /[[:digit:]]+/g,utf,ucp,ascii_digit
  4722. 123\x{660}456
  4723. 0: 123
  4724. 0: 456
  4725. /(?-aT)[[:digit:]](?aT)[[:digit:]]/utf,ucp,ascii_digit
  4726. 11
  4727. 0: 11
  4728. \x{ff11}1
  4729. 0: \x{ff11}1
  4730. \= Expect no match
  4731. 1\x{ff11}
  4732. No match
  4733. /(?-aT:[[:digit:]])[[:digit:]]/utf,ucp,ascii_digit
  4734. 11
  4735. 0: 11
  4736. \x{ff11}1
  4737. 0: \x{ff11}1
  4738. \= Expect no match
  4739. 1\x{ff11}
  4740. No match
  4741. /(?-aT:[[:digit:]])[[:digit:]]/utf,never_ucp,ascii_digit
  4742. 11
  4743. 0: 11
  4744. \= Expect no match
  4745. \x{ff11}1
  4746. No match
  4747. 1\x{ff11}
  4748. No match
  4749. /[[:digit:]]+/utf,ucp,ascii_posix
  4750. 123\x{660}456
  4751. 0: 123
  4752. /(?-aP)[[:digit:]](?aP)[[:digit:]]/utf,ucp,ascii_posix
  4753. 11
  4754. 0: 11
  4755. \x{ff11}1
  4756. 0: \x{ff11}1
  4757. \= Expect no match
  4758. 1\x{ff11}
  4759. No match
  4760. /(?-aP:[[:digit:]])[[:digit:]]/utf,ucp,ascii_posix
  4761. 11
  4762. 0: 11
  4763. \x{ff11}1
  4764. 0: \x{ff11}1
  4765. \= Expect no match
  4766. 1\x{ff11}
  4767. No match
  4768. /(?-a:[[:digit:]])[[:digit:]]/a,utf,ucp
  4769. 11
  4770. 0: 11
  4771. \x{ff11}1
  4772. 0: \x{ff11}1
  4773. \= Expect no match
  4774. 1\x{ff11}
  4775. No match
  4776. /^[[:xdigit:]]+$/utf,ucp
  4777. f0
  4778. 0: f0
  4779. 1A
  4780. 0: 1A
  4781. d\x{ff10}
  4782. 0: d\x{ff10}
  4783. \x{ff26}8
  4784. 0: \x{ff26}8
  4785. \= Expect no match
  4786. 8g\=no_jit
  4787. No match
  4788. /^[[:xdigit:]]+$/utf,ucp,ascii_digit
  4789. f0
  4790. 0: f0
  4791. 1A
  4792. 0: 1A
  4793. \= Expect no match
  4794. d\x{ff10}
  4795. No match
  4796. \x{ff26}8
  4797. No match
  4798. 8g
  4799. No match
  4800. />[[:space:]]+</utf,ucp
  4801. >\x{a0} \x{a0}<
  4802. 0: >\x{a0} \x{a0}<
  4803. >\x{a0}\x{a0}\x{a0}<
  4804. 0: >\x{a0}\x{a0}\x{a0}<
  4805. />[[:space:]]+</utf,ucp,ascii_posix
  4806. \= Expect no match
  4807. >\x{a0} \x{a0}<
  4808. No match
  4809. /(?aP)[[:alnum:]]+/i,ucp,utf
  4810. abcáxyz
  4811. 0: abc
  4812. abc\x{660}xyz
  4813. 0: abc
  4814. /(?aP)[[:alnum:]\d]+/i,ucp,utf
  4815. abc\x{660}xyz
  4816. 0: abc\x{660}xyz
  4817. /(*UCP)(*UTF)[[:alnum:]](?aP:[[:alnum:]])[[:alnum:]]/
  4818. \x{660}A\x{660}
  4819. 0: \x{660}A\x{660}
  4820. \= Expect no match
  4821. \x{660}\x{660}\x{660}
  4822. No match
  4823. # VARIOUS
  4824. /[\d\s\w]+/a,ucp,utf
  4825. 9 A\x{660}À
  4826. 0: 9 A
  4827. 9 AÀ\x{660}
  4828. 0: 9 A
  4829. # End PCRE2_EXTRA_ASCII_xxx tests
  4830. /(?<!(|l ))/utf
  4831. (?<!(|l ))
  4832. No match
  4833. # End of testinput5