github的一些开源项目
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1858 lines
48 KiB

  1. # This set of tests is for UTF-16 and UTF-32 support, including Unicode
  2. # properties. It is relevant only to the 16-bit and 32-bit libraries. The
  3. # output is different for each library, so there are separate output files.
  4. /���xxx/IB,utf,no_utf_check
  5. ** Failed: invalid UTF-8 string cannot be converted to 32-bit string
  6. /abc/utf
  7. �]
  8. ** Failed: invalid UTF-8 string cannot be used as input in UTF mode
  9. # Check maximum character size
  10. /\x{ffff}/IB,utf
  11. ------------------------------------------------------------------
  12. Bra
  13. \x{ffff}
  14. Ket
  15. End
  16. ------------------------------------------------------------------
  17. Capture group count = 0
  18. Options: utf
  19. First code unit = \x{ffff}
  20. Subject length lower bound = 1
  21. /\x{10000}/IB,utf
  22. ------------------------------------------------------------------
  23. Bra
  24. \x{10000}
  25. Ket
  26. End
  27. ------------------------------------------------------------------
  28. Capture group count = 0
  29. Options: utf
  30. First code unit = \x{10000}
  31. Subject length lower bound = 1
  32. /\x{100}/IB,utf
  33. ------------------------------------------------------------------
  34. Bra
  35. \x{100}
  36. Ket
  37. End
  38. ------------------------------------------------------------------
  39. Capture group count = 0
  40. Options: utf
  41. First code unit = \x{100}
  42. Subject length lower bound = 1
  43. /\x{1000}/IB,utf
  44. ------------------------------------------------------------------
  45. Bra
  46. \x{1000}
  47. Ket
  48. End
  49. ------------------------------------------------------------------
  50. Capture group count = 0
  51. Options: utf
  52. First code unit = \x{1000}
  53. Subject length lower bound = 1
  54. /\x{10000}/IB,utf
  55. ------------------------------------------------------------------
  56. Bra
  57. \x{10000}
  58. Ket
  59. End
  60. ------------------------------------------------------------------
  61. Capture group count = 0
  62. Options: utf
  63. First code unit = \x{10000}
  64. Subject length lower bound = 1
  65. /\x{100000}/IB,utf
  66. ------------------------------------------------------------------
  67. Bra
  68. \x{100000}
  69. Ket
  70. End
  71. ------------------------------------------------------------------
  72. Capture group count = 0
  73. Options: utf
  74. First code unit = \x{100000}
  75. Subject length lower bound = 1
  76. /\x{10ffff}/IB,utf
  77. ------------------------------------------------------------------
  78. Bra
  79. \x{10ffff}
  80. Ket
  81. End
  82. ------------------------------------------------------------------
  83. Capture group count = 0
  84. Options: utf
  85. First code unit = \x{10ffff}
  86. Subject length lower bound = 1
  87. /[\x{ff}]/IB,utf
  88. ------------------------------------------------------------------
  89. Bra
  90. \x{ff}
  91. Ket
  92. End
  93. ------------------------------------------------------------------
  94. Capture group count = 0
  95. Options: utf
  96. First code unit = \xff
  97. Subject length lower bound = 1
  98. /[\x{100}]/IB,utf
  99. ------------------------------------------------------------------
  100. Bra
  101. \x{100}
  102. Ket
  103. End
  104. ------------------------------------------------------------------
  105. Capture group count = 0
  106. Options: utf
  107. First code unit = \x{100}
  108. Subject length lower bound = 1
  109. /\x80/IB,utf
  110. ------------------------------------------------------------------
  111. Bra
  112. \x{80}
  113. Ket
  114. End
  115. ------------------------------------------------------------------
  116. Capture group count = 0
  117. Options: utf
  118. First code unit = \x80
  119. Subject length lower bound = 1
  120. /\xff/IB,utf
  121. ------------------------------------------------------------------
  122. Bra
  123. \x{ff}
  124. Ket
  125. End
  126. ------------------------------------------------------------------
  127. Capture group count = 0
  128. Options: utf
  129. First code unit = \xff
  130. Subject length lower bound = 1
  131. /\x{D55c}\x{ad6d}\x{C5B4}/IB,utf
  132. ------------------------------------------------------------------
  133. Bra
  134. \x{d55c}\x{ad6d}\x{c5b4}
  135. Ket
  136. End
  137. ------------------------------------------------------------------
  138. Capture group count = 0
  139. Options: utf
  140. First code unit = \x{d55c}
  141. Last code unit = \x{c5b4}
  142. Subject length lower bound = 3
  143. \x{D55c}\x{ad6d}\x{C5B4}
  144. 0: \x{d55c}\x{ad6d}\x{c5b4}
  145. /\x{65e5}\x{672c}\x{8a9e}/IB,utf
  146. ------------------------------------------------------------------
  147. Bra
  148. \x{65e5}\x{672c}\x{8a9e}
  149. Ket
  150. End
  151. ------------------------------------------------------------------
  152. Capture group count = 0
  153. Options: utf
  154. First code unit = \x{65e5}
  155. Last code unit = \x{8a9e}
  156. Subject length lower bound = 3
  157. \x{65e5}\x{672c}\x{8a9e}
  158. 0: \x{65e5}\x{672c}\x{8a9e}
  159. /\x{80}/IB,utf
  160. ------------------------------------------------------------------
  161. Bra
  162. \x{80}
  163. Ket
  164. End
  165. ------------------------------------------------------------------
  166. Capture group count = 0
  167. Options: utf
  168. First code unit = \x80
  169. Subject length lower bound = 1
  170. /\x{084}/IB,utf
  171. ------------------------------------------------------------------
  172. Bra
  173. \x{84}
  174. Ket
  175. End
  176. ------------------------------------------------------------------
  177. Capture group count = 0
  178. Options: utf
  179. First code unit = \x84
  180. Subject length lower bound = 1
  181. /\x{104}/IB,utf
  182. ------------------------------------------------------------------
  183. Bra
  184. \x{104}
  185. Ket
  186. End
  187. ------------------------------------------------------------------
  188. Capture group count = 0
  189. Options: utf
  190. First code unit = \x{104}
  191. Subject length lower bound = 1
  192. /\x{861}/IB,utf
  193. ------------------------------------------------------------------
  194. Bra
  195. \x{861}
  196. Ket
  197. End
  198. ------------------------------------------------------------------
  199. Capture group count = 0
  200. Options: utf
  201. First code unit = \x{861}
  202. Subject length lower bound = 1
  203. /\x{212ab}/IB,utf
  204. ------------------------------------------------------------------
  205. Bra
  206. \x{212ab}
  207. Ket
  208. End
  209. ------------------------------------------------------------------
  210. Capture group count = 0
  211. Options: utf
  212. First code unit = \x{212ab}
  213. Subject length lower bound = 1
  214. /[^ab\xC0-\xF0]/IB,utf
  215. ------------------------------------------------------------------
  216. Bra
  217. [\x00-`c-\xbf\xf1-\xff] (neg)
  218. Ket
  219. End
  220. ------------------------------------------------------------------
  221. Capture group count = 0
  222. Options: utf
  223. Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
  224. \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
  225. \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
  226. 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
  227. Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
  228. \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e
  229. \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d
  230. \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac
  231. \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb
  232. \xbc \xbd \xbe \xbf \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb
  233. \xfc \xfd \xfe \xff
  234. Subject length lower bound = 1
  235. \x{f1}
  236. 0: \x{f1}
  237. \x{bf}
  238. 0: \x{bf}
  239. \x{100}
  240. 0: \x{100}
  241. \x{1000}
  242. 0: \x{1000}
  243. \= Expect no match
  244. \x{c0}
  245. No match
  246. \x{f0}
  247. No match
  248. /Ā{3,4}/IB,utf
  249. ------------------------------------------------------------------
  250. Bra
  251. \x{100}{3}
  252. \x{100}?+
  253. Ket
  254. End
  255. ------------------------------------------------------------------
  256. Capture group count = 0
  257. Options: utf
  258. First code unit = \x{100}
  259. Last code unit = \x{100}
  260. Subject length lower bound = 3
  261. \x{100}\x{100}\x{100}\x{100\x{100}
  262. 0: \x{100}\x{100}\x{100}
  263. /(\x{100}+|x)/IB,utf
  264. ------------------------------------------------------------------
  265. Bra
  266. CBra 1
  267. \x{100}++
  268. Alt
  269. x
  270. Ket
  271. Ket
  272. End
  273. ------------------------------------------------------------------
  274. Capture group count = 1
  275. Options: utf
  276. Starting code units: x \xff
  277. Subject length lower bound = 1
  278. /(\x{100}*a|x)/IB,utf
  279. ------------------------------------------------------------------
  280. Bra
  281. CBra 1
  282. \x{100}*+
  283. a
  284. Alt
  285. x
  286. Ket
  287. Ket
  288. End
  289. ------------------------------------------------------------------
  290. Capture group count = 1
  291. Options: utf
  292. Starting code units: a x \xff
  293. Subject length lower bound = 1
  294. /(\x{100}{0,2}a|x)/IB,utf
  295. ------------------------------------------------------------------
  296. Bra
  297. CBra 1
  298. \x{100}{0,2}+
  299. a
  300. Alt
  301. x
  302. Ket
  303. Ket
  304. End
  305. ------------------------------------------------------------------
  306. Capture group count = 1
  307. Options: utf
  308. Starting code units: a x \xff
  309. Subject length lower bound = 1
  310. /(\x{100}{1,2}a|x)/IB,utf
  311. ------------------------------------------------------------------
  312. Bra
  313. CBra 1
  314. \x{100}
  315. \x{100}{0,1}+
  316. a
  317. Alt
  318. x
  319. Ket
  320. Ket
  321. End
  322. ------------------------------------------------------------------
  323. Capture group count = 1
  324. Options: utf
  325. Starting code units: x \xff
  326. Subject length lower bound = 1
  327. /\x{100}/IB,utf
  328. ------------------------------------------------------------------
  329. Bra
  330. \x{100}
  331. Ket
  332. End
  333. ------------------------------------------------------------------
  334. Capture group count = 0
  335. Options: utf
  336. First code unit = \x{100}
  337. Subject length lower bound = 1
  338. /a\x{100}\x{101}*/IB,utf
  339. ------------------------------------------------------------------
  340. Bra
  341. a\x{100}
  342. \x{101}*+
  343. Ket
  344. End
  345. ------------------------------------------------------------------
  346. Capture group count = 0
  347. Options: utf
  348. First code unit = 'a'
  349. Last code unit = \x{100}
  350. Subject length lower bound = 2
  351. /a\x{100}\x{101}+/IB,utf
  352. ------------------------------------------------------------------
  353. Bra
  354. a\x{100}
  355. \x{101}++
  356. Ket
  357. End
  358. ------------------------------------------------------------------
  359. Capture group count = 0
  360. Options: utf
  361. First code unit = 'a'
  362. Last code unit = \x{101}
  363. Subject length lower bound = 3
  364. /[^\x{c4}]/IB
  365. ------------------------------------------------------------------
  366. Bra
  367. [^\x{c4}]
  368. Ket
  369. End
  370. ------------------------------------------------------------------
  371. Capture group count = 0
  372. Subject length lower bound = 1
  373. /[\x{100}]/IB,utf
  374. ------------------------------------------------------------------
  375. Bra
  376. \x{100}
  377. Ket
  378. End
  379. ------------------------------------------------------------------
  380. Capture group count = 0
  381. Options: utf
  382. First code unit = \x{100}
  383. Subject length lower bound = 1
  384. \x{100}
  385. 0: \x{100}
  386. Z\x{100}
  387. 0: \x{100}
  388. \x{100}Z
  389. 0: \x{100}
  390. /[\xff]/IB,utf
  391. ------------------------------------------------------------------
  392. Bra
  393. \x{ff}
  394. Ket
  395. End
  396. ------------------------------------------------------------------
  397. Capture group count = 0
  398. Options: utf
  399. First code unit = \xff
  400. Subject length lower bound = 1
  401. >\x{ff}<
  402. 0: \x{ff}
  403. /[^\xff]/IB,utf
  404. ------------------------------------------------------------------
  405. Bra
  406. [^\x{ff}]
  407. Ket
  408. End
  409. ------------------------------------------------------------------
  410. Capture group count = 0
  411. Options: utf
  412. Subject length lower bound = 1
  413. /\x{100}abc(xyz(?1))/IB,utf
  414. ------------------------------------------------------------------
  415. Bra
  416. \x{100}abc
  417. CBra 1
  418. xyz
  419. Recurse
  420. Ket
  421. Ket
  422. End
  423. ------------------------------------------------------------------
  424. Capture group count = 1
  425. Options: utf
  426. First code unit = \x{100}
  427. Last code unit = 'z'
  428. Subject length lower bound = 7
  429. /\777/I,utf
  430. Capture group count = 0
  431. Options: utf
  432. First code unit = \x{1ff}
  433. Subject length lower bound = 1
  434. \x{1ff}
  435. 0: \x{1ff}
  436. \777
  437. 0: \x{1ff}
  438. /\x{100}+\x{200}/IB,utf
  439. ------------------------------------------------------------------
  440. Bra
  441. \x{100}++
  442. \x{200}
  443. Ket
  444. End
  445. ------------------------------------------------------------------
  446. Capture group count = 0
  447. Options: utf
  448. First code unit = \x{100}
  449. Last code unit = \x{200}
  450. Subject length lower bound = 2
  451. /\x{100}+X/IB,utf
  452. ------------------------------------------------------------------
  453. Bra
  454. \x{100}++
  455. X
  456. Ket
  457. End
  458. ------------------------------------------------------------------
  459. Capture group count = 0
  460. Options: utf
  461. First code unit = \x{100}
  462. Last code unit = 'X'
  463. Subject length lower bound = 2
  464. /^[\QĀ\E-\QŐ\E/B,utf
  465. Failed: error 106 at offset 13: missing terminating ] for character class
  466. /X/utf
  467. XX\x{d800}\=no_utf_check
  468. 0: X
  469. XX\x{da00}\=no_utf_check
  470. 0: X
  471. XX\x{dc00}\=no_utf_check
  472. 0: X
  473. XX\x{de00}\=no_utf_check
  474. 0: X
  475. XX\x{dfff}\=no_utf_check
  476. 0: X
  477. \= Expect UTF error
  478. XX\x{d800}
  479. Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
  480. XX\x{da00}
  481. Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
  482. XX\x{dc00}
  483. Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
  484. XX\x{de00}
  485. Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
  486. XX\x{dfff}
  487. Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
  488. XX\x{110000}
  489. Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defined at offset 2
  490. XX\x{d800}\x{1234}
  491. Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
  492. \= Expect no match
  493. XX\x{d800}\=offset=3
  494. No match
  495. /(?<=.)X/utf
  496. XX\x{d800}\=offset=3
  497. Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
  498. /(*UTF16)\x{11234}/
  499. Failed: error 160 at offset 7: (*VERB) not recognized or malformed
  500. abcd\x{11234}pqr
  501. /(*UTF)\x{11234}/I
  502. Capture group count = 0
  503. Compile options: <none>
  504. Overall options: utf
  505. First code unit = \x{11234}
  506. Subject length lower bound = 1
  507. abcd\x{11234}pqr
  508. 0: \x{11234}
  509. /(*UTF-32)\x{11234}/
  510. Failed: error 160 at offset 5: (*VERB) not recognized or malformed
  511. abcd\x{11234}pqr
  512. /(*UTF-32)\x{112}/
  513. Failed: error 160 at offset 5: (*VERB) not recognized or malformed
  514. abcd\x{11234}pqr
  515. /(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I
  516. Failed: error 160 at offset 14: (*VERB) not recognized or malformed
  517. /(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I
  518. Capture group count = 0
  519. Compile options: <none>
  520. Overall options: utf
  521. \R matches any Unicode newline
  522. Forced newline is CRLF
  523. First code unit = 'a'
  524. Last code unit = 'b'
  525. Subject length lower bound = 3
  526. /\h/I,utf
  527. Capture group count = 0
  528. Options: utf
  529. Starting code units: \x09 \x20 \xa0 \xff
  530. Subject length lower bound = 1
  531. ABC\x{09}
  532. 0: \x{09}
  533. ABC\x{20}
  534. 0:
  535. ABC\x{a0}
  536. 0: \x{a0}
  537. ABC\x{1680}
  538. 0: \x{1680}
  539. ABC\x{180e}
  540. 0: \x{180e}
  541. ABC\x{2000}
  542. 0: \x{2000}
  543. ABC\x{202f}
  544. 0: \x{202f}
  545. ABC\x{205f}
  546. 0: \x{205f}
  547. ABC\x{3000}
  548. 0: \x{3000}
  549. /\v/I,utf
  550. Capture group count = 0
  551. Options: utf
  552. Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
  553. Subject length lower bound = 1
  554. ABC\x{0a}
  555. 0: \x{0a}
  556. ABC\x{0b}
  557. 0: \x{0b}
  558. ABC\x{0c}
  559. 0: \x{0c}
  560. ABC\x{0d}
  561. 0: \x{0d}
  562. ABC\x{85}
  563. 0: \x{85}
  564. ABC\x{2028}
  565. 0: \x{2028}
  566. /\h*A/I,utf
  567. Capture group count = 0
  568. Options: utf
  569. Starting code units: \x09 \x20 A \xa0 \xff
  570. Last code unit = 'A'
  571. Subject length lower bound = 1
  572. CDBABC
  573. 0: A
  574. \x{2000}ABC
  575. 0: \x{2000}A
  576. /\R*A/I,bsr=unicode,utf
  577. Capture group count = 0
  578. Options: utf
  579. \R matches any Unicode newline
  580. Starting code units: \x0a \x0b \x0c \x0d A \x85 \xff
  581. Last code unit = 'A'
  582. Subject length lower bound = 1
  583. CDBABC
  584. 0: A
  585. \x{2028}A
  586. 0: \x{2028}A
  587. /\v+A/I,utf
  588. Capture group count = 0
  589. Options: utf
  590. Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
  591. Last code unit = 'A'
  592. Subject length lower bound = 2
  593. /\s?xxx\s/I,utf
  594. Capture group count = 0
  595. Options: utf
  596. Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x
  597. Last code unit = 'x'
  598. Subject length lower bound = 4
  599. /\sxxx\s/I,utf,tables=2
  600. Capture group count = 0
  601. Options: utf
  602. Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \x85 \xa0
  603. Last code unit = 'x'
  604. Subject length lower bound = 5
  605. AB\x{85}xxx\x{a0}XYZ
  606. 0: \x{85}xxx\x{a0}
  607. AB\x{a0}xxx\x{85}XYZ
  608. 0: \x{a0}xxx\x{85}
  609. /\S \S/I,utf,tables=2
  610. Capture group count = 0
  611. Options: utf
  612. Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
  613. \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
  614. \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
  615. D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h
  616. i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84
  617. \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94
  618. \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3 \xa4
  619. \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3
  620. \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2
  621. \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
  622. \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
  623. \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
  624. \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
  625. \xff
  626. Last code unit = ' '
  627. Subject length lower bound = 3
  628. \x{a2} \x{84}
  629. 0: \x{a2} \x{84}
  630. A Z
  631. 0: A Z
  632. /a+/utf
  633. a\x{123}aa\=offset=1
  634. 0: aa
  635. a\x{123}aa\=offset=2
  636. 0: aa
  637. a\x{123}aa\=offset=3
  638. 0: a
  639. \= Expect no match
  640. a\x{123}aa\=offset=4
  641. No match
  642. \= Expect bad offset error
  643. a\x{123}aa\=offset=5
  644. Failed: error -33: bad offset value
  645. a\x{123}aa\=offset=6
  646. Failed: error -33: bad offset value
  647. /\x{1234}+/Ii,utf
  648. Capture group count = 0
  649. Options: caseless utf
  650. First code unit = \x{1234}
  651. Subject length lower bound = 1
  652. /\x{1234}+?/Ii,utf
  653. Capture group count = 0
  654. Options: caseless utf
  655. First code unit = \x{1234}
  656. Subject length lower bound = 1
  657. /\x{1234}++/Ii,utf
  658. Capture group count = 0
  659. Options: caseless utf
  660. First code unit = \x{1234}
  661. Subject length lower bound = 1
  662. /\x{1234}{2}/Ii,utf
  663. Capture group count = 0
  664. Options: caseless utf
  665. First code unit = \x{1234}
  666. Last code unit = \x{1234}
  667. Subject length lower bound = 2
  668. /[^\x{c4}]/IB,utf
  669. ------------------------------------------------------------------
  670. Bra
  671. [^\x{c4}]
  672. Ket
  673. End
  674. ------------------------------------------------------------------
  675. Capture group count = 0
  676. Options: utf
  677. Subject length lower bound = 1
  678. /X+\x{200}/IB,utf
  679. ------------------------------------------------------------------
  680. Bra
  681. X++
  682. \x{200}
  683. Ket
  684. End
  685. ------------------------------------------------------------------
  686. Capture group count = 0
  687. Options: utf
  688. First code unit = 'X'
  689. Last code unit = \x{200}
  690. Subject length lower bound = 2
  691. /\R/I,utf
  692. Capture group count = 0
  693. Options: utf
  694. Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
  695. Subject length lower bound = 1
  696. # Check bad offset
  697. /a/utf
  698. \= Expect bad UTF-16 offset, or no match in 32-bit
  699. \x{10000}\=offset=1
  700. No match
  701. \x{10000}ab\=offset=1
  702. 0: a
  703. \= Expect 16-bit match, 32-bit no match
  704. \x{10000}ab\=offset=2
  705. No match
  706. \= Expect no match
  707. \x{10000}ab\=offset=3
  708. No match
  709. \= Expect no match in 16-bit, bad offset in 32-bit
  710. \x{10000}ab\=offset=4
  711. Failed: error -33: bad offset value
  712. \= Expect bad offset
  713. \x{10000}ab\=offset=5
  714. Failed: error -33: bad offset value
  715. /���/utf
  716. Failed: error -27 at offset 0: UTF-32 error: code points 0xd800-0xdfff are not defined
  717. /\w+\x{C4}/B,utf
  718. ------------------------------------------------------------------
  719. Bra
  720. \w++
  721. \x{c4}
  722. Ket
  723. End
  724. ------------------------------------------------------------------
  725. a\x{C4}\x{C4}
  726. 0: a\x{c4}
  727. /\w+\x{C4}/B,utf,tables=2
  728. ------------------------------------------------------------------
  729. Bra
  730. \w+
  731. \x{c4}
  732. Ket
  733. End
  734. ------------------------------------------------------------------
  735. a\x{C4}\x{C4}
  736. 0: a\x{c4}\x{c4}
  737. /\W+\x{C4}/B,utf
  738. ------------------------------------------------------------------
  739. Bra
  740. \W+
  741. \x{c4}
  742. Ket
  743. End
  744. ------------------------------------------------------------------
  745. !\x{C4}
  746. 0: !\x{c4}
  747. /\W+\x{C4}/B,utf,tables=2
  748. ------------------------------------------------------------------
  749. Bra
  750. \W++
  751. \x{c4}
  752. Ket
  753. End
  754. ------------------------------------------------------------------
  755. !\x{C4}
  756. 0: !\x{c4}
  757. /\W+\x{A1}/B,utf
  758. ------------------------------------------------------------------
  759. Bra
  760. \W+
  761. \x{a1}
  762. Ket
  763. End
  764. ------------------------------------------------------------------
  765. !\x{A1}
  766. 0: !\x{a1}
  767. /\W+\x{A1}/B,utf,tables=2
  768. ------------------------------------------------------------------
  769. Bra
  770. \W+
  771. \x{a1}
  772. Ket
  773. End
  774. ------------------------------------------------------------------
  775. !\x{A1}
  776. 0: !\x{a1}
  777. /X\s+\x{A0}/B,utf
  778. ------------------------------------------------------------------
  779. Bra
  780. X
  781. \s++
  782. \x{a0}
  783. Ket
  784. End
  785. ------------------------------------------------------------------
  786. X\x20\x{A0}\x{A0}
  787. 0: X \x{a0}
  788. /X\s+\x{A0}/B,utf,tables=2
  789. ------------------------------------------------------------------
  790. Bra
  791. X
  792. \s+
  793. \x{a0}
  794. Ket
  795. End
  796. ------------------------------------------------------------------
  797. X\x20\x{A0}\x{A0}
  798. 0: X \x{a0}\x{a0}
  799. /\S+\x{A0}/B,utf
  800. ------------------------------------------------------------------
  801. Bra
  802. \S+
  803. \x{a0}
  804. Ket
  805. End
  806. ------------------------------------------------------------------
  807. X\x{A0}\x{A0}
  808. 0: X\x{a0}\x{a0}
  809. /\S+\x{A0}/B,utf,tables=2
  810. ------------------------------------------------------------------
  811. Bra
  812. \S++
  813. \x{a0}
  814. Ket
  815. End
  816. ------------------------------------------------------------------
  817. X\x{A0}\x{A0}
  818. 0: X\x{a0}
  819. /\x{a0}+\s!/B,utf
  820. ------------------------------------------------------------------
  821. Bra
  822. \x{a0}++
  823. \s
  824. !
  825. Ket
  826. End
  827. ------------------------------------------------------------------
  828. \x{a0}\x20!
  829. 0: \x{a0} !
  830. /\x{a0}+\s!/B,utf,tables=2
  831. ------------------------------------------------------------------
  832. Bra
  833. \x{a0}+
  834. \s
  835. !
  836. Ket
  837. End
  838. ------------------------------------------------------------------
  839. \x{a0}\x20!
  840. 0: \x{a0} !
  841. /(*UTF)abc/never_utf
  842. Failed: error 174 at offset 6: using UTF is disabled by the application
  843. /abc/utf,never_utf
  844. Failed: error 174 at offset 0: using UTF is disabled by the application
  845. /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf
  846. ------------------------------------------------------------------
  847. Bra
  848. /i A\x{391}\x{10427}\x{ff3a}\x{1fb0}
  849. Ket
  850. End
  851. ------------------------------------------------------------------
  852. Capture group count = 0
  853. Options: caseless utf
  854. First code unit = 'A' (caseless)
  855. Last code unit = \x{1fb0} (caseless)
  856. Subject length lower bound = 5
  857. /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf
  858. ------------------------------------------------------------------
  859. Bra
  860. A\x{391}\x{10427}\x{ff3a}\x{1fb0}
  861. Ket
  862. End
  863. ------------------------------------------------------------------
  864. Capture group count = 0
  865. Options: utf
  866. First code unit = 'A'
  867. Last code unit = \x{1fb0}
  868. Subject length lower bound = 5
  869. /AB\x{1fb0}/IB,utf
  870. ------------------------------------------------------------------
  871. Bra
  872. AB\x{1fb0}
  873. Ket
  874. End
  875. ------------------------------------------------------------------
  876. Capture group count = 0
  877. Options: utf
  878. First code unit = 'A'
  879. Last code unit = \x{1fb0}
  880. Subject length lower bound = 3
  881. /AB\x{1fb0}/IBi,utf
  882. ------------------------------------------------------------------
  883. Bra
  884. /i AB\x{1fb0}
  885. Ket
  886. End
  887. ------------------------------------------------------------------
  888. Capture group count = 0
  889. Options: caseless utf
  890. First code unit = 'A' (caseless)
  891. Last code unit = \x{1fb0} (caseless)
  892. Subject length lower bound = 3
  893. /\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
  894. Capture group count = 0
  895. Options: caseless utf
  896. First code unit = \x{401} (caseless)
  897. Last code unit = \x{42f} (caseless)
  898. Subject length lower bound = 17
  899. \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
  900. 0: \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
  901. \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
  902. 0: \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
  903. /[ⱥ]/Bi,utf
  904. ------------------------------------------------------------------
  905. Bra
  906. /i \x{2c65}
  907. Ket
  908. End
  909. ------------------------------------------------------------------
  910. /[^ⱥ]/Bi,utf
  911. ------------------------------------------------------------------
  912. Bra
  913. /i [^\x{2c65}]
  914. Ket
  915. End
  916. ------------------------------------------------------------------
  917. /[[:blank:]]/B,ucp
  918. ------------------------------------------------------------------
  919. Bra
  920. [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
  921. Ket
  922. End
  923. ------------------------------------------------------------------
  924. /\x{212a}+/Ii,utf
  925. Capture group count = 0
  926. Options: caseless utf
  927. Starting code units: K k \xff
  928. Subject length lower bound = 1
  929. KKkk\x{212a}
  930. 0: KKkk\x{212a}
  931. /s+/Ii,utf
  932. Capture group count = 0
  933. Options: caseless utf
  934. Starting code units: S s \xff
  935. Subject length lower bound = 1
  936. SSss\x{17f}
  937. 0: SSss\x{17f}
  938. # Non-UTF characters should give errors in both 16-bit and 32-bit modes.
  939. /\x{110000}/utf
  940. Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large
  941. /\o{4200000}/utf
  942. Failed: error 134 at offset 10: character code point value in \x{} or \o{} is too large
  943. /\x{100}*A/IB,utf
  944. ------------------------------------------------------------------
  945. Bra
  946. \x{100}*+
  947. A
  948. Ket
  949. End
  950. ------------------------------------------------------------------
  951. Capture group count = 0
  952. Options: utf
  953. Starting code units: A \xff
  954. Last code unit = 'A'
  955. Subject length lower bound = 1
  956. A
  957. 0: A
  958. /\x{100}*\d(?R)/IB,utf
  959. ------------------------------------------------------------------
  960. Bra
  961. \x{100}*+
  962. \d
  963. Recurse
  964. Ket
  965. End
  966. ------------------------------------------------------------------
  967. Capture group count = 0
  968. Options: utf
  969. Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff
  970. Subject length lower bound = 1
  971. /[Z\x{100}]/IB,utf
  972. ------------------------------------------------------------------
  973. Bra
  974. [Z\x{100}]
  975. Ket
  976. End
  977. ------------------------------------------------------------------
  978. Capture group count = 0
  979. Options: utf
  980. Starting code units: Z \xff
  981. Subject length lower bound = 1
  982. Z\x{100}
  983. 0: Z
  984. \x{100}
  985. 0: \x{100}
  986. \x{100}Z
  987. 0: \x{100}
  988. /[z-\x{100}]/IB,utf
  989. ------------------------------------------------------------------
  990. Bra
  991. [z-\xff\x{100}]
  992. Ket
  993. End
  994. ------------------------------------------------------------------
  995. Capture group count = 0
  996. Options: utf
  997. Starting code units: z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87
  998. \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96
  999. \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5
  1000. \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4
  1001. \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3
  1002. \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2
  1003. \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1
  1004. \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0
  1005. \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
  1006. Subject length lower bound = 1
  1007. /[z\Qa-d]Ā\E]/IB,utf
  1008. ------------------------------------------------------------------
  1009. Bra
  1010. [\-\]adz\x{100}]
  1011. Ket
  1012. End
  1013. ------------------------------------------------------------------
  1014. Capture group count = 0
  1015. Options: utf
  1016. Starting code units: - ] a d z \xff
  1017. Subject length lower bound = 1
  1018. \x{100}
  1019. 0: \x{100}
  1020. Ā
  1021. 0: \x{100}
  1022. /[ab\x{100}]abc(xyz(?1))/IB,utf
  1023. ------------------------------------------------------------------
  1024. Bra
  1025. [ab\x{100}]
  1026. abc
  1027. CBra 1
  1028. xyz
  1029. Recurse
  1030. Ket
  1031. Ket
  1032. End
  1033. ------------------------------------------------------------------
  1034. Capture group count = 1
  1035. Options: utf
  1036. Starting code units: a b \xff
  1037. Last code unit = 'z'
  1038. Subject length lower bound = 7
  1039. /\x{100}*\s/IB,utf
  1040. ------------------------------------------------------------------
  1041. Bra
  1042. \x{100}*+
  1043. \s
  1044. Ket
  1045. End
  1046. ------------------------------------------------------------------
  1047. Capture group count = 0
  1048. Options: utf
  1049. Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xff
  1050. Subject length lower bound = 1
  1051. /\x{100}*\d/IB,utf
  1052. ------------------------------------------------------------------
  1053. Bra
  1054. \x{100}*+
  1055. \d
  1056. Ket
  1057. End
  1058. ------------------------------------------------------------------
  1059. Capture group count = 0
  1060. Options: utf
  1061. Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff
  1062. Subject length lower bound = 1
  1063. /\x{100}*\w/IB,utf
  1064. ------------------------------------------------------------------
  1065. Bra
  1066. \x{100}*+
  1067. \w
  1068. Ket
  1069. End
  1070. ------------------------------------------------------------------
  1071. Capture group count = 0
  1072. Options: utf
  1073. Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
  1074. Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
  1075. \xff
  1076. Subject length lower bound = 1
  1077. /\x{100}*\D/IB,utf
  1078. ------------------------------------------------------------------
  1079. Bra
  1080. \x{100}*
  1081. \D
  1082. Ket
  1083. End
  1084. ------------------------------------------------------------------
  1085. Capture group count = 0
  1086. Options: utf
  1087. Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
  1088. \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
  1089. \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = >
  1090. ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c
  1091. d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82
  1092. \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91
  1093. \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0
  1094. \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf
  1095. \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe
  1096. \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd
  1097. \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc
  1098. \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb
  1099. \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa
  1100. \xfb \xfc \xfd \xfe \xff
  1101. Subject length lower bound = 1
  1102. /\x{100}*\S/IB,utf
  1103. ------------------------------------------------------------------
  1104. Bra
  1105. \x{100}*
  1106. \S
  1107. Ket
  1108. End
  1109. ------------------------------------------------------------------
  1110. Capture group count = 0
  1111. Options: utf
  1112. Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
  1113. \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
  1114. \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
  1115. D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h
  1116. i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84
  1117. \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93
  1118. \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2
  1119. \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1
  1120. \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0
  1121. \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf
  1122. \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde
  1123. \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed
  1124. \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc
  1125. \xfd \xfe \xff
  1126. Subject length lower bound = 1
  1127. /\x{100}*\W/IB,utf
  1128. ------------------------------------------------------------------
  1129. Bra
  1130. \x{100}*
  1131. \W
  1132. Ket
  1133. End
  1134. ------------------------------------------------------------------
  1135. Capture group count = 0
  1136. Options: utf
  1137. Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
  1138. \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
  1139. \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = >
  1140. ? @ [ \ ] ^ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89
  1141. \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98
  1142. \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7
  1143. \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6
  1144. \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5
  1145. \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4
  1146. \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3
  1147. \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2
  1148. \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
  1149. Subject length lower bound = 1
  1150. /[\x{105}-\x{109}]/IBi,utf
  1151. ------------------------------------------------------------------
  1152. Bra
  1153. [\x{104}-\x{109}]
  1154. Ket
  1155. End
  1156. ------------------------------------------------------------------
  1157. Capture group count = 0
  1158. Options: caseless utf
  1159. Starting code units: \xff
  1160. Subject length lower bound = 1
  1161. \x{104}
  1162. 0: \x{104}
  1163. \x{105}
  1164. 0: \x{105}
  1165. \x{109}
  1166. 0: \x{109}
  1167. \= Expect no match
  1168. \x{100}
  1169. No match
  1170. \x{10a}
  1171. No match
  1172. /[z-\x{100}]/IBi,utf
  1173. ------------------------------------------------------------------
  1174. Bra
  1175. [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
  1176. Ket
  1177. End
  1178. ------------------------------------------------------------------
  1179. Capture group count = 0
  1180. Options: caseless utf
  1181. Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86
  1182. \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95
  1183. \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4
  1184. \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3
  1185. \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2
  1186. \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
  1187. \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
  1188. \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
  1189. \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
  1190. \xff
  1191. Subject length lower bound = 1
  1192. Z
  1193. 0: Z
  1194. z
  1195. 0: z
  1196. \x{39c}
  1197. 0: \x{39c}
  1198. \x{178}
  1199. 0: \x{178}
  1200. |
  1201. 0: |
  1202. \x{80}
  1203. 0: \x{80}
  1204. \x{ff}
  1205. 0: \x{ff}
  1206. \x{100}
  1207. 0: \x{100}
  1208. \x{101}
  1209. 0: \x{101}
  1210. \= Expect no match
  1211. \x{102}
  1212. No match
  1213. Y
  1214. No match
  1215. y
  1216. No match
  1217. /[z-\x{100}]/IBi,utf
  1218. ------------------------------------------------------------------
  1219. Bra
  1220. [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
  1221. Ket
  1222. End
  1223. ------------------------------------------------------------------
  1224. Capture group count = 0
  1225. Options: caseless utf
  1226. Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86
  1227. \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95
  1228. \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4
  1229. \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3
  1230. \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2
  1231. \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
  1232. \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
  1233. \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
  1234. \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
  1235. \xff
  1236. Subject length lower bound = 1
  1237. /\x{3a3}B/IBi,utf
  1238. ------------------------------------------------------------------
  1239. Bra
  1240. clist 03a3 03c2 03c3
  1241. /i B
  1242. Ket
  1243. End
  1244. ------------------------------------------------------------------
  1245. Capture group count = 0
  1246. Options: caseless utf
  1247. Starting code units: \xff
  1248. Last code unit = 'B' (caseless)
  1249. Subject length lower bound = 2
  1250. /./utf
  1251. \x{110000}
  1252. Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defined at offset 0
  1253. /(*UTF)ab������z/B
  1254. ------------------------------------------------------------------
  1255. Bra
  1256. ab\x{fd}\x{bf}\x{bf}\x{bf}\x{bf}\x{bf}z
  1257. Ket
  1258. End
  1259. ------------------------------------------------------------------
  1260. /ab������z/utf
  1261. ** Failed: character value greater than 0x10ffff cannot be converted to UTF
  1262. /[\W\p{Any}]/B
  1263. ------------------------------------------------------------------
  1264. Bra
  1265. [\x00-/:-@[-^`{-\xff\p{Any}\x{100}-\x{ffffffff}]
  1266. Ket
  1267. End
  1268. ------------------------------------------------------------------
  1269. abc
  1270. 0: a
  1271. 123
  1272. 0: 1
  1273. /[\W\pL]/B
  1274. ------------------------------------------------------------------
  1275. Bra
  1276. [\x00-/:-@[-^`{-\xff\p{L}\x{100}-\x{ffffffff}]
  1277. Ket
  1278. End
  1279. ------------------------------------------------------------------
  1280. abc
  1281. 0: a
  1282. \x{100}
  1283. 0: \x{100}
  1284. \x{308}
  1285. 0: \x{308}
  1286. \= Expect no match
  1287. 123
  1288. No match
  1289. /[\s[:^ascii:]]/B,ucp
  1290. ------------------------------------------------------------------
  1291. Bra
  1292. [\x80-\xff\p{Xsp}\x{100}-\x{ffffffff}]
  1293. Ket
  1294. End
  1295. ------------------------------------------------------------------
  1296. /\pP/ucp
  1297. \x{7fffffff}
  1298. No match
  1299. # A special extra option allows escaped surrogate code points in 32-bit mode,
  1300. # but subjects containing them must not be UTF-checked. These patterns give
  1301. # errors in 16-bit mode.
  1302. /\x{d800}/I,utf,allow_surrogate_escapes
  1303. Capture group count = 0
  1304. Options: utf
  1305. Extra options: allow_surrogate_escapes
  1306. First code unit = \x{d800}
  1307. Subject length lower bound = 1
  1308. \x{d800}\=no_utf_check
  1309. 0: \x{d800}
  1310. /\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
  1311. \x{dfff}\x{df01}\=no_utf_check
  1312. 0: \x{dfff}\x{df01}
  1313. # This has different starting code units in 8-bit mode.
  1314. /^[^ab]/IB,utf
  1315. ------------------------------------------------------------------
  1316. Bra
  1317. ^
  1318. [\x00-`c-\xff] (neg)
  1319. Ket
  1320. End
  1321. ------------------------------------------------------------------
  1322. Capture group count = 0
  1323. Compile options: utf
  1324. Overall options: anchored utf
  1325. Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
  1326. \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
  1327. \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
  1328. 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
  1329. Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
  1330. \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e
  1331. \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d
  1332. \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac
  1333. \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb
  1334. \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca
  1335. \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9
  1336. \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8
  1337. \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7
  1338. \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
  1339. Subject length lower bound = 1
  1340. c
  1341. 0: c
  1342. \x{ff}
  1343. 0: \x{ff}
  1344. \x{100}
  1345. 0: \x{100}
  1346. \= Expect no match
  1347. aaa
  1348. No match
  1349. # Offsets are different in 8-bit mode.
  1350. /(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout
  1351. 123abcáyzabcdef789abcሴqr
  1352. 1(2) Old 6 6 "" New 6 8 "<>"
  1353. 2(2) Old 12 12 "" New 14 16 "<>"
  1354. 3(2) Old 12 15 "def" New 16 21 "<def>"
  1355. 4(2) Old 21 21 "" New 27 29 "<>"
  1356. 4: 123abc<>\x{e1}yzabc<><def>789abc<>\x{1234}qr
  1357. # A few script run tests in non-UTF mode (but they need Unicode support)
  1358. /^(*script_run:.{4})/
  1359. \x{3041}\x{30a1}\x{3007}\x{3007} Hiragana Katakana Han Han
  1360. 0: \x{3041}\x{30a1}\x{3007}\x{3007}
  1361. \x{30a1}\x{3041}\x{3007}\x{3007} Katakana Hiragana Han Han
  1362. 0: \x{30a1}\x{3041}\x{3007}\x{3007}
  1363. \x{1100}\x{2e80}\x{2e80}\x{1101} Hangul Han Han Hangul
  1364. 0: \x{1100}\x{2e80}\x{2e80}\x{1101}
  1365. /^(*sr:.*)/utf,allow_surrogate_escapes
  1366. \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana
  1367. 0: \x{2e80}\x{3105}\x{2e80}
  1368. \x{d800}\x{dfff} Surrogates (Unknown) \=no_utf_check
  1369. 0: \x{d800}
  1370. /(?(n/utf
  1371. Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?)
  1372. /(?(á/utf
  1373. Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?)
  1374. # Invalid UTF-16/32 tests.
  1375. /.../g,match_invalid_utf
  1376. abcd\x{df00}wxzy\x{df00}pqrs
  1377. 0: abc
  1378. 0: wxz
  1379. 0: pqr
  1380. abcd\x{80}wxzy\x{df00}pqrs
  1381. 0: abc
  1382. 0: d\x{80}w
  1383. 0: xzy
  1384. 0: pqr
  1385. /abc/match_invalid_utf
  1386. ab\x{df00}ab\=ph
  1387. Partial match: ab
  1388. \= Expect no match
  1389. ab\x{df00}cdef\=ph
  1390. No match
  1391. /.a/match_invalid_utf
  1392. ab\=ph
  1393. Partial match: b
  1394. ab\=ps
  1395. Partial match: b
  1396. \= Expect no match
  1397. b\x{df00}\=ph
  1398. No match
  1399. b\x{df00}\=ps
  1400. No match
  1401. /.a$/match_invalid_utf
  1402. ab\=ph
  1403. Partial match: b
  1404. ab\=ps
  1405. Partial match: b
  1406. \= Expect no match
  1407. b\x{df00}\=ph
  1408. No match
  1409. b\x{df00}\=ps
  1410. No match
  1411. /ab$/match_invalid_utf
  1412. ab\x{df00}cdeab
  1413. 0: ab
  1414. \= Expect no match
  1415. ab\x{df00}cde
  1416. No match
  1417. /.../g,match_invalid_utf
  1418. abcd\x{80}wxzy\x{df00}pqrs
  1419. 0: abc
  1420. 0: d\x{80}w
  1421. 0: xzy
  1422. 0: pqr
  1423. /(?<=x)../g,match_invalid_utf
  1424. abcd\x{80}wxzy\x{df00}pqrs
  1425. 0: zy
  1426. abcd\x{80}wxzy\x{df00}xpqrs
  1427. 0: zy
  1428. 0: pq
  1429. /X$/match_invalid_utf
  1430. \= Expect no match
  1431. X\x{df00}
  1432. No match
  1433. /(?<=..)X/match_invalid_utf,aftertext
  1434. AB\x{df00}AQXYZ
  1435. 0: X
  1436. 0+ YZ
  1437. AB\x{df00}AQXYZ\=offset=5
  1438. 0: X
  1439. 0+ YZ
  1440. AB\x{df00}\x{df00}AXYZXC\=offset=5
  1441. 0: X
  1442. 0+ C
  1443. \= Expect no match
  1444. AB\x{df00}XYZ
  1445. No match
  1446. AB\x{df00}XYZ\=offset=3
  1447. No match
  1448. AB\x{df00}AXYZ
  1449. No match
  1450. AB\x{df00}AXYZ\=offset=4
  1451. No match
  1452. AB\x{df00}\x{df00}AXYZ\=offset=5
  1453. No match
  1454. /.../match_invalid_utf
  1455. \= Expect no match
  1456. A\x{d800}B
  1457. No match
  1458. A\x{110000}B
  1459. No match
  1460. /aa/utf,ucp,match_invalid_utf,global
  1461. aa\x{d800}aa
  1462. 0: aa
  1463. 0: aa
  1464. /aa/utf,ucp,match_invalid_utf,global
  1465. \x{d800}aa
  1466. 0: aa
  1467. /A\z/utf,match_invalid_utf
  1468. A\x{df00}\n
  1469. No match
  1470. # ----------------------------------------------------
  1471. /(*UTF)(?=\x{123})/I
  1472. Capture group count = 0
  1473. May match empty string
  1474. Compile options: <none>
  1475. Overall options: utf
  1476. First code unit = \x{123}
  1477. Subject length lower bound = 1
  1478. /[\x{c1}\x{e1}]X[\x{145}\x{146}]/I,utf
  1479. Capture group count = 0
  1480. Options: utf
  1481. First code unit = \xc1 (caseless)
  1482. Last code unit = \x{145} (caseless)
  1483. Subject length lower bound = 3
  1484. /[\xff\x{ffff}]/I,utf
  1485. Capture group count = 0
  1486. Options: utf
  1487. Starting code units: \xff
  1488. Subject length lower bound = 1
  1489. /[\xff\x{ff}]/I,utf
  1490. Capture group count = 0
  1491. Options: utf
  1492. Starting code units: \xff
  1493. Subject length lower bound = 1
  1494. /[\xff\x{ff}]/I
  1495. Capture group count = 0
  1496. Starting code units: \xff
  1497. Subject length lower bound = 1
  1498. /[Ss]/I
  1499. Capture group count = 0
  1500. First code unit = 'S' (caseless)
  1501. Subject length lower bound = 1
  1502. /[Ss]/I,utf
  1503. Capture group count = 0
  1504. Options: utf
  1505. Starting code units: S s
  1506. Subject length lower bound = 1
  1507. /(?:\x{ff}|\x{3000})/I,utf
  1508. Capture group count = 0
  1509. Options: utf
  1510. Starting code units: \xff
  1511. Subject length lower bound = 1
  1512. # ----------------------------------------------------
  1513. # UCP and casing tests
  1514. /\x{120}/i,I
  1515. Capture group count = 0
  1516. Options: caseless
  1517. First code unit = \x{120}
  1518. Subject length lower bound = 1
  1519. /\x{c1}/i,I,ucp
  1520. Capture group count = 0
  1521. Options: caseless ucp
  1522. First code unit = \xc1 (caseless)
  1523. Subject length lower bound = 1
  1524. /[\x{120}\x{121}]/iB,ucp
  1525. ------------------------------------------------------------------
  1526. Bra
  1527. /i \x{120}
  1528. Ket
  1529. End
  1530. ------------------------------------------------------------------
  1531. /[ab\x{120}]+/iB,ucp
  1532. ------------------------------------------------------------------
  1533. Bra
  1534. [ABab\x{120}-\x{121}]++
  1535. Ket
  1536. End
  1537. ------------------------------------------------------------------
  1538. aABb\x{121}\x{120}
  1539. 0: aABb\x{121}\x{120}
  1540. /\x{c1}/i,no_start_optimize
  1541. \= Expect no match
  1542. \x{e1}
  1543. No match
  1544. /\x{120}\x{c1}/i,ucp,no_start_optimize
  1545. \x{121}\x{e1}
  1546. 0: \x{121}\xe1
  1547. /\x{120}\x{c1}/i,ucp
  1548. \x{121}\x{e1}
  1549. 0: \x{121}\xe1
  1550. /[^\x{120}]/i,no_start_optimize
  1551. \x{121}
  1552. 0: \x{121}
  1553. /[^\x{120}]/i,ucp,no_start_optimize
  1554. \= Expect no match
  1555. \x{121}
  1556. No match
  1557. /[^\x{120}]/i
  1558. \x{121}
  1559. 0: \x{121}
  1560. /[^\x{120}]/i,ucp
  1561. \= Expect no match
  1562. \x{121}
  1563. No match
  1564. /\x{120}{2}/i,ucp
  1565. \x{121}\x{121}
  1566. 0: \x{121}\x{121}
  1567. /[^\x{120}]{2}/i,ucp
  1568. \= Expect no match
  1569. \x{121}\x{121}
  1570. No match
  1571. /\x{c1}+\x{e1}/iB,ucp
  1572. ------------------------------------------------------------------
  1573. Bra
  1574. /i \x{c1}+
  1575. /i \x{e1}
  1576. Ket
  1577. End
  1578. ------------------------------------------------------------------
  1579. \x{c1}\x{c1}\x{c1}
  1580. 0: \xc1\xc1\xc1
  1581. /\x{c1}+\x{e1}/iIB,ucp
  1582. ------------------------------------------------------------------
  1583. Bra
  1584. /i \x{c1}+
  1585. /i \x{e1}
  1586. Ket
  1587. End
  1588. ------------------------------------------------------------------
  1589. Capture group count = 0
  1590. Options: caseless ucp
  1591. First code unit = \xc1 (caseless)
  1592. Last code unit = \xe1 (caseless)
  1593. Subject length lower bound = 2
  1594. \x{c1}\x{c1}\x{c1}
  1595. 0: \xc1\xc1\xc1
  1596. \x{e1}\x{e1}\x{e1}
  1597. 0: \xe1\xe1\xe1
  1598. /a|\x{c1}/iI,ucp
  1599. Capture group count = 0
  1600. Options: caseless ucp
  1601. Starting code units: A a \xc1 \xe1
  1602. Subject length lower bound = 1
  1603. \x{e1}xxx
  1604. 0: \xe1
  1605. /\x{c1}|\x{e1}/iI,ucp
  1606. Capture group count = 0
  1607. Options: caseless ucp
  1608. First code unit = \xc1 (caseless)
  1609. Subject length lower bound = 1
  1610. /X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended
  1611. X\x{e1}Y
  1612. 1: >\xc1<
  1613. /X(\x{121})Y/ucp,replace=>\U$1<,substitute_extended
  1614. X\x{121}Y
  1615. 1: >\x{120}<
  1616. /s/i,ucp
  1617. \x{17f}
  1618. 0: \x{17f}
  1619. /s/i,utf
  1620. \x{17f}
  1621. 0: \x{17f}
  1622. /[^s]/i,ucp
  1623. \= Expect no match
  1624. \x{17f}
  1625. No match
  1626. /[^s]/i,utf
  1627. \= Expect no match
  1628. \x{17f}
  1629. No match
  1630. # ----------------------------------------------------
  1631. # Quantifier after a literal that has the value of META_ACCEPT (not UTF). This
  1632. # fails in 16-bit mode, but is OK for 32-bit.
  1633. /\x{802a0000}*/
  1634. \x{802a0000}\x{802a0000}
  1635. 0: \x{802a0000}\x{802a0000}
  1636. # UTF matching without UTF, check invalid UTF characters
  1637. /\X++/
  1638. a\x{110000}\x{ffffffff}
  1639. 0: a\x{110000}\x{ffffffff}
  1640. # This used to loop in 32-bit mode; it will fail in 16-bit mode.
  1641. /[\x{ffffffff}]/caseless,ucp
  1642. \x{ffffffff}xyz
  1643. 0: \x{ffffffff}
  1644. # These are 32-bit tests for handling 0xffffffff when in UCP caseless mode. They
  1645. # will give errors in 16-bit mode.
  1646. /k*\x{ffffffff}/caseless,ucp
  1647. \x{ffffffff}
  1648. 0: \x{ffffffff}
  1649. /k+\x{ffffffff}/caseless,ucp,no_start_optimize
  1650. K\x{ffffffff}
  1651. 0: K\x{ffffffff}
  1652. \= Expect no match
  1653. \x{ffffffff}\x{ffffffff}
  1654. No match
  1655. /k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
  1656. \= Expect no match
  1657. \x{ffffffff}\x{ffffffff}\x{ffffffff}
  1658. No match
  1659. /k\x{ffffffff}/caseless,ucp,no_start_optimize
  1660. K\x{ffffffff}
  1661. 0: K\x{ffffffff}
  1662. \= Expect no match
  1663. \x{ffffffff}\x{ffffffff}\x{ffffffff}
  1664. No match
  1665. /k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
  1666. \= Expect no match
  1667. Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
  1668. No match
  1669. # ---------------------------------------------------------
  1670. # End of testinput12