github的一些开源项目
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

374 lines
14 KiB

  1. # This set of tests is for the 16-bit and 32-bit libraries' basic (non-UTF)
  2. # features that are not compatible with the 8-bit library, or which give
  3. # different output in 16-bit or 32-bit mode. The output for the two widths is
  4. # different, so they have separate output files.
  5. #forbid_utf
  6. #newline_default LF ANY ANYCRLF
  7. /[^\x{c4}]/IB
  8. /\x{100}/I
  9. / (?: [\040\t] | \(
  10. (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
  11. \) )* # optional leading comment
  12. (?: (?:
  13. [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
  14. (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
  15. |
  16. " (?: # opening quote...
  17. [^\\\x80-\xff\n\015"] # Anything except backslash and quote
  18. | # or
  19. \\ [^\x80-\xff] # Escaped something (something != CR)
  20. )* " # closing quote
  21. ) # initial word
  22. (?: (?: [\040\t] | \(
  23. (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
  24. \) )* \. (?: [\040\t] | \(
  25. (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
  26. \) )* (?:
  27. [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
  28. (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
  29. |
  30. " (?: # opening quote...
  31. [^\\\x80-\xff\n\015"] # Anything except backslash and quote
  32. | # or
  33. \\ [^\x80-\xff] # Escaped something (something != CR)
  34. )* " # closing quote
  35. ) )* # further okay, if led by a period
  36. (?: [\040\t] | \(
  37. (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
  38. \) )* @ (?: [\040\t] | \(
  39. (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
  40. \) )* (?:
  41. [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
  42. (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
  43. | \[ # [
  44. (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
  45. \] # ]
  46. ) # initial subdomain
  47. (?: #
  48. (?: [\040\t] | \(
  49. (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
  50. \) )* \. # if led by a period...
  51. (?: [\040\t] | \(
  52. (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
  53. \) )* (?:
  54. [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
  55. (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
  56. | \[ # [
  57. (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
  58. \] # ]
  59. ) # ...further okay
  60. )*
  61. # address
  62. | # or
  63. (?:
  64. [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
  65. (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
  66. |
  67. " (?: # opening quote...
  68. [^\\\x80-\xff\n\015"] # Anything except backslash and quote
  69. | # or
  70. \\ [^\x80-\xff] # Escaped something (something != CR)
  71. )* " # closing quote
  72. ) # one word, optionally followed by....
  73. (?:
  74. [^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
  75. \(
  76. (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
  77. \) | # comments, or...
  78. " (?: # opening quote...
  79. [^\\\x80-\xff\n\015"] # Anything except backslash and quote
  80. | # or
  81. \\ [^\x80-\xff] # Escaped something (something != CR)
  82. )* " # closing quote
  83. # quoted strings
  84. )*
  85. < (?: [\040\t] | \(
  86. (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
  87. \) )* # leading <
  88. (?: @ (?: [\040\t] | \(
  89. (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
  90. \) )* (?:
  91. [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
  92. (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
  93. | \[ # [
  94. (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
  95. \] # ]
  96. ) # initial subdomain
  97. (?: #
  98. (?: [\040\t] | \(
  99. (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
  100. \) )* \. # if led by a period...
  101. (?: [\040\t] | \(
  102. (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
  103. \) )* (?:
  104. [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
  105. (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
  106. | \[ # [
  107. (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
  108. \] # ]
  109. ) # ...further okay
  110. )*
  111. (?: (?: [\040\t] | \(
  112. (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
  113. \) )* , (?: [\040\t] | \(
  114. (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
  115. \) )* @ (?: [\040\t] | \(
  116. (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
  117. \) )* (?:
  118. [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
  119. (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
  120. | \[ # [
  121. (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
  122. \] # ]
  123. ) # initial subdomain
  124. (?: #
  125. (?: [\040\t] | \(
  126. (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
  127. \) )* \. # if led by a period...
  128. (?: [\040\t] | \(
  129. (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
  130. \) )* (?:
  131. [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
  132. (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
  133. | \[ # [
  134. (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
  135. \] # ]
  136. ) # ...further okay
  137. )*
  138. )* # further okay, if led by comma
  139. : # closing colon
  140. (?: [\040\t] | \(
  141. (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
  142. \) )* )? # optional route
  143. (?:
  144. [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
  145. (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
  146. |
  147. " (?: # opening quote...
  148. [^\\\x80-\xff\n\015"] # Anything except backslash and quote
  149. | # or
  150. \\ [^\x80-\xff] # Escaped something (something != CR)
  151. )* " # closing quote
  152. ) # initial word
  153. (?: (?: [\040\t] | \(
  154. (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
  155. \) )* \. (?: [\040\t] | \(
  156. (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
  157. \) )* (?:
  158. [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
  159. (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
  160. |
  161. " (?: # opening quote...
  162. [^\\\x80-\xff\n\015"] # Anything except backslash and quote
  163. | # or
  164. \\ [^\x80-\xff] # Escaped something (something != CR)
  165. )* " # closing quote
  166. ) )* # further okay, if led by a period
  167. (?: [\040\t] | \(
  168. (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
  169. \) )* @ (?: [\040\t] | \(
  170. (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
  171. \) )* (?:
  172. [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
  173. (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
  174. | \[ # [
  175. (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
  176. \] # ]
  177. ) # initial subdomain
  178. (?: #
  179. (?: [\040\t] | \(
  180. (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
  181. \) )* \. # if led by a period...
  182. (?: [\040\t] | \(
  183. (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
  184. \) )* (?:
  185. [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
  186. (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
  187. | \[ # [
  188. (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
  189. \] # ]
  190. ) # ...further okay
  191. )*
  192. # address spec
  193. (?: [\040\t] | \(
  194. (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
  195. \) )* > # trailing >
  196. # name and address
  197. ) (?: [\040\t] | \(
  198. (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
  199. \) )* # optional trailing comment
  200. /Ix
  201. /[\h]/B
  202. >\x09<
  203. /[\h]+/B
  204. >\x09\x20\xa0<
  205. /[\v]/B
  206. /[^\h]/B
  207. /\h+/I
  208. \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
  209. \x{3001}\x{2fff}\x{200a}\xa0\x{2000}
  210. /[\h\x{dc00}]+/IB
  211. \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
  212. \x{3001}\x{2fff}\x{200a}\xa0\x{2000}
  213. /\H+/I
  214. \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
  215. \x{2000}\x{200a}\x{1fff}\x{200b}
  216. \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
  217. \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
  218. /[\H\x{d800}]+/
  219. \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
  220. \x{2000}\x{200a}\x{1fff}\x{200b}
  221. \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
  222. \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
  223. /\v+/I
  224. \x{2027}\x{2030}\x{2028}\x{2029}
  225. \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
  226. /[\v\x{dc00}]+/IB
  227. \x{2027}\x{2030}\x{2028}\x{2029}
  228. \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
  229. /\V+/I
  230. \x{2028}\x{2029}\x{2027}\x{2030}
  231. \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
  232. /[\V\x{d800}]+/
  233. \x{2028}\x{2029}\x{2027}\x{2030}
  234. \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
  235. /\R+/I,bsr=unicode
  236. \x{2027}\x{2030}\x{2028}\x{2029}
  237. \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
  238. /\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I
  239. \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
  240. /[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B
  241. /[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi
  242. /[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B
  243. /[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi
  244. /(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark
  245. XX
  246. /(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark
  247. XX
  248. /\u0100/B,alt_bsux,allow_empty_class,match_unset_backref
  249. /[\u0100-\u0200]/B,alt_bsux,allow_empty_class,match_unset_backref
  250. /\ud800/B,alt_bsux,allow_empty_class,match_unset_backref
  251. /^\x{ffff}+/i
  252. \x{ffff}
  253. /^\x{ffff}?/i
  254. \x{ffff}
  255. /^\x{ffff}*/i
  256. \x{ffff}
  257. /^\x{ffff}{3}/i
  258. \x{ffff}\x{ffff}\x{ffff}
  259. /^\x{ffff}{0,3}/i
  260. \x{ffff}
  261. /[^\x00-a]{12,}[^b-\xff]*/B
  262. /[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
  263. /a*[b-\x{200}]?a#a*[b-\x{200}]?b#[a-f]*[g-\x{200}]*#[g-\x{200}]*[a-c]*#[g-\x{200}]*[a-h]*/B
  264. /^[\x{1234}\x{4321}]{2,4}?/
  265. \x{1234}\x{1234}\x{1234}
  266. # Check maximum non-UTF character size for the 16-bit library.
  267. /\x{ffff}/
  268. A\x{ffff}B
  269. /\x{10000}/
  270. /\o{20000}/
  271. # Check maximum character size for the 32-bit library. These will all give
  272. # errors in the 16-bit library.
  273. /\x{110000}/
  274. /\x{7fffffff}/
  275. /\x{80000000}/
  276. /\x{ffffffff}/
  277. /\x{100000000}/
  278. /\o{17777777777}/
  279. /\o{20000000000}/
  280. /\o{37777777777}/
  281. /\o{40000000000}/
  282. /\x{7fffffff}\x{7fffffff}/I
  283. /\x{80000000}\x{80000000}/I
  284. /\x{ffffffff}\x{ffffffff}/I
  285. # Non-UTF characters
  286. /.{2,3}/
  287. \x{400000}\x{400001}\x{400002}\x{400003}
  288. /\x{400000}\x{800000}/IBi
  289. # Check character ranges
  290. /[\H]/IB
  291. /[\V]/IB
  292. /(*THEN:\[A]{65501})/expand
  293. # We can use pcre2test's utf8_input modifier to create wide pattern characters,
  294. # even though this test is run when UTF is not supported.
  295. /ab������z/utf8_input
  296. ab������z
  297. ab\x{7fffffff}z
  298. /ab�������z/utf8_input
  299. ab�������z
  300. ab\x{ffffffff}z
  301. /ab�Az/utf8_input
  302. ab�Az
  303. ab\x{80000041}z
  304. /(?i:A{1,}\6666666666)/
  305. A\x{1b6}6666666
  306. # End of testinput11