# This set of tests is for the 16-bit and 32-bit libraries' basic (non-UTF) # features that are not compatible with the 8-bit library, or which give # different output in 16-bit or 32-bit mode. The output for the two widths is # different, so they have separate output files. #forbid_utf #newline_default LF ANY ANYCRLF /[^\x{c4}]/IB ------------------------------------------------------------------ Bra [^\x{c4}] (not) Ket End ------------------------------------------------------------------ Capture group count = 0 Subject length lower bound = 1 /\x{100}/I Capture group count = 0 First code unit = \x{100} Subject length lower bound = 1 / (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* # optional leading comment (?: (?: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom | " (?: # opening quote... [^\\\x80-\xff\n\015"] # Anything except backslash and quote | # or \\ [^\x80-\xff] # Escaped something (something != CR) )* " # closing quote ) # initial word (?: (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* \. (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* (?: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom | " (?: # opening quote... [^\\\x80-\xff\n\015"] # Anything except backslash and quote | # or \\ [^\x80-\xff] # Escaped something (something != CR) )* " # closing quote ) )* # further okay, if led by a period (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* @ (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* (?: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom | \[ # [ (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff \] # ] ) # initial subdomain (?: # (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* \. # if led by a period... (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* (?: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom | \[ # [ (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff \] # ] ) # ...further okay )* # address | # or (?: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom | " (?: # opening quote... [^\\\x80-\xff\n\015"] # Anything except backslash and quote | # or \\ [^\x80-\xff] # Escaped something (something != CR) )* " # closing quote ) # one word, optionally followed by.... (?: [^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or... \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) | # comments, or... " (?: # opening quote... [^\\\x80-\xff\n\015"] # Anything except backslash and quote | # or \\ [^\x80-\xff] # Escaped something (something != CR) )* " # closing quote # quoted strings )* < (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* # leading < (?: @ (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* (?: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom | \[ # [ (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff \] # ] ) # initial subdomain (?: # (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* \. # if led by a period... (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* (?: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom | \[ # [ (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff \] # ] ) # ...further okay )* (?: (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* , (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* @ (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* (?: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom | \[ # [ (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff \] # ] ) # initial subdomain (?: # (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* \. # if led by a period... (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* (?: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom | \[ # [ (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff \] # ] ) # ...further okay )* )* # further okay, if led by comma : # closing colon (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* )? # optional route (?: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom | " (?: # opening quote... [^\\\x80-\xff\n\015"] # Anything except backslash and quote | # or \\ [^\x80-\xff] # Escaped something (something != CR) )* " # closing quote ) # initial word (?: (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* \. (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* (?: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom | " (?: # opening quote... [^\\\x80-\xff\n\015"] # Anything except backslash and quote | # or \\ [^\x80-\xff] # Escaped something (something != CR) )* " # closing quote ) )* # further okay, if led by a period (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* @ (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* (?: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom | \[ # [ (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff \] # ] ) # initial subdomain (?: # (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* \. # if led by a period... (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* (?: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom | \[ # [ (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff \] # ] ) # ...further okay )* # address spec (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* > # trailing > # name and address ) (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* # optional trailing comment /Ix Capture group count = 0 Contains explicit CR or LF match Options: extended Starting code units: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8 9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xff Subject length lower bound = 3 /[\h]/B ------------------------------------------------------------------ Bra [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}] Ket End ------------------------------------------------------------------ >\x09< 0: \x09 /[\h]+/B ------------------------------------------------------------------ Bra [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]++ Ket End ------------------------------------------------------------------ >\x09\x20\xa0< 0: \x09 \xa0 /[\v]/B ------------------------------------------------------------------ Bra [\x0a-\x0d\x85\x{2028}-\x{2029}] Ket End ------------------------------------------------------------------ /[^\h]/B ------------------------------------------------------------------ Bra [^\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}] Ket End ------------------------------------------------------------------ /\h+/I Capture group count = 0 Starting code units: \x09 \x20 \xa0 \xff Subject length lower bound = 1 \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} 0: \x{1680}\x{2000}\x{202f}\x{3000} \x{3001}\x{2fff}\x{200a}\xa0\x{2000} 0: \x{200a}\xa0\x{2000} /[\h\x{dc00}]+/IB ------------------------------------------------------------------ Bra [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}\x{dc00}]++ Ket End ------------------------------------------------------------------ Capture group count = 0 Starting code units: \x09 \x20 \xa0 \xff Subject length lower bound = 1 \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} 0: \x{1680}\x{2000}\x{202f}\x{3000} \x{3001}\x{2fff}\x{200a}\xa0\x{2000} 0: \x{200a}\xa0\x{2000} /\H+/I Capture group count = 0 Subject length lower bound = 1 \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} 0: \x{167f}\x{1681}\x{180d}\x{180f} \x{2000}\x{200a}\x{1fff}\x{200b} 0: \x{1fff}\x{200b} \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} 0: \x{202e}\x{2030}\x{205e}\x{2060} \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001} 0: \x9f\xa1\x{2fff}\x{3001} /[\H\x{d800}]+/ \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} 0: \x{167f}\x{1681}\x{180d}\x{180f} \x{2000}\x{200a}\x{1fff}\x{200b} 0: \x{1fff}\x{200b} \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} 0: \x{202e}\x{2030}\x{205e}\x{2060} \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001} 0: \x9f\xa1\x{2fff}\x{3001} /\v+/I Capture group count = 0 Starting code units: \x0a \x0b \x0c \x0d \x85 \xff Subject length lower bound = 1 \x{2027}\x{2030}\x{2028}\x{2029} 0: \x{2028}\x{2029} \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d 0: \x85\x0a\x0b\x0c\x0d /[\v\x{dc00}]+/IB ------------------------------------------------------------------ Bra [\x0a-\x0d\x85\x{2028}-\x{2029}\x{dc00}]++ Ket End ------------------------------------------------------------------ Capture group count = 0 Starting code units: \x0a \x0b \x0c \x0d \x85 \xff Subject length lower bound = 1 \x{2027}\x{2030}\x{2028}\x{2029} 0: \x{2028}\x{2029} \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d 0: \x85\x0a\x0b\x0c\x0d /\V+/I Capture group count = 0 Subject length lower bound = 1 \x{2028}\x{2029}\x{2027}\x{2030} 0: \x{2027}\x{2030} \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86 0: \x09\x0e\x84\x86 /[\V\x{d800}]+/ \x{2028}\x{2029}\x{2027}\x{2030} 0: \x{2027}\x{2030} \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86 0: \x09\x0e\x84\x86 /\R+/I,bsr=unicode Capture group count = 0 \R matches any Unicode newline Starting code units: \x0a \x0b \x0c \x0d \x85 \xff Subject length lower bound = 1 \x{2027}\x{2030}\x{2028}\x{2029} 0: \x{2028}\x{2029} \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d 0: \x85\x0a\x0b\x0c\x0d /\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I Capture group count = 0 First code unit = \x{d800} Last code unit = \x{dd00} Subject length lower bound = 6 \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00} 0: \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00} /[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B ------------------------------------------------------------------ Bra [^\x{80}] (not) [^\x{ff}] (not) [^\x{100}] (not) [^\x{1000}] (not) [^\x{ffff}] (not) Ket End ------------------------------------------------------------------ /[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi ------------------------------------------------------------------ Bra /i [^\x{80}] (not) /i [^\x{ff}] (not) /i [^\x{100}] (not) /i [^\x{1000}] (not) /i [^\x{ffff}] (not) Ket End ------------------------------------------------------------------ /[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B ------------------------------------------------------------------ Bra [^\x{100}]* (not) [^\x{1000}]+ (not) [^\x{ffff}]?? (not) [^\x{8000}]{4} (not) [^\x{8000}]* (not) [^\x{7fff}]{2} (not) [^\x{7fff}]{0,7}? (not) [^\x{100}]{5} (not) [^\x{100}]?+ (not) Ket End ------------------------------------------------------------------ /[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi ------------------------------------------------------------------ Bra /i [^\x{100}]* (not) /i [^\x{1000}]+ (not) /i [^\x{ffff}]?? (not) /i [^\x{8000}]{4} (not) /i [^\x{8000}]* (not) /i [^\x{7fff}]{2} (not) /i [^\x{7fff}]{0,7}? (not) /i [^\x{100}]{5} (not) /i [^\x{100}]?+ (not) Ket End ------------------------------------------------------------------ /(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark XX 0: XX MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF /(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark XX 0: XX MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE /\u0100/B,alt_bsux,allow_empty_class,match_unset_backref ------------------------------------------------------------------ Bra \x{100} Ket End ------------------------------------------------------------------ /[\u0100-\u0200]/B,alt_bsux,allow_empty_class,match_unset_backref ------------------------------------------------------------------ Bra [\x{100}-\x{200}] Ket End ------------------------------------------------------------------ /\ud800/B,alt_bsux,allow_empty_class,match_unset_backref ------------------------------------------------------------------ Bra \x{d800} Ket End ------------------------------------------------------------------ /^\x{ffff}+/i \x{ffff} 0: \x{ffff} /^\x{ffff}?/i \x{ffff} 0: \x{ffff} /^\x{ffff}*/i \x{ffff} 0: \x{ffff} /^\x{ffff}{3}/i \x{ffff}\x{ffff}\x{ffff} 0: \x{ffff}\x{ffff}\x{ffff} /^\x{ffff}{0,3}/i \x{ffff} 0: \x{ffff} /[^\x00-a]{12,}[^b-\xff]*/B ------------------------------------------------------------------ Bra [^\x00-a]{12,} [^b-\xff]*+ Ket End ------------------------------------------------------------------ /[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B ------------------------------------------------------------------ Bra [^\x09-\x0d ]* \s* [0-9A-Z_a-z]++ \W+ [^0-9]*? \d 0 [^0-9A-Z_a-z]{4,6}? \w* A Ket End ------------------------------------------------------------------ /a*[b-\x{200}]?a#a*[b-\x{200}]?b#[a-f]*[g-\x{200}]*#[g-\x{200}]*[a-c]*#[g-\x{200}]*[a-h]*/B ------------------------------------------------------------------ Bra a* [b-\xff\x{100}-\x{200}]?+ a# a*+ [b-\xff\x{100}-\x{200}]? b# [a-f]*+ [g-\xff\x{100}-\x{200}]*+ # [g-\xff\x{100}-\x{200}]*+ [a-c]*+ # [g-\xff\x{100}-\x{200}]* [a-h]*+ Ket End ------------------------------------------------------------------ /^[\x{1234}\x{4321}]{2,4}?/ \x{1234}\x{1234}\x{1234} 0: \x{1234}\x{1234} # Check maximum non-UTF character size for the 16-bit library. /\x{ffff}/ A\x{ffff}B 0: \x{ffff} /\x{10000}/ /\o{20000}/ # Check maximum character size for the 32-bit library. These will all give # errors in the 16-bit library. /\x{110000}/ /\x{7fffffff}/ /\x{80000000}/ /\x{ffffffff}/ /\x{100000000}/ Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large /\o{17777777777}/ /\o{20000000000}/ /\o{37777777777}/ /\o{40000000000}/ Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large /\x{7fffffff}\x{7fffffff}/I Capture group count = 0 First code unit = \x{7fffffff} Last code unit = \x{7fffffff} Subject length lower bound = 2 /\x{80000000}\x{80000000}/I Capture group count = 0 First code unit = \x{80000000} Last code unit = \x{80000000} Subject length lower bound = 2 /\x{ffffffff}\x{ffffffff}/I Capture group count = 0 First code unit = \x{ffffffff} Last code unit = \x{ffffffff} Subject length lower bound = 2 # Non-UTF characters /.{2,3}/ \x{400000}\x{400001}\x{400002}\x{400003} 0: \x{400000}\x{400001}\x{400002} /\x{400000}\x{800000}/IBi ------------------------------------------------------------------ Bra /i \x{400000}\x{800000} Ket End ------------------------------------------------------------------ Capture group count = 0 Options: caseless First code unit = \x{400000} Last code unit = \x{800000} Subject length lower bound = 2 # Check character ranges /[\H]/IB ------------------------------------------------------------------ Bra [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffffffff}] Ket End ------------------------------------------------------------------ Capture group count = 0 Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 /[\V]/IB ------------------------------------------------------------------ Bra [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffffffff}] Ket End ------------------------------------------------------------------ Capture group count = 0 Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 /(*THEN:\[A]{65501})/expand # We can use pcre2test's utf8_input modifier to create wide pattern characters, # even though this test is run when UTF is not supported. /a\x{d800}b/utf8_input aí €b 0: a\x{d800}b a\x{d800}b 0: a\x{d800}b a\o{154000}b 0: a\x{d800}b \= Expect warning unless 32bit a\N{U+d800}b 0: a\x{d800}b /a\x{ffff}b/utf8_input aï¿¿b 0: a\x{ffff}b a\x{ffff}b 0: a\x{ffff}b a\o{177777}b 0: a\x{ffff}b a\N{U+ffff}b 0: a\x{ffff}b /abý¿¿¿¿¿z/utf8_input abý¿¿¿¿¿z 0: ab\x{7fffffff}z ab\x{7fffffff}z 0: ab\x{7fffffff}z ab\o{17777777777}z 0: ab\x{7fffffff}z ab\N{U+7fffffff}z ** Warning: character \N{U+7fffffff} is greater than 0x10ffff and should not be encoded as UTF-32 0: ab\x{7fffffff}z /abÿý¿¿¿¿¿z/utf8_input abÿý¿¿¿¿¿z 0: ab\x{ffffffff}z ab\x{ffffffff}z 0: ab\x{ffffffff}z /abÿAz/utf8_input abÿAz 0: ab\x{80000041}z ab\x{80000041}z 0: ab\x{80000041}z \= Expect no match abAz No match aAz No match ab\377Az No match ab\xff\N{U+0041}z No match ab\N{U+ff}\N{U+41}z No match /ab\x{80000041}z/ ab\x{80000041}z 0: ab\x{80000041}z /(?i:A{1,}\6666666666)/ A\x{1b6}6666666 0: A\x{1b6}6666666 /abc/substitute_extended,replace=>\777< abc 1: >\x{1ff}< /abc/substitute_extended,replace=>\o{012345}< abc 1: >\x{14e5}< # Character range merging tests /[\x{100}-\x{200}\H\x{8000}-\x{9000}]/B ------------------------------------------------------------------ Bra [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffffffff}] Ket End ------------------------------------------------------------------ /[\x{100}-\x{200}\V\x{8000}-\x{9000}]/B ------------------------------------------------------------------ Bra [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffffffff}] Ket End ------------------------------------------------------------------ /[\x00-\x{6000}\x{3000}-\x{ffff}]#[\x00-\x{6000}\x{3000}-\x{ffff}]{5,7}?/B ------------------------------------------------------------------ Bra [\x00-\xff\x{100}-\x{ffff}] # [\x00-\xff\x{100}-\x{ffff}]{5,7}? Ket End ------------------------------------------------------------------ /[\x00-\x{6000}\x{3000}-\x{ffffffff}]#[\x00-\x{6000}\x{3000}-\x{ffffffff}]{5,7}?/B ------------------------------------------------------------------ Bra AllAny # AllAny{5} AllAny{0,2}? Ket End ------------------------------------------------------------------ /[\x00-\x2f\x11-\xff]*?!/B ------------------------------------------------------------------ Bra [\x00-\xff]*? ! Ket End ------------------------------------------------------------------ abcd!e 0: abcd! /i/turkish_casing Failed: error 204 at offset 0: PCRE2_EXTRA_TURKISH_CASING require Unicode (UTF or UCP) mode # Character list tests /([\x{100}-\x{7fff}\x{9000}\x{9002}\x{9004}\x{9006}\x{9008}\x{10000}-\x{7fffffff}]{3,8}?).#/B ------------------------------------------------------------------ Bra CBra 1 [\x{100}-\x{7fff}\x{9000}\x{9002}\x{9004}\x{9006}\x{9008}\x{10000}-\x{7fffffff}]{3,8}? Ket Any # Ket End ------------------------------------------------------------------ \x{9001}\x{9007}\x{8000}\x{ffff}\x{9002}\x{7fff}\x{10000}\x{7fffffff}\x{500000}\x{9006}# 0: \x{9002}\x{7fff}\x{10000}\x{7fffffff}\x{500000}\x{9006}# 1: \x{9002}\x{7fff}\x{10000}\x{7fffffff}\x{500000} /([\x{3000}\x{3001}\x{3003}\x{3004}\x{3006}\x{3007}\x{8000}-\x{ffff}\x{100001}\x{100002}\x{100004}\x{100005}\x{100007}\x{100008}\x{10000a}\x{10000b}\x{80000000}-\x{ffffffff}]{5,}).#/B ------------------------------------------------------------------ Bra CBra 1 [\x{3000}-\x{3001}\x{3003}-\x{3004}\x{3006}-\x{3007}\x{8000}-\x{ffff}\x{100001}-\x{100002}\x{100004}-\x{100005}\x{100007}-\x{100008}\x{10000a}-\x{10000b}\x{80000000}-\x{ffffffff}]{5,} Ket Any # Ket End ------------------------------------------------------------------ \x{2fff}\x{3002}\x{7fff}\x{100000}\x{7fffffff}\x{3000}\x{3007}\x{8000}\x{ffff}\x{100001}\x{10000b}\x{80000000}\x{ffffffff}\x{3000}# 0: \x{3000}\x{3007}\x{8000}\x{ffff}\x{100001}\x{10000b}\x{80000000}\x{ffffffff}\x{3000}# 1: \x{3000}\x{3007}\x{8000}\x{ffff}\x{100001}\x{10000b}\x{80000000}\x{ffffffff} /([^\x{4000}\x{4002}\x{4004}\x{4005}\x{4007}\x{4009}\x{400a}\x{f000}\x{f002}\x{f004}\x{f005}\x{f007}\x{f009}\x{f00a}\x{100000}\x{100002}\x{100004}\x{100005}\x{100007}\x{100009}\x{10000a}\x{a0000000}\x{a0000002}\x{a0000004}\x{a0000005}\x{a0000007}\x{a0000009}\x{a000000a}]+).#/B ------------------------------------------------------------------ Bra CBra 1 [^\x{4000}\x{4002}\x{4004}-\x{4005}\x{4007}\x{4009}-\x{400a}\x{f000}\x{f002}\x{f004}-\x{f005}\x{f007}\x{f009}-\x{f00a}\x{100000}\x{100002}\x{100004}-\x{100005}\x{100007}\x{100009}-\x{10000a}\x{a0000000}\x{a0000002}\x{a0000004}-\x{a0000005}\x{a0000007}\x{a0000009}-\x{a000000a}]+ Ket Any # Ket End ------------------------------------------------------------------ \x{4000}\x{4002}\x{4004}\x{4005}\x{4007}\x{4009}\x{400a}\x{3fff}\x{4001}\x{4003}\x{4006}\x{4008}\x{400b}\x{100}# 0: \x{3fff}\x{4001}\x{4003}\x{4006}\x{4008}\x{400b}\x{100}# 1: \x{3fff}\x{4001}\x{4003}\x{4006}\x{4008}\x{400b} \x{f000}\x{f002}\x{f004}\x{f005}\x{f007}\x{f009}\x{f00a}\x{efff}\x{f001}\x{f003}\x{f006}\x{f008}\x{f00b}\x{100}# 0: \x{efff}\x{f001}\x{f003}\x{f006}\x{f008}\x{f00b}\x{100}# 1: \x{efff}\x{f001}\x{f003}\x{f006}\x{f008}\x{f00b} \x{100000}\x{100002}\x{100004}\x{100005}\x{100007}\x{100009}\x{10000a}\x{fffff}\x{100001}\x{100003}\x{100006}\x{100008}\x{10000b}\x{100}# 0: \x{fffff}\x{100001}\x{100003}\x{100006}\x{100008}\x{10000b}\x{100}# 1: \x{fffff}\x{100001}\x{100003}\x{100006}\x{100008}\x{10000b} \x{a0000000}\x{a0000002}\x{a0000004}\x{a0000005}\x{a0000007}\x{a0000009}\x{a000000a}\x{9fffffff}\x{a0000001}\x{a0000003}\x{a0000006}\x{a0000008}\x{a000000b}\x{100}# 0: \x{9fffffff}\x{a0000001}\x{a0000003}\x{a0000006}\x{a0000008}\x{a000000b}\x{100}# 1: \x{9fffffff}\x{a0000001}\x{a0000003}\x{a0000006}\x{a0000008}\x{a000000b} # -------------- # EXTENDED CHARACTER CLASSES (UTS#18) # META_BIGVALUE tests /\x{80000000}/B ------------------------------------------------------------------ Bra \x{80000000} Ket End ------------------------------------------------------------------ \x{80000000} 0: \x{80000000} \= Expect no match \x{7fffffff} No match \x{80000001} No match /[\x{80000000}-\x{8000000f}\x{8fffffff}]/B ------------------------------------------------------------------ Bra [\x{80000000}-\x{8000000f}\x{8fffffff}] Ket End ------------------------------------------------------------------ \x{80000002} 0: \x{80000002} \x{8fffffff} 0: \x{8fffffff} \= Expect no match \x{7fffffff} No match \x{90000000} No match /\x{80000000}/B,alt_extended_class ------------------------------------------------------------------ Bra \x{80000000} Ket End ------------------------------------------------------------------ \x{80000000} 0: \x{80000000} \= Expect no match \x{7fffffff} No match \x{80000001} No match /[\x{80000000}-\x{8000000f}\x{8fffffff}]/B,alt_extended_class ------------------------------------------------------------------ Bra [\x{80000000}-\x{8000000f}\x{8fffffff}] Ket End ------------------------------------------------------------------ \x{80000002} 0: \x{80000002} \x{8fffffff} 0: \x{8fffffff} \= Expect no match \x{7fffffff} No match \x{90000000} No match /[\x{80000000}-\x{8000000f}--\x{80000002}]/B,alt_extended_class ------------------------------------------------------------------ Bra eclass[ no bitmap xclass: [\x{80000000}-\x{8000000f}] xclass: [^\x{80000002}] AND ] Ket End ------------------------------------------------------------------ \x{80000001} 0: \x{80000001} \x{80000003} 0: \x{80000003} \= Expect no match \x{80000002} No match /[[\x{80000000}-\x{8000000f}]--[\x{80000002}]]/B,alt_extended_class ------------------------------------------------------------------ Bra eclass[ no bitmap xclass: [\x{80000000}-\x{8000000f}] xclass: [^\x{80000002}] AND ] Ket End ------------------------------------------------------------------ \x{80000001} 0: \x{80000001} \x{80000003} 0: \x{80000003} \= Expect no match \x{80000002} No match # -------------- # EXTENDED CHARACTER CLASSES (Perl) # META_BIGVALUE tests /(?[[\x{80000000}-\x{8000000f}]+\x{8fffffff}])/B ------------------------------------------------------------------ Bra eclass[ no bitmap xclass: [\x{80000000}-\x{8000000f}] xclass: [\x{8fffffff}] OR ] Ket End ------------------------------------------------------------------ \x{80000002} 0: \x{80000002} \x{8fffffff} 0: \x{8fffffff} \= Expect no match \x{7fffffff} No match \x{90000000} No match /(?[[\x{80000000}-\x{8000000f}]-\x{80000002}])/B ------------------------------------------------------------------ Bra eclass[ no bitmap xclass: [\x{80000000}-\x{8000000f}] xclass: [^\x{80000002}] AND ] Ket End ------------------------------------------------------------------ \x{80000001} 0: \x{80000001} \x{80000003} 0: \x{80000003} \= Expect no match \x{80000002} No match /(?[[\x{80000000}-\x{8000000f}]-\x{80000002}])/B ------------------------------------------------------------------ Bra eclass[ no bitmap xclass: [\x{80000000}-\x{8000000f}] xclass: [^\x{80000002}] AND ] Ket End ------------------------------------------------------------------ \x{80000001} 0: \x{80000001} \x{80000003} 0: \x{80000003} \= Expect no match \x{80000002} No match # -------------- # End of testinput11