# This set of tests is for the 16-bit and 32-bit libraries' basic (non-UTF) # features that are not compatible with the 8-bit library, or which give # different output in 16-bit or 32-bit mode. The output for the two widths is # different, so they have separate output files. #forbid_utf #newline_default LF ANY ANYCRLF /[^\x{c4}]/IB ------------------------------------------------------------------ Bra [^\x{c4}] (not) Ket End ------------------------------------------------------------------ Capture group count = 0 Subject length lower bound = 1 /\x{100}/I Capture group count = 0 First code unit = \x{100} Subject length lower bound = 1 / (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* # optional leading comment (?: (?: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom | " (?: # opening quote... [^\\\x80-\xff\n\015"] # Anything except backslash and quote | # or \\ [^\x80-\xff] # Escaped something (something != CR) )* " # closing quote ) # initial word (?: (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* \. (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* (?: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom | " (?: # opening quote... [^\\\x80-\xff\n\015"] # Anything except backslash and quote | # or \\ [^\x80-\xff] # Escaped something (something != CR) )* " # closing quote ) )* # further okay, if led by a period (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* @ (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* (?: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom | \[ # [ (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff \] # ] ) # initial subdomain (?: # (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* \. # if led by a period... (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* (?: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom | \[ # [ (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff \] # ] ) # ...further okay )* # address | # or (?: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom | " (?: # opening quote... [^\\\x80-\xff\n\015"] # Anything except backslash and quote | # or \\ [^\x80-\xff] # Escaped something (something != CR) )* " # closing quote ) # one word, optionally followed by.... (?: [^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or... \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) | # comments, or... " (?: # opening quote... [^\\\x80-\xff\n\015"] # Anything except backslash and quote | # or \\ [^\x80-\xff] # Escaped something (something != CR) )* " # closing quote # quoted strings )* < (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* # leading < (?: @ (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* (?: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom | \[ # [ (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff \] # ] ) # initial subdomain (?: # (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* \. # if led by a period... (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* (?: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom | \[ # [ (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff \] # ] ) # ...further okay )* (?: (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* , (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* @ (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* (?: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom | \[ # [ (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff \] # ] ) # initial subdomain (?: # (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* \. # if led by a period... (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* (?: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom | \[ # [ (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff \] # ] ) # ...further okay )* )* # further okay, if led by comma : # closing colon (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* )? # optional route (?: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom | " (?: # opening quote... [^\\\x80-\xff\n\015"] # Anything except backslash and quote | # or \\ [^\x80-\xff] # Escaped something (something != CR) )* " # closing quote ) # initial word (?: (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* \. (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* (?: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom | " (?: # opening quote... [^\\\x80-\xff\n\015"] # Anything except backslash and quote | # or \\ [^\x80-\xff] # Escaped something (something != CR) )* " # closing quote ) )* # further okay, if led by a period (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* @ (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* (?: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom | \[ # [ (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff \] # ] ) # initial subdomain (?: # (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* \. # if led by a period... (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* (?: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom | \[ # [ (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff \] # ] ) # ...further okay )* # address spec (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* > # trailing > # name and address ) (?: [\040\t] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* \) )* # optional trailing comment /Ix Capture group count = 0 Contains explicit CR or LF match Options: extended Starting code units: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8 9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xff Subject length lower bound = 3 /[\h]/B ------------------------------------------------------------------ Bra [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}] Ket End ------------------------------------------------------------------ >\x09< 0: \x09 /[\h]+/B ------------------------------------------------------------------ Bra [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]++ Ket End ------------------------------------------------------------------ >\x09\x20\xa0< 0: \x09 \xa0 /[\v]/B ------------------------------------------------------------------ Bra [\x0a-\x0d\x85\x{2028}-\x{2029}] Ket End ------------------------------------------------------------------ /[^\h]/B ------------------------------------------------------------------ Bra [^\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}] Ket End ------------------------------------------------------------------ /\h+/I Capture group count = 0 Starting code units: \x09 \x20 \xa0 \xff Subject length lower bound = 1 \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} 0: \x{1680}\x{2000}\x{202f}\x{3000} \x{3001}\x{2fff}\x{200a}\xa0\x{2000} 0: \x{200a}\xa0\x{2000} /[\h\x{dc00}]+/IB ------------------------------------------------------------------ Bra [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}\x{dc00}]++ Ket End ------------------------------------------------------------------ Capture group count = 0 Starting code units: \x09 \x20 \xa0 \xff Subject length lower bound = 1 \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} 0: \x{1680}\x{2000}\x{202f}\x{3000} \x{3001}\x{2fff}\x{200a}\xa0\x{2000} 0: \x{200a}\xa0\x{2000} /\H+/I Capture group count = 0 Subject length lower bound = 1 \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} 0: \x{167f}\x{1681}\x{180d}\x{180f} \x{2000}\x{200a}\x{1fff}\x{200b} 0: \x{1fff}\x{200b} \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} 0: \x{202e}\x{2030}\x{205e}\x{2060} \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001} 0: \x9f\xa1\x{2fff}\x{3001} /[\H\x{d800}]+/ \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} 0: \x{167f}\x{1681}\x{180d}\x{180f} \x{2000}\x{200a}\x{1fff}\x{200b} 0: \x{1fff}\x{200b} \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} 0: \x{202e}\x{2030}\x{205e}\x{2060} \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001} 0: \x9f\xa1\x{2fff}\x{3001} /\v+/I Capture group count = 0 Starting code units: \x0a \x0b \x0c \x0d \x85 \xff Subject length lower bound = 1 \x{2027}\x{2030}\x{2028}\x{2029} 0: \x{2028}\x{2029} \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d 0: \x85\x0a\x0b\x0c\x0d /[\v\x{dc00}]+/IB ------------------------------------------------------------------ Bra [\x0a-\x0d\x85\x{2028}-\x{2029}\x{dc00}]++ Ket End ------------------------------------------------------------------ Capture group count = 0 Starting code units: \x0a \x0b \x0c \x0d \x85 \xff Subject length lower bound = 1 \x{2027}\x{2030}\x{2028}\x{2029} 0: \x{2028}\x{2029} \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d 0: \x85\x0a\x0b\x0c\x0d /\V+/I Capture group count = 0 Subject length lower bound = 1 \x{2028}\x{2029}\x{2027}\x{2030} 0: \x{2027}\x{2030} \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86 0: \x09\x0e\x84\x86 /[\V\x{d800}]+/ \x{2028}\x{2029}\x{2027}\x{2030} 0: \x{2027}\x{2030} \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86 0: \x09\x0e\x84\x86 /\R+/I,bsr=unicode Capture group count = 0 \R matches any Unicode newline Starting code units: \x0a \x0b \x0c \x0d \x85 \xff Subject length lower bound = 1 \x{2027}\x{2030}\x{2028}\x{2029} 0: \x{2028}\x{2029} \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d 0: \x85\x0a\x0b\x0c\x0d /\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I Capture group count = 0 First code unit = \x{d800} Last code unit = \x{dd00} Subject length lower bound = 6 \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00} 0: \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00} /[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B ------------------------------------------------------------------ Bra [^\x{80}] (not) [^\x{ff}] (not) [^\x{100}] (not) [^\x{1000}] (not) [^\x{ffff}] (not) Ket End ------------------------------------------------------------------ /[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi ------------------------------------------------------------------ Bra /i [^\x{80}] (not) /i [^\x{ff}] (not) /i [^\x{100}] (not) /i [^\x{1000}] (not) /i [^\x{ffff}] (not) Ket End ------------------------------------------------------------------ /[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B ------------------------------------------------------------------ Bra [^\x{100}]* (not) [^\x{1000}]+ (not) [^\x{ffff}]?? (not) [^\x{8000}]{4} (not) [^\x{8000}]* (not) [^\x{7fff}]{2} (not) [^\x{7fff}]{0,7}? (not) [^\x{100}]{5} (not) [^\x{100}]?+ (not) Ket End ------------------------------------------------------------------ /[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi ------------------------------------------------------------------ Bra /i [^\x{100}]* (not) /i [^\x{1000}]+ (not) /i [^\x{ffff}]?? (not) /i [^\x{8000}]{4} (not) /i [^\x{8000}]* (not) /i [^\x{7fff}]{2} (not) /i [^\x{7fff}]{0,7}? (not) /i [^\x{100}]{5} (not) /i [^\x{100}]?+ (not) Ket End ------------------------------------------------------------------ /(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark XX 0: XX MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF /(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark XX 0: XX MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE /\u0100/B,alt_bsux,allow_empty_class,match_unset_backref ------------------------------------------------------------------ Bra \x{100} Ket End ------------------------------------------------------------------ /[\u0100-\u0200]/B,alt_bsux,allow_empty_class,match_unset_backref ------------------------------------------------------------------ Bra [\x{100}-\x{200}] Ket End ------------------------------------------------------------------ /\ud800/B,alt_bsux,allow_empty_class,match_unset_backref ------------------------------------------------------------------ Bra \x{d800} Ket End ------------------------------------------------------------------ /^\x{ffff}+/i \x{ffff} 0: \x{ffff} /^\x{ffff}?/i \x{ffff} 0: \x{ffff} /^\x{ffff}*/i \x{ffff} 0: \x{ffff} /^\x{ffff}{3}/i \x{ffff}\x{ffff}\x{ffff} 0: \x{ffff}\x{ffff}\x{ffff} /^\x{ffff}{0,3}/i \x{ffff} 0: \x{ffff} /[^\x00-a]{12,}[^b-\xff]*/B ------------------------------------------------------------------ Bra [^\x00-a]{12,} [^b-\xff]*+ Ket End ------------------------------------------------------------------ /[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B ------------------------------------------------------------------ Bra [^\x09-\x0d ]* \s* [0-9A-Z_a-z]++ \W+ [^0-9]*? \d 0 [^0-9A-Z_a-z]{4,6}? \w* A Ket End ------------------------------------------------------------------ /a*[b-\x{200}]?a#a*[b-\x{200}]?b#[a-f]*[g-\x{200}]*#[g-\x{200}]*[a-c]*#[g-\x{200}]*[a-h]*/B ------------------------------------------------------------------ Bra a* [b-\xff\x{100}-\x{200}]?+ a# a*+ [b-\xff\x{100}-\x{200}]? b# [a-f]*+ [g-\xff\x{100}-\x{200}]*+ # [g-\xff\x{100}-\x{200}]*+ [a-c]*+ # [g-\xff\x{100}-\x{200}]* [a-h]*+ Ket End ------------------------------------------------------------------ /^[\x{1234}\x{4321}]{2,4}?/ \x{1234}\x{1234}\x{1234} 0: \x{1234}\x{1234} # Check maximum non-UTF character size for the 16-bit library. /\x{ffff}/ A\x{ffff}B 0: \x{ffff} /\x{10000}/ Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large /\o{20000}/ # Check maximum character size for the 32-bit library. These will all give # errors in the 16-bit library. /\x{110000}/ Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large /\x{7fffffff}/ Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large /\x{80000000}/ Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large /\x{ffffffff}/ Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large /\x{100000000}/ Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large /\o{17777777777}/ Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large /\o{20000000000}/ Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large /\o{37777777777}/ Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large /\o{40000000000}/ Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large /\x{7fffffff}\x{7fffffff}/I Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large /\x{80000000}\x{80000000}/I Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large /\x{ffffffff}\x{ffffffff}/I Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large # Non-UTF characters /.{2,3}/ \x{400000}\x{400001}\x{400002}\x{400003} ** Character \x{400000} is greater than 0xffff and UTF-16 mode is not enabled. ** Truncation will probably give the wrong result. ** Character \x{400001} is greater than 0xffff and UTF-16 mode is not enabled. ** Truncation will probably give the wrong result. ** Character \x{400002} is greater than 0xffff and UTF-16 mode is not enabled. ** Truncation will probably give the wrong result. ** Character \x{400003} is greater than 0xffff and UTF-16 mode is not enabled. ** Truncation will probably give the wrong result. 0: \x00\x01\x02 /\x{400000}\x{800000}/IBi Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large # Check character ranges /[\H]/IB ------------------------------------------------------------------ Bra [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffff}] Ket End ------------------------------------------------------------------ Capture group count = 0 Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 /[\V]/IB ------------------------------------------------------------------ Bra [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffff}] Ket End ------------------------------------------------------------------ Capture group count = 0 Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 /(*THEN:\[A]{65501})/expand # We can use pcre2test's utf8_input modifier to create wide pattern characters, # even though this test is run when UTF is not supported. /a\x{d800}b/utf8_input aí €b 0: a\x{d800}b a\x{d800}b 0: a\x{d800}b a\o{154000}b 0: a\x{d800}b \= Expect warning unless 32bit a\N{U+d800}b ** Warning: character \N{U+d800} is a surrogate and should not be encoded as UTF-16 0: a\x{d800}b /a\x{ffff}b/utf8_input aï¿¿b 0: a\x{ffff}b a\x{ffff}b 0: a\x{ffff}b a\o{177777}b 0: a\x{ffff}b a\N{U+ffff}b 0: a\x{ffff}b /abý¿¿¿¿¿z/utf8_input ** Failed: character value greater than 0xffff cannot be converted to 16-bit in non-UTF mode abý¿¿¿¿¿z ab\x{7fffffff}z ab\o{17777777777}z ab\N{U+7fffffff}z /abÿý¿¿¿¿¿z/utf8_input ** Failed: invalid UTF-8 string cannot be converted to 16-bit string abÿý¿¿¿¿¿z ab\x{ffffffff}z /abÿAz/utf8_input ** Failed: invalid UTF-8 string cannot be converted to 16-bit string abÿAz ab\x{80000041}z \= Expect no match abAz aAz ab\377Az ab\xff\N{U+0041}z ab\N{U+ff}\N{U+41}z /ab\x{80000041}z/ Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large ab\x{80000041}z /(?i:A{1,}\6666666666)/ A\x{1b6}6666666 0: A\x{1b6}6666666 /abc/substitute_extended,replace=>\777< abc 1: >\x{1ff}< /abc/substitute_extended,replace=>\o{012345}< abc 1: >\x{14e5}< # Character range merging tests /[\x{100}-\x{200}\H\x{8000}-\x{9000}]/B ------------------------------------------------------------------ Bra [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffff}] Ket End ------------------------------------------------------------------ /[\x{100}-\x{200}\V\x{8000}-\x{9000}]/B ------------------------------------------------------------------ Bra [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffff}] Ket End ------------------------------------------------------------------ /[\x00-\x{6000}\x{3000}-\x{ffff}]#[\x00-\x{6000}\x{3000}-\x{ffff}]{5,7}?/B ------------------------------------------------------------------ Bra AllAny # AllAny{5} AllAny{0,2}? Ket End ------------------------------------------------------------------ /[\x00-\x{6000}\x{3000}-\x{ffffffff}]#[\x00-\x{6000}\x{3000}-\x{ffffffff}]{5,7}?/B Failed: error 134 at offset 34: character code point value in \x{} or \o{} is too large /[\x00-\x2f\x11-\xff]*?!/B ------------------------------------------------------------------ Bra [\x00-\xff]*? ! Ket End ------------------------------------------------------------------ abcd!e 0: abcd! /i/turkish_casing Failed: error 204 at offset 0: PCRE2_EXTRA_TURKISH_CASING require Unicode (UTF or UCP) mode # Character list tests /([\x{100}-\x{7fff}\x{9000}\x{9002}\x{9004}\x{9006}\x{9008}\x{10000}-\x{7fffffff}]{3,8}?).#/B Failed: error 134 at offset 66: character code point value in \x{} or \o{} is too large \x{9001}\x{9007}\x{8000}\x{ffff}\x{9002}\x{7fff}\x{10000}\x{7fffffff}\x{500000}\x{9006}# /([\x{3000}\x{3001}\x{3003}\x{3004}\x{3006}\x{3007}\x{8000}-\x{ffff}\x{100001}\x{100002}\x{100004}\x{100005}\x{100007}\x{100008}\x{10000a}\x{10000b}\x{80000000}-\x{ffffffff}]{5,}).#/B Failed: error 134 at offset 76: character code point value in \x{} or \o{} is too large \x{2fff}\x{3002}\x{7fff}\x{100000}\x{7fffffff}\x{3000}\x{3007}\x{8000}\x{ffff}\x{100001}\x{10000b}\x{80000000}\x{ffffffff}\x{3000}# /([^\x{4000}\x{4002}\x{4004}\x{4005}\x{4007}\x{4009}\x{400a}\x{f000}\x{f002}\x{f004}\x{f005}\x{f007}\x{f009}\x{f00a}\x{100000}\x{100002}\x{100004}\x{100005}\x{100007}\x{100009}\x{10000a}\x{a0000000}\x{a0000002}\x{a0000004}\x{a0000005}\x{a0000007}\x{a0000009}\x{a000000a}]+).#/B Failed: error 134 at offset 124: character code point value in \x{} or \o{} is too large \x{4000}\x{4002}\x{4004}\x{4005}\x{4007}\x{4009}\x{400a}\x{3fff}\x{4001}\x{4003}\x{4006}\x{4008}\x{400b}\x{100}# \x{f000}\x{f002}\x{f004}\x{f005}\x{f007}\x{f009}\x{f00a}\x{efff}\x{f001}\x{f003}\x{f006}\x{f008}\x{f00b}\x{100}# \x{100000}\x{100002}\x{100004}\x{100005}\x{100007}\x{100009}\x{10000a}\x{fffff}\x{100001}\x{100003}\x{100006}\x{100008}\x{10000b}\x{100}# \x{a0000000}\x{a0000002}\x{a0000004}\x{a0000005}\x{a0000007}\x{a0000009}\x{a000000a}\x{9fffffff}\x{a0000001}\x{a0000003}\x{a0000006}\x{a0000008}\x{a000000b}\x{100}# # -------------- # EXTENDED CHARACTER CLASSES (UTS#18) # META_BIGVALUE tests /\x{80000000}/B Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large \x{80000000} \= Expect no match \x{7fffffff} \x{80000001} /[\x{80000000}-\x{8000000f}\x{8fffffff}]/B Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large \x{80000002} \x{8fffffff} \= Expect no match \x{7fffffff} \x{90000000} /\x{80000000}/B,alt_extended_class Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large \x{80000000} \= Expect no match \x{7fffffff} \x{80000001} /[\x{80000000}-\x{8000000f}\x{8fffffff}]/B,alt_extended_class Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large \x{80000002} \x{8fffffff} \= Expect no match \x{7fffffff} \x{90000000} /[\x{80000000}-\x{8000000f}--\x{80000002}]/B,alt_extended_class Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large \x{80000001} \x{80000003} \= Expect no match \x{80000002} /[[\x{80000000}-\x{8000000f}]--[\x{80000002}]]/B,alt_extended_class Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large \x{80000001} \x{80000003} \= Expect no match \x{80000002} # -------------- # EXTENDED CHARACTER CLASSES (Perl) # META_BIGVALUE tests /(?[[\x{80000000}-\x{8000000f}]+\x{8fffffff}])/B Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large \x{80000002} \x{8fffffff} \= Expect no match \x{7fffffff} \x{90000000} /(?[[\x{80000000}-\x{8000000f}]-\x{80000002}])/B Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large \x{80000001} \x{80000003} \= Expect no match \x{80000002} /(?[[\x{80000000}-\x{8000000f}]-\x{80000002}])/B Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large \x{80000001} \x{80000003} \= Expect no match \x{80000002} # -------------- # End of testinput11