Compare commits
3 Commits
8d6ae976a3
...
pimap
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e85b11a233 | ||
|
|
831adf3fc9 | ||
|
|
a18f461ce3 |
224
.clang-format
224
.clang-format
@@ -1,224 +0,0 @@
|
||||
---
|
||||
Language: Cpp
|
||||
AccessModifierOffset: -4
|
||||
AlignAfterOpenBracket: Align
|
||||
AlignArrayOfStructures: Left
|
||||
AlignConsecutiveAssignments:
|
||||
Enabled: true
|
||||
AcrossEmptyLines: true
|
||||
AcrossComments: true
|
||||
AlignCompound: false
|
||||
PadOperators: true
|
||||
AlignConsecutiveBitFields:
|
||||
Enabled: true
|
||||
AcrossEmptyLines: false
|
||||
AcrossComments: true
|
||||
AlignCompound: false
|
||||
PadOperators: true
|
||||
AlignConsecutiveDeclarations:
|
||||
Enabled: false
|
||||
AcrossEmptyLines: false
|
||||
AcrossComments: false
|
||||
AlignCompound: false
|
||||
PadOperators: false
|
||||
AlignConsecutiveMacros:
|
||||
Enabled: true
|
||||
AcrossEmptyLines: true
|
||||
AcrossComments: true
|
||||
AlignCompound: false
|
||||
PadOperators: true
|
||||
AlignEscapedNewlines: Left
|
||||
AlignOperands: Align
|
||||
AlignTrailingComments: true
|
||||
AllowAllArgumentsOnNextLine: false
|
||||
AllowAllParametersOfDeclarationOnNextLine: false
|
||||
AllowShortEnumsOnASingleLine: false
|
||||
AllowShortBlocksOnASingleLine: Empty
|
||||
AllowShortCaseLabelsOnASingleLine: true
|
||||
AllowShortFunctionsOnASingleLine: Inline
|
||||
AllowShortLambdasOnASingleLine: All
|
||||
AllowShortIfStatementsOnASingleLine: WithoutElse
|
||||
AllowShortLoopsOnASingleLine: false
|
||||
AlwaysBreakAfterDefinitionReturnType: None
|
||||
AlwaysBreakAfterReturnType: None
|
||||
AlwaysBreakBeforeMultilineStrings: false
|
||||
AlwaysBreakTemplateDeclarations: Yes
|
||||
AttributeMacros:
|
||||
- __capability
|
||||
BinPackArguments: false
|
||||
BinPackParameters: false
|
||||
BraceWrapping:
|
||||
AfterCaseLabel: false
|
||||
AfterClass: false
|
||||
AfterControlStatement: Never
|
||||
AfterEnum: false
|
||||
AfterFunction: false
|
||||
AfterNamespace: false
|
||||
AfterObjCDeclaration: false
|
||||
AfterStruct: false
|
||||
AfterUnion: false
|
||||
AfterExternBlock: false
|
||||
BeforeCatch: false
|
||||
BeforeElse: false
|
||||
BeforeLambdaBody: false
|
||||
BeforeWhile: false
|
||||
IndentBraces: false
|
||||
SplitEmptyFunction: false
|
||||
SplitEmptyRecord: false
|
||||
SplitEmptyNamespace: true
|
||||
BreakBeforeBinaryOperators: None
|
||||
BreakBeforeConceptDeclarations: Always
|
||||
BreakBeforeBraces: Attach
|
||||
BreakInheritanceList: BeforeComma
|
||||
BreakBeforeTernaryOperators: true
|
||||
BreakConstructorInitializers: BeforeComma
|
||||
BreakAfterJavaFieldAnnotations: false
|
||||
BreakStringLiterals: true
|
||||
ColumnLimit: 140
|
||||
CommentPragmas: '^ IWYU pragma:'
|
||||
QualifierAlignment: Leave
|
||||
CompactNamespaces: false
|
||||
ConstructorInitializerIndentWidth: 4
|
||||
ContinuationIndentWidth: 4
|
||||
Cpp11BracedListStyle: true
|
||||
DeriveLineEnding: false
|
||||
DerivePointerAlignment: false
|
||||
DisableFormat: false
|
||||
EmptyLineAfterAccessModifier: Never
|
||||
EmptyLineBeforeAccessModifier: Always
|
||||
ExperimentalAutoDetectBinPacking: false
|
||||
PackConstructorInitializers: CurrentLine
|
||||
BasedOnStyle: ''
|
||||
ConstructorInitializerAllOnOneLineOrOnePerLine: true
|
||||
AllowAllConstructorInitializersOnNextLine: true
|
||||
FixNamespaceComments: true
|
||||
ForEachMacros:
|
||||
- foreach
|
||||
- Q_FOREACH
|
||||
- BOOST_FOREACH
|
||||
- piForeach
|
||||
- piForeachC
|
||||
- piForeachR
|
||||
- piForeachRC
|
||||
- piForeachCR
|
||||
IfMacros:
|
||||
- KJ_IF_MAYBE
|
||||
IncludeBlocks: Regroup
|
||||
IncludeCategories:
|
||||
- Regex: '^"(llvm|llvm-c|clang|clang-c)/'
|
||||
Priority: 2
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
- Regex: '^(<|"(gtest|gmock|isl|json)/)'
|
||||
Priority: 3
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
- Regex: '.*'
|
||||
Priority: 1
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
IncludeIsMainRegex: '(Test)?$'
|
||||
IncludeIsMainSourceRegex: ''
|
||||
IndentAccessModifiers: false
|
||||
IndentCaseLabels: false
|
||||
IndentCaseBlocks: false
|
||||
IndentGotoLabels: false
|
||||
IndentPPDirectives: AfterHash
|
||||
IndentExternBlock: NoIndent
|
||||
IndentRequiresClause: true
|
||||
IndentWidth: 4
|
||||
IndentWrappedFunctionNames: false
|
||||
InsertBraces: false
|
||||
InsertTrailingCommas: Wrapped
|
||||
JavaScriptQuotes: Leave
|
||||
JavaScriptWrapImports: true
|
||||
KeepEmptyLinesAtTheStartOfBlocks: false
|
||||
LambdaBodyIndentation: Signature
|
||||
MacroBlockBegin: "PRIVATE_DEFINITION_START|STATIC_INITIALIZER_BEGIN|DECLARE_UNIT_CLASS_BEGIN"
|
||||
MacroBlockEnd: "PRIVATE_DEFINITION_END|PRIVATE_DEFINITION_END_NO_INITIALIZE|STATIC_INITIALIZER_END|DECLARE_UNIT_CLASS_END"
|
||||
MaxEmptyLinesToKeep: 2
|
||||
NamespaceIndentation: None
|
||||
ObjCBinPackProtocolList: Auto
|
||||
ObjCBlockIndentWidth: 2
|
||||
ObjCBreakBeforeNestedBlockParam: true
|
||||
ObjCSpaceAfterProperty: false
|
||||
ObjCSpaceBeforeProtocolList: true
|
||||
PenaltyBreakAssignment: 2
|
||||
PenaltyBreakBeforeFirstCallParameter: 19
|
||||
PenaltyBreakComment: 300
|
||||
PenaltyBreakFirstLessLess: 120
|
||||
PenaltyBreakOpenParenthesis: 0
|
||||
PenaltyBreakString: 1000
|
||||
PenaltyBreakTemplateDeclaration: 10
|
||||
PenaltyExcessCharacter: 1000000
|
||||
PenaltyReturnTypeOnItsOwnLine: 60
|
||||
PenaltyIndentedWhitespace: 0
|
||||
PointerAlignment: Middle
|
||||
PPIndentWidth: 2
|
||||
ReferenceAlignment: Middle
|
||||
ReflowComments: true
|
||||
RemoveBracesLLVM: false
|
||||
RequiresClausePosition: OwnLine
|
||||
SeparateDefinitionBlocks: Leave
|
||||
ShortNamespaceLines: 1
|
||||
SortIncludes: CaseSensitive
|
||||
SortJavaStaticImport: Before
|
||||
SortUsingDeclarations: true
|
||||
SpaceAfterCStyleCast: false
|
||||
SpaceAfterLogicalNot: false
|
||||
SpaceAfterTemplateKeyword: false
|
||||
SpaceBeforeAssignmentOperators: true
|
||||
SpaceBeforeCaseColon: false
|
||||
SpaceBeforeCpp11BracedList: false
|
||||
SpaceBeforeCtorInitializerColon: false
|
||||
SpaceBeforeInheritanceColon: false
|
||||
SpaceBeforeParens: ControlStatementsExceptControlMacros
|
||||
SpaceBeforeParensOptions:
|
||||
AfterControlStatements: true
|
||||
AfterForeachMacros: false
|
||||
AfterFunctionDefinitionName: false
|
||||
AfterFunctionDeclarationName: false
|
||||
AfterIfMacros: false
|
||||
AfterOverloadedOperator: false
|
||||
AfterRequiresInClause: false
|
||||
AfterRequiresInExpression: false
|
||||
BeforeNonEmptyParentheses: false
|
||||
SpaceAroundPointerQualifiers: Both
|
||||
SpaceBeforeRangeBasedForLoopColon: false
|
||||
SpaceInEmptyBlock: false
|
||||
SpaceInEmptyParentheses: false
|
||||
SpacesBeforeTrailingComments: 1
|
||||
SpacesInAngles: Never
|
||||
SpacesInConditionalStatement: false
|
||||
SpacesInContainerLiterals: false
|
||||
SpacesInCStyleCastParentheses: false
|
||||
SpacesInLineCommentPrefix:
|
||||
Minimum: 1
|
||||
Maximum: -1
|
||||
SpacesInParentheses: false
|
||||
SpacesInSquareBrackets: false
|
||||
SpaceBeforeSquareBrackets: false
|
||||
BitFieldColonSpacing: After
|
||||
Standard: c++11
|
||||
StatementAttributeLikeMacros:
|
||||
- Q_EMIT
|
||||
- PIMETA
|
||||
StatementMacros:
|
||||
- Q_UNUSED
|
||||
- QT_REQUIRE_VERSION
|
||||
- PRIVATE_DECLARATION
|
||||
- NO_COPY_CLASS
|
||||
- FOREVER_WAIT
|
||||
- WAIT_FOREVER
|
||||
TabWidth: 4
|
||||
UseCRLF: false
|
||||
UseTab: AlignWithSpaces
|
||||
WhitespaceSensitiveMacros:
|
||||
- STRINGIZE
|
||||
- PP_STRINGIZE
|
||||
- BOOST_PP_STRINGIZE
|
||||
- NS_SWIFT_NAME
|
||||
- CF_SWIFT_NAME
|
||||
- PIMETA
|
||||
...
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
root = true
|
||||
|
||||
[*.{h,c,cpp}]
|
||||
charset = utf-8
|
||||
indent_style = tab
|
||||
tab_width = 4
|
||||
4
.gitignore
vendored
4
.gitignore
vendored
@@ -3,6 +3,4 @@
|
||||
/doc/rtf
|
||||
_unsused
|
||||
CMakeLists.txt.user*
|
||||
/include
|
||||
/release
|
||||
/build*
|
||||
/include
|
||||
@@ -1,160 +0,0 @@
|
||||
/*
|
||||
BLAKE2 reference source code package - reference C implementations
|
||||
|
||||
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
|
||||
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
|
||||
your option. The terms of these licenses can be found at:
|
||||
|
||||
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
- OpenSSL license : https://www.openssl.org/source/license.html
|
||||
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
More information about the BLAKE2 hash function can be found at
|
||||
https://blake2.net.
|
||||
*/
|
||||
#ifndef BLAKE2_IMPL_H
|
||||
#define BLAKE2_IMPL_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#if !defined(__cplusplus) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L)
|
||||
#if defined(_MSC_VER)
|
||||
#define BLAKE2_INLINE __inline
|
||||
#elif defined(__GNUC__)
|
||||
#define BLAKE2_INLINE __inline__
|
||||
#else
|
||||
#define BLAKE2_INLINE
|
||||
#endif
|
||||
#else
|
||||
#define BLAKE2_INLINE inline
|
||||
#endif
|
||||
|
||||
/*
 * Read a 32-bit unsigned integer stored least-significant byte first at src.
 * With NATIVE_LITTLE_ENDIAN defined the four bytes are copied verbatim via
 * memcpy; otherwise the value is assembled one byte at a time.
 */
static BLAKE2_INLINE uint32_t load32( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  uint32_t value;
  memcpy( &value, src, sizeof value );
  return value;
#else
  const uint8_t *bytes = ( const uint8_t * )src;
  uint32_t value = 0;
  size_t k;

  /* byte k contributes bits [8k, 8k+7] */
  for( k = 0; k < sizeof value; ++k )
    value |= ( uint32_t )bytes[k] << ( 8 * k );

  return value;
#endif
}
|
||||
|
||||
/*
 * Read a 64-bit unsigned integer stored least-significant byte first at src.
 * With NATIVE_LITTLE_ENDIAN defined the eight bytes are copied verbatim via
 * memcpy; otherwise the value is assembled one byte at a time.
 */
static BLAKE2_INLINE uint64_t load64( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  uint64_t value;
  memcpy( &value, src, sizeof value );
  return value;
#else
  const uint8_t *bytes = ( const uint8_t * )src;
  uint64_t value = 0;
  size_t k;

  /* byte k contributes bits [8k, 8k+7] */
  for( k = 0; k < sizeof value; ++k )
    value |= ( uint64_t )bytes[k] << ( 8 * k );

  return value;
#endif
}
|
||||
|
||||
/*
 * Read a 16-bit unsigned integer stored least-significant byte first at src.
 * With NATIVE_LITTLE_ENDIAN defined the two bytes are copied verbatim via
 * memcpy; otherwise the low and high bytes are combined explicitly.
 */
static BLAKE2_INLINE uint16_t load16( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  uint16_t value;
  memcpy( &value, src, sizeof value );
  return value;
#else
  const uint8_t *bytes = ( const uint8_t * )src;
  const uint32_t low  = bytes[0];
  const uint32_t high = bytes[1];

  return ( uint16_t )( low | ( high << 8 ) );
#endif
}
|
||||
|
||||
/*
 * Write the 16-bit value w to dst, least-significant byte first.
 * With NATIVE_LITTLE_ENDIAN defined the in-memory representation is copied
 * verbatim via memcpy; otherwise the two bytes are written individually.
 */
static BLAKE2_INLINE void store16( void *dst, uint16_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  memcpy( dst, &w, sizeof w );
#else
  uint8_t *bytes = ( uint8_t * )dst;

  bytes[0] = ( uint8_t )w;          /* low byte  */
  bytes[1] = ( uint8_t )( w >> 8 ); /* high byte */
#endif
}
|
||||
|
||||
/*
 * Write the 32-bit value w to dst, least-significant byte first.
 * With NATIVE_LITTLE_ENDIAN defined the in-memory representation is copied
 * verbatim via memcpy; otherwise the four bytes are written individually.
 */
static BLAKE2_INLINE void store32( void *dst, uint32_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  memcpy( dst, &w, sizeof w );
#else
  uint8_t *bytes = ( uint8_t * )dst;
  size_t k;

  /* byte k receives bits [8k, 8k+7] of w */
  for( k = 0; k < sizeof w; ++k )
    bytes[k] = ( uint8_t )( w >> ( 8 * k ) );
#endif
}
|
||||
|
||||
/*
 * Write the 64-bit value w to dst, least-significant byte first.
 * With NATIVE_LITTLE_ENDIAN defined the in-memory representation is copied
 * verbatim via memcpy; otherwise the eight bytes are written individually.
 */
static BLAKE2_INLINE void store64( void *dst, uint64_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  memcpy( dst, &w, sizeof w );
#else
  uint8_t *bytes = ( uint8_t * )dst;
  size_t k;

  /* byte k receives bits [8k, 8k+7] of w */
  for( k = 0; k < sizeof w; ++k )
    bytes[k] = ( uint8_t )( w >> ( 8 * k ) );
#endif
}
|
||||
|
||||
/*
 * Read a 48-bit unsigned integer (six bytes, least-significant byte first)
 * from src and return it in the low 48 bits of a uint64_t. This function has
 * no NATIVE_LITTLE_ENDIAN fast path; it always reads byte by byte.
 */
static BLAKE2_INLINE uint64_t load48( const void *src )
{
  const uint8_t *bytes = ( const uint8_t * )src;
  uint64_t value = 0;
  size_t k;

  for( k = 0; k < 6; ++k )
    value |= ( uint64_t )bytes[k] << ( 8 * k );

  return value;
}
|
||||
|
||||
/*
 * Write the low 48 bits of w to dst as six bytes, least-significant byte
 * first. Bits 48..63 of w are not written. This function has no
 * NATIVE_LITTLE_ENDIAN fast path; it always writes byte by byte.
 */
static BLAKE2_INLINE void store48( void *dst, uint64_t w )
{
  uint8_t *bytes = ( uint8_t * )dst;
  size_t k;

  for( k = 0; k < 6; ++k )
    bytes[k] = ( uint8_t )( w >> ( 8 * k ) );
}
|
||||
|
||||
/*
 * Rotate the 32-bit word w right by c bit positions.
 *
 * Both shift counts are reduced modulo 32. The unmasked form
 * `w << (32 - c)` shifts by the full width of the type when c == 0, which
 * the C standard leaves undefined; with the masks a count of 0 (or any
 * multiple of 32) returns w unchanged. Results for c in 1..31 are the same
 * as before.
 */
static BLAKE2_INLINE uint32_t rotr32( const uint32_t w, const unsigned c )
{
  return ( w >> ( c & 31 ) ) | ( w << ( ( 32 - c ) & 31 ) );
}
|
||||
|
||||
/*
 * Rotate the 64-bit word w right by c bit positions.
 *
 * Both shift counts are reduced modulo 64. The unmasked form
 * `w << (64 - c)` shifts by the full width of the type when c == 0, which
 * the C standard leaves undefined; with the masks a count of 0 (or any
 * multiple of 64) returns w unchanged. Results for c in 1..63 are the same
 * as before.
 */
static BLAKE2_INLINE uint64_t rotr64( const uint64_t w, const unsigned c )
{
  return ( w >> ( c & 63 ) ) | ( w << ( ( 64 - c ) & 63 ) );
}
|
||||
|
||||
/* prevents compiler optimizing out memset() */
|
||||
static BLAKE2_INLINE void secure_zero_memory(void *v, size_t n)
|
||||
{
|
||||
static void *(*const volatile memset_v)(void *, int, size_t) = &memset;
|
||||
memset_v(v, 0, n);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,195 +0,0 @@
|
||||
/*
|
||||
BLAKE2 reference source code package - reference C implementations
|
||||
|
||||
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
|
||||
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
|
||||
your option. The terms of these licenses can be found at:
|
||||
|
||||
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
- OpenSSL license : https://www.openssl.org/source/license.html
|
||||
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
More information about the BLAKE2 hash function can be found at
|
||||
https://blake2.net.
|
||||
*/
|
||||
#ifndef BLAKE2_H
|
||||
#define BLAKE2_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define BLAKE2_PACKED(x) __pragma(pack(push, 1)) x __pragma(pack(pop))
|
||||
#else
|
||||
#define BLAKE2_PACKED(x) x __attribute__((packed))
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Size constants, in bytes, for the BLAKE2s variant. */
enum blake2s_constant
{
  BLAKE2S_BLOCKBYTES    = 64,
  BLAKE2S_OUTBYTES      = 32,
  BLAKE2S_KEYBYTES      = 32,
  BLAKE2S_SALTBYTES     = 8,
  BLAKE2S_PERSONALBYTES = 8
};
|
||||
|
||||
/* Size constants, in bytes, for the BLAKE2b variant. */
enum blake2b_constant
{
  BLAKE2B_BLOCKBYTES    = 128,
  BLAKE2B_OUTBYTES      = 64,
  BLAKE2B_KEYBYTES      = 64,
  BLAKE2B_SALTBYTES     = 16,
  BLAKE2B_PERSONALBYTES = 16
};
|
||||
|
||||
typedef struct blake2s_state__
|
||||
{
|
||||
uint32_t h[8];
|
||||
uint32_t t[2];
|
||||
uint32_t f[2];
|
||||
uint8_t buf[BLAKE2S_BLOCKBYTES];
|
||||
size_t buflen;
|
||||
size_t outlen;
|
||||
uint8_t last_node;
|
||||
} blake2s_state;
|
||||
|
||||
typedef struct blake2b_state__
|
||||
{
|
||||
uint64_t h[8];
|
||||
uint64_t t[2];
|
||||
uint64_t f[2];
|
||||
uint8_t buf[BLAKE2B_BLOCKBYTES];
|
||||
size_t buflen;
|
||||
size_t outlen;
|
||||
uint8_t last_node;
|
||||
} blake2b_state;
|
||||
|
||||
typedef struct blake2sp_state__
|
||||
{
|
||||
blake2s_state S[8][1];
|
||||
blake2s_state R[1];
|
||||
uint8_t buf[8 * BLAKE2S_BLOCKBYTES];
|
||||
size_t buflen;
|
||||
size_t outlen;
|
||||
} blake2sp_state;
|
||||
|
||||
typedef struct blake2bp_state__
|
||||
{
|
||||
blake2b_state S[4][1];
|
||||
blake2b_state R[1];
|
||||
uint8_t buf[4 * BLAKE2B_BLOCKBYTES];
|
||||
size_t buflen;
|
||||
size_t outlen;
|
||||
} blake2bp_state;
|
||||
|
||||
|
||||
BLAKE2_PACKED(struct blake2s_param__
|
||||
{
|
||||
uint8_t digest_length; /* 1 */
|
||||
uint8_t key_length; /* 2 */
|
||||
uint8_t fanout; /* 3 */
|
||||
uint8_t depth; /* 4 */
|
||||
uint32_t leaf_length; /* 8 */
|
||||
uint32_t node_offset; /* 12 */
|
||||
uint16_t xof_length; /* 14 */
|
||||
uint8_t node_depth; /* 15 */
|
||||
uint8_t inner_length; /* 16 */
|
||||
/* uint8_t reserved[0]; */
|
||||
uint8_t salt[BLAKE2S_SALTBYTES]; /* 24 */
|
||||
uint8_t personal[BLAKE2S_PERSONALBYTES]; /* 32 */
|
||||
});
|
||||
|
||||
typedef struct blake2s_param__ blake2s_param;
|
||||
|
||||
BLAKE2_PACKED(struct blake2b_param__
|
||||
{
|
||||
uint8_t digest_length; /* 1 */
|
||||
uint8_t key_length; /* 2 */
|
||||
uint8_t fanout; /* 3 */
|
||||
uint8_t depth; /* 4 */
|
||||
uint32_t leaf_length; /* 8 */
|
||||
uint32_t node_offset; /* 12 */
|
||||
uint32_t xof_length; /* 16 */
|
||||
uint8_t node_depth; /* 17 */
|
||||
uint8_t inner_length; /* 18 */
|
||||
uint8_t reserved[14]; /* 32 */
|
||||
uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */
|
||||
uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
|
||||
});
|
||||
|
||||
typedef struct blake2b_param__ blake2b_param;
|
||||
|
||||
typedef struct blake2xs_state__
|
||||
{
|
||||
blake2s_state S[1];
|
||||
blake2s_param P[1];
|
||||
} blake2xs_state;
|
||||
|
||||
typedef struct blake2xb_state__
|
||||
{
|
||||
blake2b_state S[1];
|
||||
blake2b_param P[1];
|
||||
} blake2xb_state;
|
||||
|
||||
/* Padded structs result in a compile-time error */
|
||||
enum {
|
||||
BLAKE2_DUMMY_1 = 1/(int)(sizeof(blake2s_param) == BLAKE2S_OUTBYTES),
|
||||
BLAKE2_DUMMY_2 = 1/(int)(sizeof(blake2b_param) == BLAKE2B_OUTBYTES)
|
||||
};
|
||||
|
||||
/* Streaming API */
|
||||
int blake2s_init( blake2s_state *S, size_t outlen );
|
||||
int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen );
|
||||
int blake2s_init_param( blake2s_state *S, const blake2s_param *P );
|
||||
int blake2s_update( blake2s_state *S, const void *in, size_t inlen );
|
||||
int blake2s_final( blake2s_state *S, void *out, size_t outlen );
|
||||
|
||||
int blake2b_init( blake2b_state *S, size_t outlen );
|
||||
int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen );
|
||||
int blake2b_init_param( blake2b_state *S, const blake2b_param *P );
|
||||
int blake2b_update( blake2b_state *S, const void *in, size_t inlen );
|
||||
int blake2b_final( blake2b_state *S, void *out, size_t outlen );
|
||||
|
||||
int blake2sp_init( blake2sp_state *S, size_t outlen );
|
||||
int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen );
|
||||
int blake2sp_update( blake2sp_state *S, const void *in, size_t inlen );
|
||||
int blake2sp_final( blake2sp_state *S, void *out, size_t outlen );
|
||||
|
||||
int blake2bp_init( blake2bp_state *S, size_t outlen );
|
||||
int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen );
|
||||
int blake2bp_update( blake2bp_state *S, const void *in, size_t inlen );
|
||||
int blake2bp_final( blake2bp_state *S, void *out, size_t outlen );
|
||||
|
||||
/* Variable output length API */
|
||||
int blake2xs_init( blake2xs_state *S, const size_t outlen );
|
||||
int blake2xs_init_key( blake2xs_state *S, const size_t outlen, const void *key, size_t keylen );
|
||||
int blake2xs_update( blake2xs_state *S, const void *in, size_t inlen );
|
||||
int blake2xs_final(blake2xs_state *S, void *out, size_t outlen);
|
||||
|
||||
int blake2xb_init( blake2xb_state *S, const size_t outlen );
|
||||
int blake2xb_init_key( blake2xb_state *S, const size_t outlen, const void *key, size_t keylen );
|
||||
int blake2xb_update( blake2xb_state *S, const void *in, size_t inlen );
|
||||
int blake2xb_final(blake2xb_state *S, void *out, size_t outlen);
|
||||
|
||||
/* Simple API */
|
||||
int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
|
||||
int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
|
||||
|
||||
int blake2sp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
|
||||
int blake2bp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
|
||||
|
||||
int blake2xs( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
|
||||
int blake2xb( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
|
||||
|
||||
/* This is simply an alias for blake2b */
|
||||
int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -1,379 +0,0 @@
|
||||
/*
|
||||
BLAKE2 reference source code package - reference C implementations
|
||||
|
||||
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
|
||||
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
|
||||
your option. The terms of these licenses can be found at:
|
||||
|
||||
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
- OpenSSL license : https://www.openssl.org/source/license.html
|
||||
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
More information about the BLAKE2 hash function can be found at
|
||||
https://blake2.net.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "blake2.h"
|
||||
#include "blake2-impl.h"
|
||||
|
||||
/*
 * BLAKE2b initialization vector. blake2b_init0 loads it into S->h and
 * blake2b_compress uses it for the upper half of the working vector.
 */
static const uint64_t blake2b_IV[8] =
{
  0x6a09e667f3bcc908ULL, /* IV[0] */
  0xbb67ae8584caa73bULL, /* IV[1] */
  0x3c6ef372fe94f82bULL, /* IV[2] */
  0xa54ff53a5f1d36f1ULL, /* IV[3] */
  0x510e527fade682d1ULL, /* IV[4] */
  0x9b05688c2b3e6c1fULL, /* IV[5] */
  0x1f83d9abfb41bd6bULL, /* IV[6] */
  0x5be0cd19137e2179ULL  /* IV[7] */
};
|
||||
|
||||
/*
 * Message word schedule. Row r lists the indexes into m[] that the G macro
 * uses during round r of blake2b_compress. Rows 10 and 11 repeat rows 0
 * and 1.
 */
static const uint8_t blake2b_sigma[12][16] =
{
  {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
  { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
  { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 },
  {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 },
  {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 },
  {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 },
  { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 },
  { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 },
  {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 },
  { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0 },
  {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
  { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 }
};
|
||||
|
||||
|
||||
/* Set the second flag word, f[1], to all ones. */
static void blake2b_set_lastnode( blake2b_state *S )
{
  S->f[1] = ~( uint64_t )0;
}
|
||||
|
||||
/* Some helper functions, not necessarily useful */
|
||||
static int blake2b_is_lastblock( const blake2b_state *S )
|
||||
{
|
||||
return S->f[0] != 0;
|
||||
}
|
||||
|
||||
/*
 * Set f[0] to all ones. When S->last_node is non-zero, f[1] is set first via
 * blake2b_set_lastnode.
 */
static void blake2b_set_lastblock( blake2b_state *S )
{
  if( S->last_node )
  {
    blake2b_set_lastnode( S );
  }

  S->f[0] = ~( uint64_t )0;
}
|
||||
|
||||
/*
 * Add inc to the two-word counter t. t[0] is the low word; a wrap-around of
 * t[0] is detected by the sum being smaller than inc and carried into t[1].
 */
static void blake2b_increment_counter( blake2b_state *S, const uint64_t inc )
{
  S->t[0] += inc;

  if( S->t[0] < inc )
  {
    S->t[1] += 1; /* carry out of the low word */
  }
}
|
||||
|
||||
/* Zero the whole state, then load the eight hash words from blake2b_IV. */
static void blake2b_init0( blake2b_state *S )
{
  memset( S, 0, sizeof *S );
  memcpy( S->h, blake2b_IV, sizeof S->h );
}
|
||||
|
||||
/* init xors IV with input parameter block */
|
||||
int blake2b_init_param( blake2b_state *S, const blake2b_param *P )
|
||||
{
|
||||
const uint8_t *p = ( const uint8_t * )( P );
|
||||
size_t i;
|
||||
|
||||
blake2b_init0( S );
|
||||
|
||||
/* IV XOR ParamBlock */
|
||||
for( i = 0; i < 8; ++i )
|
||||
S->h[i] ^= load64( p + sizeof( S->h[i] ) * i );
|
||||
|
||||
S->outlen = P->digest_length;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
 * Initialize S for unkeyed hashing with an outlen-byte digest.
 * Returns -1 when outlen is 0 or larger than BLAKE2B_OUTBYTES, otherwise the
 * result of blake2b_init_param.
 */
int blake2b_init( blake2b_state *S, size_t outlen )
{
  blake2b_param P[1];

  if( outlen == 0 || outlen > BLAKE2B_OUTBYTES )
  {
    return -1;
  }

  /* Every field other than the three set below is zero. */
  memset( P, 0, sizeof( blake2b_param ) );
  P->digest_length = ( uint8_t )outlen;
  P->fanout        = 1;
  P->depth         = 1;

  return blake2b_init_param( S, P );
}
|
||||
|
||||
|
||||
/*
 * Initialize S for keyed hashing with an outlen-byte digest.
 * The key is zero-padded to one full block and absorbed as the first block
 * of input; the padded copy is wiped afterwards.
 * Returns -1 when outlen is 0 or larger than BLAKE2B_OUTBYTES, when key is
 * NULL, or when keylen is 0 or larger than BLAKE2B_KEYBYTES; 0 otherwise.
 */
int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen )
{
  blake2b_param P[1];
  uint8_t padded_key[BLAKE2B_BLOCKBYTES];

  if( outlen == 0 || outlen > BLAKE2B_OUTBYTES )
  {
    return -1;
  }

  if( key == NULL || keylen == 0 || keylen > BLAKE2B_KEYBYTES )
  {
    return -1;
  }

  /* Every field other than the four set below is zero. */
  memset( P, 0, sizeof( blake2b_param ) );
  P->digest_length = ( uint8_t )outlen;
  P->key_length    = ( uint8_t )keylen;
  P->fanout        = 1;
  P->depth         = 1;

  if( blake2b_init_param( S, P ) < 0 )
  {
    return -1;
  }

  memset( padded_key, 0, BLAKE2B_BLOCKBYTES );
  memcpy( padded_key, key, keylen );
  blake2b_update( S, padded_key, BLAKE2B_BLOCKBYTES );
  secure_zero_memory( padded_key, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */

  return 0;
}
|
||||
|
||||
#define G(r,i,a,b,c,d) \
|
||||
do { \
|
||||
a = a + b + m[blake2b_sigma[r][2*i+0]]; \
|
||||
d = rotr64(d ^ a, 32); \
|
||||
c = c + d; \
|
||||
b = rotr64(b ^ c, 24); \
|
||||
a = a + b + m[blake2b_sigma[r][2*i+1]]; \
|
||||
d = rotr64(d ^ a, 16); \
|
||||
c = c + d; \
|
||||
b = rotr64(b ^ c, 63); \
|
||||
} while(0)
|
||||
|
||||
#define ROUND(r) \
|
||||
do { \
|
||||
G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
|
||||
G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
|
||||
G(r,2,v[ 2],v[ 6],v[10],v[14]); \
|
||||
G(r,3,v[ 3],v[ 7],v[11],v[15]); \
|
||||
G(r,4,v[ 0],v[ 5],v[10],v[15]); \
|
||||
G(r,5,v[ 1],v[ 6],v[11],v[12]); \
|
||||
G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
|
||||
G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
|
||||
} while(0)
|
||||
|
||||
static void blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
|
||||
{
|
||||
uint64_t m[16];
|
||||
uint64_t v[16];
|
||||
size_t i;
|
||||
|
||||
for( i = 0; i < 16; ++i ) {
|
||||
m[i] = load64( block + i * sizeof( m[i] ) );
|
||||
}
|
||||
|
||||
for( i = 0; i < 8; ++i ) {
|
||||
v[i] = S->h[i];
|
||||
}
|
||||
|
||||
v[ 8] = blake2b_IV[0];
|
||||
v[ 9] = blake2b_IV[1];
|
||||
v[10] = blake2b_IV[2];
|
||||
v[11] = blake2b_IV[3];
|
||||
v[12] = blake2b_IV[4] ^ S->t[0];
|
||||
v[13] = blake2b_IV[5] ^ S->t[1];
|
||||
v[14] = blake2b_IV[6] ^ S->f[0];
|
||||
v[15] = blake2b_IV[7] ^ S->f[1];
|
||||
|
||||
ROUND( 0 );
|
||||
ROUND( 1 );
|
||||
ROUND( 2 );
|
||||
ROUND( 3 );
|
||||
ROUND( 4 );
|
||||
ROUND( 5 );
|
||||
ROUND( 6 );
|
||||
ROUND( 7 );
|
||||
ROUND( 8 );
|
||||
ROUND( 9 );
|
||||
ROUND( 10 );
|
||||
ROUND( 11 );
|
||||
|
||||
for( i = 0; i < 8; ++i ) {
|
||||
S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
|
||||
}
|
||||
}
|
||||
|
||||
#undef G
|
||||
#undef ROUND
|
||||
|
||||
/*
 * Absorb inlen bytes from pin into the hash state.
 * Input is staged in S->buf. A block is compressed only when more input
 * follows it, so the last block (full or partial) stays buffered.
 * Always returns 0.
 */
int blake2b_update( blake2b_state *S, const void *pin, size_t inlen )
{
  const unsigned char *cursor = ( const unsigned char * )pin;
  size_t buffered;
  size_t room;

  if( inlen == 0 )
  {
    return 0;
  }

  buffered = S->buflen;
  room     = BLAKE2B_BLOCKBYTES - buffered;

  if( inlen > room )
  {
    /* Top up the staging buffer and compress it. */
    S->buflen = 0;
    memcpy( S->buf + buffered, cursor, room );
    blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
    blake2b_compress( S, S->buf );
    cursor += room;
    inlen  -= room;

    /* Compress whole blocks straight from the input while more than one block remains. */
    for( ; inlen > BLAKE2B_BLOCKBYTES; cursor += BLAKE2B_BLOCKBYTES, inlen -= BLAKE2B_BLOCKBYTES )
    {
      blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
      blake2b_compress( S, cursor );
    }
  }

  /* Whatever is left (1..BLAKE2B_BLOCKBYTES bytes) is buffered. */
  memcpy( S->buf + S->buflen, cursor, inlen );
  S->buflen += inlen;
  return 0;
}
|
||||
|
||||
/*
 * Finish the hash and write S->outlen digest bytes to out.
 * Returns -1 when out is NULL, when outlen is smaller than S->outlen, or when
 * the state has already been finalized; 0 otherwise.
 */
int blake2b_final( blake2b_state *S, void *out, size_t outlen )
{
  uint8_t digest[BLAKE2B_OUTBYTES] = {0};
  size_t word;

  if( out == NULL || outlen < S->outlen || blake2b_is_lastblock( S ) )
  {
    return -1;
  }

  blake2b_increment_counter( S, S->buflen );
  blake2b_set_lastblock( S );
  memset( S->buf + S->buflen, 0, BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */
  blake2b_compress( S, S->buf );

  /* Serialize the full hash into a temporary buffer, then copy out the requested prefix. */
  for( word = 0; word < 8; ++word )
  {
    store64( digest + sizeof( uint64_t ) * word, S->h[word] );
  }

  memcpy( out, digest, S->outlen );
  secure_zero_memory( digest, sizeof( digest ) );
  return 0;
}
|
||||
|
||||
/* inlen, at least, should be uint64_t. Others can be size_t. */
|
||||
int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
|
||||
{
|
||||
blake2b_state S[1];
|
||||
|
||||
/* Verify parameters */
|
||||
if ( NULL == in && inlen > 0 ) return -1;
|
||||
|
||||
if ( NULL == out ) return -1;
|
||||
|
||||
if( NULL == key && keylen > 0 ) return -1;
|
||||
|
||||
if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
|
||||
|
||||
if( keylen > BLAKE2B_KEYBYTES ) return -1;
|
||||
|
||||
if( keylen > 0 )
|
||||
{
|
||||
if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if( blake2b_init( S, outlen ) < 0 ) return -1;
|
||||
}
|
||||
|
||||
blake2b_update( S, ( const uint8_t * )in, inlen );
|
||||
blake2b_final( S, out, outlen );
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Forwards every argument to blake2b and returns its result. */
int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
{
  const int status = blake2b( out, outlen, in, inlen, key, keylen );
  return status;
}
|
||||
|
||||
#if defined(SUPERCOP)
/*
 * Hash entry point compiled only when SUPERCOP is defined: an unkeyed
 * BLAKE2b digest of BLAKE2B_OUTBYTES bytes over in[0..inlen).
 */
int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
{
  const int status = blake2b( out, BLAKE2B_OUTBYTES, in, inlen, NULL, 0 );
  return status;
}
#endif
|
||||
|
||||
#if defined(BLAKE2B_SELFTEST)
#include <string.h>
#include "blake2-kat.h"

/*
 * Hash the first len bytes of msg with the simple API and compare the result
 * with the keyed known-answer vector for that length.
 * Returns 0 on a match, -1 otherwise.
 */
static int selftest_simple( const uint8_t *key, const uint8_t *msg, size_t len )
{
  uint8_t hash[BLAKE2B_OUTBYTES];

  blake2b( hash, BLAKE2B_OUTBYTES, msg, len, key, BLAKE2B_KEYBYTES );

  return ( 0 == memcmp( hash, blake2b_keyed_kat[len], BLAKE2B_OUTBYTES ) ) ? 0 : -1;
}

/*
 * Hash the first len bytes of msg with the streaming API, feeding step bytes
 * per update call plus one final call for the remainder, and compare the
 * result with the keyed known-answer vector for that length.
 * Returns 0 on a match, -1 on any API failure or mismatch.
 */
static int selftest_streaming( const uint8_t *key, const uint8_t *msg, size_t len, size_t step )
{
  uint8_t hash[BLAKE2B_OUTBYTES];
  blake2b_state S;
  size_t remaining = len;

  if( blake2b_init_key( &S, BLAKE2B_OUTBYTES, key, BLAKE2B_KEYBYTES ) < 0 )
  {
    return -1;
  }

  for( ; remaining >= step; remaining -= step, msg += step )
  {
    if( blake2b_update( &S, msg, step ) < 0 )
    {
      return -1;
    }
  }

  if( blake2b_update( &S, msg, remaining ) < 0 )
  {
    return -1;
  }

  if( blake2b_final( &S, hash, BLAKE2B_OUTBYTES ) < 0 )
  {
    return -1;
  }

  return ( 0 == memcmp( hash, blake2b_keyed_kat[len], BLAKE2B_OUTBYTES ) ) ? 0 : -1;
}

/*
 * Self-test driver, compiled only when BLAKE2B_SELFTEST is defined.
 * Checks every message length below BLAKE2_KAT_LENGTH with the simple API,
 * then with the streaming API for every step size below BLAKE2B_BLOCKBYTES.
 * Prints "ok" and returns 0 on success; prints "error" and returns -1 at the
 * first failure.
 */
int main( void )
{
  uint8_t key[BLAKE2B_KEYBYTES];
  uint8_t buf[BLAKE2_KAT_LENGTH];
  size_t i, step;
  int failed = 0;

  for( i = 0; i < BLAKE2B_KEYBYTES; ++i )
  {
    key[i] = ( uint8_t )i;
  }

  for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
  {
    buf[i] = ( uint8_t )i;
  }

  /* Test simple API */
  for( i = 0; !failed && i < BLAKE2_KAT_LENGTH; ++i )
  {
    failed = ( selftest_simple( key, buf, i ) != 0 );
  }

  /* Test streaming API */
  for( step = 1; !failed && step < BLAKE2B_BLOCKBYTES; ++step )
  {
    for( i = 0; !failed && i < BLAKE2_KAT_LENGTH; ++i )
    {
      failed = ( selftest_streaming( key, buf, i, step ) != 0 );
    }
  }

  if( failed )
  {
    puts("error");
    return -1;
  }

  puts( "ok" );
  return 0;
}
#endif
|
||||
@@ -1,359 +0,0 @@
|
||||
/*
|
||||
BLAKE2 reference source code package - reference C implementations
|
||||
|
||||
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
|
||||
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
|
||||
your option. The terms of these licenses can be found at:
|
||||
|
||||
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
- OpenSSL license : https://www.openssl.org/source/license.html
|
||||
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
More information about the BLAKE2 hash function can be found at
|
||||
https://blake2.net.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
#include "blake2.h"
|
||||
#include "blake2-impl.h"
|
||||
|
||||
#define PARALLELISM_DEGREE 4
|
||||
|
||||
/*
|
||||
blake2b_init_param defaults to setting the expected output length
|
||||
from the digest_length parameter block field.
|
||||
|
||||
In some cases, however, we do not want this, as the output length
|
||||
of these instances is given by inner_length instead.
|
||||
*/
|
||||
/* Initialise a leaf state from parameter block P, then replace the state's
   output length with P->inner_length. Returns blake2b_init_param's result;
   outlen is overwritten regardless of that result. */
static int blake2bp_init_leaf_param( blake2b_state *S, const blake2b_param *P )
{
  const int status = blake2b_init_param( S, P );

  S->outlen = P->inner_length;
  return status;
}
|
||||
|
||||
/* Build the parameter block for the leaf with index `offset` (node depth 0,
   fanout PARALLELISM_DEGREE, tree depth 2) and initialise S from it.
   outlen and keylen are stored truncated to one byte each. */
static int blake2bp_init_leaf( blake2b_state *S, size_t outlen, size_t keylen, uint64_t offset )
{
  blake2b_param params[1];

  /* Fields that are always zero for a leaf */
  memset( params->reserved, 0, sizeof( params->reserved ) );
  memset( params->salt, 0, sizeof( params->salt ) );
  memset( params->personal, 0, sizeof( params->personal ) );
  store32( &params->leaf_length, 0 );
  store32( &params->xof_length, 0 );
  params->node_depth = 0;

  /* Fixed tree geometry */
  params->fanout = PARALLELISM_DEGREE;
  params->depth = 2;
  params->inner_length = BLAKE2B_OUTBYTES;

  /* Per-call values */
  params->digest_length = (uint8_t)outlen;
  params->key_length = (uint8_t)keylen;
  store32( &params->node_offset, offset );

  return blake2bp_init_leaf_param( S, params );
}
|
||||
|
||||
/* Build the parameter block for the root node (node depth 1, node offset 0,
   fanout PARALLELISM_DEGREE, tree depth 2) and initialise S from it.
   outlen and keylen are stored truncated to one byte each. */
static int blake2bp_init_root( blake2b_state *S, size_t outlen, size_t keylen )
{
  blake2b_param params[1];

  /* Fields that are always zero for the root */
  memset( params->reserved, 0, sizeof( params->reserved ) );
  memset( params->salt, 0, sizeof( params->salt ) );
  memset( params->personal, 0, sizeof( params->personal ) );
  store32( &params->leaf_length, 0 );
  store32( &params->node_offset, 0 );
  store32( &params->xof_length, 0 );

  /* Fixed tree geometry */
  params->fanout = PARALLELISM_DEGREE;
  params->depth = 2;
  params->node_depth = 1;
  params->inner_length = BLAKE2B_OUTBYTES;

  /* Per-call values */
  params->digest_length = (uint8_t)outlen;
  params->key_length = (uint8_t)keylen;

  return blake2b_init_param( S, params );
}
|
||||
|
||||
|
||||
/* Prepare S for unkeyed streaming BLAKE2bp with an outlen-byte digest.
   Returns 0 on success, -1 if outlen is 0 or exceeds BLAKE2B_OUTBYTES, or
   if initialising the root or any leaf fails. */
int blake2bp_init( blake2bp_state *S, size_t outlen )
{
  size_t leaf;

  if( outlen == 0 || outlen > BLAKE2B_OUTBYTES )
    return -1;

  S->outlen = outlen;
  S->buflen = 0;
  memset( S->buf, 0, sizeof( S->buf ) );

  if( blake2bp_init_root( S->R, outlen, 0 ) < 0 )
    return -1;

  for( leaf = 0; leaf < PARALLELISM_DEGREE; ++leaf )
  {
    if( blake2bp_init_leaf( S->S[leaf], outlen, 0, leaf ) < 0 )
      return -1;
  }

  /* Flag the root and the final leaf as last nodes */
  S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
  S->R->last_node = 1;
  return 0;
}
|
||||
|
||||
/* Prepare S for keyed streaming BLAKE2bp with an outlen-byte digest.
   Returns 0 on success, -1 if outlen is 0 or exceeds BLAKE2B_OUTBYTES, if
   key is NULL, if keylen is 0 or exceeds BLAKE2B_KEYBYTES, or if
   initialising the root or any leaf fails. */
int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen )
{
  uint8_t key_block[BLAKE2B_BLOCKBYTES];
  size_t leaf;

  if( outlen == 0 || outlen > BLAKE2B_OUTBYTES )
    return -1;

  if( key == NULL || keylen == 0 || keylen > BLAKE2B_KEYBYTES )
    return -1;

  S->outlen = outlen;
  S->buflen = 0;
  memset( S->buf, 0, sizeof( S->buf ) );

  if( blake2bp_init_root( S->R, outlen, keylen ) < 0 )
    return -1;

  for( leaf = 0; leaf < PARALLELISM_DEGREE; ++leaf )
  {
    if( blake2bp_init_leaf( S->S[leaf], outlen, keylen, leaf ) < 0 )
      return -1;
  }

  /* Flag the root and the final leaf as last nodes */
  S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
  S->R->last_node = 1;

  /* Every leaf absorbs the key, zero-padded to one full block */
  memset( key_block, 0, sizeof( key_block ) );
  memcpy( key_block, key, keylen );

  for( leaf = 0; leaf < PARALLELISM_DEGREE; ++leaf )
    blake2b_update( S->S[leaf], key_block, sizeof( key_block ) );

  secure_zero_memory( key_block, sizeof( key_block ) ); /* Burn the key from stack */
  return 0;
}
|
||||
|
||||
|
||||
/* Absorb inlen bytes from pin into the streaming BLAKE2bp state.

   Input is dealt out to the leaves one BLAKE2B_BLOCKBYTES block at a time in
   round-robin order. Anything short of a full stripe (PARALLELISM_DEGREE
   blocks) is kept in S->buf for a later call or for blake2bp_final.
   Always returns 0.

   NOTE(review): in the OpenMP build each thread takes its leaf index from
   omp_get_thread_num(), so the result appears to rely on the runtime
   actually providing PARALLELISM_DEGREE threads - confirm. */
int blake2bp_update( blake2bp_state *S, const void *pin, size_t inlen )
{
  const size_t stripe = PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
  const unsigned char *in = (const unsigned char *)pin;
  size_t buffered = S->buflen;
  const size_t room = sizeof( S->buf ) - buffered;
  size_t i;

  /* Complete a partially filled buffer and hand one block to each leaf */
  if( buffered != 0 && inlen >= room )
  {
    memcpy( S->buf + buffered, in, room );

    for( i = 0; i < PARALLELISM_DEGREE; ++i )
      blake2b_update( S->S[i], S->buf + i * BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES );

    in += room;
    inlen -= room;
    buffered = 0;
  }

  /* Each leaf walks the input in strides of one stripe, starting at its
     own block within the first stripe */
#if defined(_OPENMP)
  #pragma omp parallel shared(S), num_threads(PARALLELISM_DEGREE)
#else
  for( i = 0; i < PARALLELISM_DEGREE; ++i )
#endif
  {
#if defined(_OPENMP)
    size_t i = omp_get_thread_num();
#endif
    const unsigned char *lane = in + i * BLAKE2B_BLOCKBYTES;
    size_t pending = inlen;

    while( pending >= stripe )
    {
      blake2b_update( S->S[i], lane, BLAKE2B_BLOCKBYTES );
      lane += stripe;
      pending -= stripe;
    }
  }

  /* Skip the full stripes consumed above and buffer the tail */
  in += inlen - inlen % stripe;
  inlen %= stripe;

  if( inlen > 0 )
    memcpy( S->buf + buffered, in, inlen );

  S->buflen = buffered + inlen;
  return 0;
}
|
||||
|
||||
/* Finish a streaming BLAKE2bp computation and write S->outlen digest bytes
   to out. Returns -1 if out is NULL or outlen is smaller than S->outlen;
   otherwise returns the result of finalising the root node. */
int blake2bp_final( blake2bp_state *S, void *out, size_t outlen )
{
  uint8_t leaf_digest[PARALLELISM_DEGREE][BLAKE2B_OUTBYTES];
  size_t i;

  if( out == NULL )
    return -1;

  if( outlen < S->outlen )
    return -1;

  /* Flush each leaf's share of the buffered tail, then finalise the leaf */
  for( i = 0; i < PARALLELISM_DEGREE; ++i )
  {
    const size_t start = i * BLAKE2B_BLOCKBYTES;

    if( S->buflen > start )
    {
      size_t avail = S->buflen - start;

      if( avail > BLAKE2B_BLOCKBYTES )
        avail = BLAKE2B_BLOCKBYTES;

      blake2b_update( S->S[i], S->buf + start, avail );
    }

    blake2b_final( S->S[i], leaf_digest[i], BLAKE2B_OUTBYTES );
  }

  /* The root hashes the leaf digests in index order */
  for( i = 0; i < PARALLELISM_DEGREE; ++i )
    blake2b_update( S->R, leaf_digest[i], BLAKE2B_OUTBYTES );

  return blake2b_final( S->R, out, S->outlen );
}
|
||||
|
||||
int blake2bp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
|
||||
{
|
||||
uint8_t hash[PARALLELISM_DEGREE][BLAKE2B_OUTBYTES];
|
||||
blake2b_state S[PARALLELISM_DEGREE][1];
|
||||
blake2b_state FS[1];
|
||||
size_t i;
|
||||
|
||||
/* Verify parameters */
|
||||
if ( NULL == in && inlen > 0 ) return -1;
|
||||
|
||||
if ( NULL == out ) return -1;
|
||||
|
||||
if( NULL == key && keylen > 0 ) return -1;
|
||||
|
||||
if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
|
||||
|
||||
if( keylen > BLAKE2B_KEYBYTES ) return -1;
|
||||
|
||||
for( i = 0; i < PARALLELISM_DEGREE; ++i )
|
||||
if( blake2bp_init_leaf( S[i], outlen, keylen, i ) < 0 ) return -1;
|
||||
|
||||
S[PARALLELISM_DEGREE - 1]->last_node = 1; /* mark last node */
|
||||
|
||||
if( keylen > 0 )
|
||||
{
|
||||
uint8_t block[BLAKE2B_BLOCKBYTES];
|
||||
memset( block, 0, BLAKE2B_BLOCKBYTES );
|
||||
memcpy( block, key, keylen );
|
||||
|
||||
for( i = 0; i < PARALLELISM_DEGREE; ++i )
|
||||
blake2b_update( S[i], block, BLAKE2B_BLOCKBYTES );
|
||||
|
||||
secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
|
||||
}
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel shared(S,hash), num_threads(PARALLELISM_DEGREE)
|
||||
#else
|
||||
|
||||
for( i = 0; i < PARALLELISM_DEGREE; ++i )
|
||||
#endif
|
||||
{
|
||||
#if defined(_OPENMP)
|
||||
size_t i = omp_get_thread_num();
|
||||
#endif
|
||||
size_t inlen__ = inlen;
|
||||
const unsigned char *in__ = ( const unsigned char * )in;
|
||||
in__ += i * BLAKE2B_BLOCKBYTES;
|
||||
|
||||
while( inlen__ >= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES )
|
||||
{
|
||||
blake2b_update( S[i], in__, BLAKE2B_BLOCKBYTES );
|
||||
in__ += PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
|
||||
inlen__ -= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
|
||||
}
|
||||
|
||||
if( inlen__ > i * BLAKE2B_BLOCKBYTES )
|
||||
{
|
||||
const size_t left = inlen__ - i * BLAKE2B_BLOCKBYTES;
|
||||
const size_t len = left <= BLAKE2B_BLOCKBYTES ? left : BLAKE2B_BLOCKBYTES;
|
||||
blake2b_update( S[i], in__, len );
|
||||
}
|
||||
|
||||
blake2b_final( S[i], hash[i], BLAKE2B_OUTBYTES );
|
||||
}
|
||||
|
||||
if( blake2bp_init_root( FS, outlen, keylen ) < 0 )
|
||||
return -1;
|
||||
|
||||
FS->last_node = 1; /* Mark as last node */
|
||||
|
||||
for( i = 0; i < PARALLELISM_DEGREE; ++i )
|
||||
blake2b_update( FS, hash[i], BLAKE2B_OUTBYTES );
|
||||
|
||||
return blake2b_final( FS, out, outlen );;
|
||||
}
|
||||
|
||||
#if defined(BLAKE2BP_SELFTEST)
|
||||
#include <string.h>
|
||||
#include "blake2-kat.h"
|
||||
int main( void )
|
||||
{
|
||||
uint8_t key[BLAKE2B_KEYBYTES];
|
||||
uint8_t buf[BLAKE2_KAT_LENGTH];
|
||||
size_t i, step;
|
||||
|
||||
for( i = 0; i < BLAKE2B_KEYBYTES; ++i )
|
||||
key[i] = ( uint8_t )i;
|
||||
|
||||
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
|
||||
buf[i] = ( uint8_t )i;
|
||||
|
||||
/* Test simple API */
|
||||
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
|
||||
{
|
||||
uint8_t hash[BLAKE2B_OUTBYTES];
|
||||
blake2bp( hash, BLAKE2B_OUTBYTES, buf, i, key, BLAKE2B_KEYBYTES );
|
||||
|
||||
if( 0 != memcmp( hash, blake2bp_keyed_kat[i], BLAKE2B_OUTBYTES ) )
|
||||
{
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
/* Test streaming API */
|
||||
for(step = 1; step < BLAKE2B_BLOCKBYTES; ++step) {
|
||||
for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) {
|
||||
uint8_t hash[BLAKE2B_OUTBYTES];
|
||||
blake2bp_state S;
|
||||
uint8_t * p = buf;
|
||||
size_t mlen = i;
|
||||
int err = 0;
|
||||
|
||||
if( (err = blake2bp_init_key(&S, BLAKE2B_OUTBYTES, key, BLAKE2B_KEYBYTES)) < 0 ) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
while (mlen >= step) {
|
||||
if ( (err = blake2bp_update(&S, p, step)) < 0 ) {
|
||||
goto fail;
|
||||
}
|
||||
mlen -= step;
|
||||
p += step;
|
||||
}
|
||||
if ( (err = blake2bp_update(&S, p, mlen)) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
if ( (err = blake2bp_final(&S, hash, BLAKE2B_OUTBYTES)) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (0 != memcmp(hash, blake2bp_keyed_kat[i], BLAKE2B_OUTBYTES)) {
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
puts( "ok" );
|
||||
return 0;
|
||||
fail:
|
||||
puts("error");
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
@@ -1,367 +0,0 @@
|
||||
/*
|
||||
BLAKE2 reference source code package - reference C implementations
|
||||
|
||||
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
|
||||
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
|
||||
your option. The terms of these licenses can be found at:
|
||||
|
||||
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
- OpenSSL license : https://www.openssl.org/source/license.html
|
||||
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
More information about the BLAKE2 hash function can be found at
|
||||
https://blake2.net.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "blake2.h"
|
||||
#include "blake2-impl.h"
|
||||
|
||||
/* Initial chaining values; also mixed into the upper half of the working
   vector by blake2s_compress. */
static const uint32_t blake2s_IV[8] =
{
  0x6A09E667UL,
  0xBB67AE85UL,
  0x3C6EF372UL,
  0xA54FF53AUL,
  0x510E527FUL,
  0x9B05688CUL,
  0x1F83D9ABUL,
  0x5BE0CD19UL
};
|
||||
|
||||
/* Message word schedule: row r lists the order in which the 16 message
   words are fed to the G function during round r. */
static const uint8_t blake2s_sigma[10][16] =
{
  {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
  { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
  { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 },
  {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 },
  {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 },
  {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 },
  { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 },
  { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 },
  {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 },
  { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0 },
};
|
||||
|
||||
/* Set every bit of the second finalisation flag word, f[1]. */
static void blake2s_set_lastnode( blake2s_state *S )
{
  S->f[1] = UINT32_MAX;
}
|
||||
|
||||
/* Some helper functions, not necessarily useful */
|
||||
static int blake2s_is_lastblock( const blake2s_state *S )
|
||||
{
|
||||
return S->f[0] != 0;
|
||||
}
|
||||
|
||||
/* Flag the state as processing its last block by setting every bit of f[0];
   when S->last_node is set, f[1] is set as well. */
static void blake2s_set_lastblock( blake2s_state *S )
{
  S->f[0] = UINT32_MAX;

  if( S->last_node )
    blake2s_set_lastnode( S );
}
|
||||
|
||||
/* Add inc to the two-word counter t[0] (low) / t[1] (high), carrying into
   the high word when the low word wraps around. */
static void blake2s_increment_counter( blake2s_state *S, const uint32_t inc )
{
  const uint32_t low = S->t[0] + inc;

  S->t[0] = low;

  /* The sum wrapped exactly when it ended up smaller than the addend */
  if( low < inc )
    S->t[1] += 1;
}
|
||||
|
||||
/* Reset the whole state to zero and load the chaining value h with
   blake2s_IV. */
static void blake2s_init0( blake2s_state *S )
{
  memset( S, 0, sizeof( *S ) );
  memcpy( S->h, blake2s_IV, sizeof( blake2s_IV ) );
}
|
||||
|
||||
/* blake2s_init_param XORs the IV with the input parameter block */
|
||||
/* Initialise S from parameter block P: reset the state, XOR the first
   32 bytes of P (read as eight 32-bit words via load32) into the chaining
   value, and take the output length from P->digest_length.
   Always returns 0. */
int blake2s_init_param( blake2s_state *S, const blake2s_param *P )
{
  const unsigned char *bytes = ( const unsigned char * )P;
  size_t word;

  blake2s_init0( S );

  /* IV XOR ParamBlock */
  for( word = 0; word < 8; ++word, bytes += 4 )
    S->h[word] ^= load32( bytes );

  S->outlen = P->digest_length;
  return 0;
}
|
||||
|
||||
|
||||
/* Sequential blake2s initialization */
|
||||
/* Prepare S for unkeyed sequential BLAKE2s with an outlen-byte digest.
   Returns -1 if outlen is 0 or exceeds BLAKE2S_OUTBYTES, otherwise the
   result of blake2s_init_param. */
int blake2s_init( blake2s_state *S, size_t outlen )
{
  blake2s_param params[1];

  if( outlen == 0 || outlen > BLAKE2S_OUTBYTES )
    return -1;

  /* Sequential mode: fanout 1, depth 1, every tree field zero */
  params->fanout = 1;
  params->depth = 1;
  params->node_depth = 0;
  params->inner_length = 0;
  store32( &params->leaf_length, 0 );
  store32( &params->node_offset, 0 );
  store16( &params->xof_length, 0 );

  /* No key, salt or personalisation */
  params->key_length = 0;
  memset( params->salt, 0, sizeof( params->salt ) );
  memset( params->personal, 0, sizeof( params->personal ) );

  params->digest_length = (uint8_t)outlen;

  return blake2s_init_param( S, params );
}
|
||||
|
||||
/* Prepare S for keyed sequential BLAKE2s with an outlen-byte digest.
   Returns 0 on success, -1 if outlen is 0 or exceeds BLAKE2S_OUTBYTES, if
   key is NULL, if keylen is 0 or exceeds BLAKE2S_KEYBYTES, or if
   blake2s_init_param fails. */
int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen )
{
  blake2s_param params[1];
  uint8_t key_block[BLAKE2S_BLOCKBYTES];

  if( outlen == 0 || outlen > BLAKE2S_OUTBYTES )
    return -1;

  if( key == NULL || keylen == 0 || keylen > BLAKE2S_KEYBYTES )
    return -1;

  /* Sequential mode: fanout 1, depth 1, every tree field zero */
  params->fanout = 1;
  params->depth = 1;
  params->node_depth = 0;
  params->inner_length = 0;
  store32( &params->leaf_length, 0 );
  store32( &params->node_offset, 0 );
  store16( &params->xof_length, 0 );

  /* No salt or personalisation */
  memset( params->salt, 0, sizeof( params->salt ) );
  memset( params->personal, 0, sizeof( params->personal ) );

  params->digest_length = (uint8_t)outlen;
  params->key_length = (uint8_t)keylen;

  if( blake2s_init_param( S, params ) < 0 )
    return -1;

  /* The key, zero-padded to one full block, is absorbed as the first block */
  memset( key_block, 0, sizeof( key_block ) );
  memcpy( key_block, key, keylen );
  blake2s_update( S, key_block, sizeof( key_block ) );
  secure_zero_memory( key_block, sizeof( key_block ) ); /* Burn the key from stack */

  return 0;
}
|
||||
|
||||
#define G(r,i,a,b,c,d) \
|
||||
do { \
|
||||
a = a + b + m[blake2s_sigma[r][2*i+0]]; \
|
||||
d = rotr32(d ^ a, 16); \
|
||||
c = c + d; \
|
||||
b = rotr32(b ^ c, 12); \
|
||||
a = a + b + m[blake2s_sigma[r][2*i+1]]; \
|
||||
d = rotr32(d ^ a, 8); \
|
||||
c = c + d; \
|
||||
b = rotr32(b ^ c, 7); \
|
||||
} while(0)
|
||||
|
||||
#define ROUND(r) \
|
||||
do { \
|
||||
G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
|
||||
G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
|
||||
G(r,2,v[ 2],v[ 6],v[10],v[14]); \
|
||||
G(r,3,v[ 3],v[ 7],v[11],v[15]); \
|
||||
G(r,4,v[ 0],v[ 5],v[10],v[15]); \
|
||||
G(r,5,v[ 1],v[ 6],v[11],v[12]); \
|
||||
G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
|
||||
G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
|
||||
} while(0)
|
||||
|
||||
static void blake2s_compress( blake2s_state *S, const uint8_t in[BLAKE2S_BLOCKBYTES] )
|
||||
{
|
||||
uint32_t m[16];
|
||||
uint32_t v[16];
|
||||
size_t i;
|
||||
|
||||
for( i = 0; i < 16; ++i ) {
|
||||
m[i] = load32( in + i * sizeof( m[i] ) );
|
||||
}
|
||||
|
||||
for( i = 0; i < 8; ++i ) {
|
||||
v[i] = S->h[i];
|
||||
}
|
||||
|
||||
v[ 8] = blake2s_IV[0];
|
||||
v[ 9] = blake2s_IV[1];
|
||||
v[10] = blake2s_IV[2];
|
||||
v[11] = blake2s_IV[3];
|
||||
v[12] = S->t[0] ^ blake2s_IV[4];
|
||||
v[13] = S->t[1] ^ blake2s_IV[5];
|
||||
v[14] = S->f[0] ^ blake2s_IV[6];
|
||||
v[15] = S->f[1] ^ blake2s_IV[7];
|
||||
|
||||
ROUND( 0 );
|
||||
ROUND( 1 );
|
||||
ROUND( 2 );
|
||||
ROUND( 3 );
|
||||
ROUND( 4 );
|
||||
ROUND( 5 );
|
||||
ROUND( 6 );
|
||||
ROUND( 7 );
|
||||
ROUND( 8 );
|
||||
ROUND( 9 );
|
||||
|
||||
for( i = 0; i < 8; ++i ) {
|
||||
S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
|
||||
}
|
||||
}
|
||||
|
||||
#undef G
|
||||
#undef ROUND
|
||||
|
||||
/* Absorb inlen bytes from pin into the BLAKE2s state. Always returns 0.

   Blocks are compressed only once more input is known to follow; the most
   recent (possibly full) block always stays in S->buf so that
   blake2s_final can compress it with the last-block flag set. */
int blake2s_update( blake2s_state *S, const void *pin, size_t inlen )
{
  const unsigned char *src = (const unsigned char *)pin;
  size_t have;
  size_t room;

  if( inlen == 0 )
    return 0;

  have = S->buflen;
  room = BLAKE2S_BLOCKBYTES - have;

  if( inlen > room )
  {
    /* Complete and compress the buffered block */
    memcpy( S->buf + have, src, room );
    S->buflen = 0;
    blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
    blake2s_compress( S, S->buf );
    src += room;
    inlen -= room;

    /* Compress straight from the input while more than one block remains */
    for( ; inlen > BLAKE2S_BLOCKBYTES; src += BLAKE2S_BLOCKBYTES, inlen -= BLAKE2S_BLOCKBYTES )
    {
      blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
      blake2s_compress( S, src );
    }
  }

  /* Keep the remainder for the next call or for blake2s_final */
  memcpy( S->buf + S->buflen, src, inlen );
  S->buflen += inlen;
  return 0;
}
|
||||
|
||||
/* Finish the hash and write the digest to out.

   S       state previously set up by one of the init functions
   out     destination buffer, must not be NULL
   outlen  capacity of out in bytes, must be at least S->outlen

   Exactly S->outlen bytes are written. Returns 0 on success, -1 if out is
   NULL, if outlen is smaller than S->outlen, or if the state has already
   been finalised. */
int blake2s_final( blake2s_state *S, void *out, size_t outlen )
{
  uint8_t buffer[BLAKE2S_OUTBYTES] = {0};
  size_t i;

  if( out == NULL || outlen < S->outlen )
    return -1;

  if( blake2s_is_lastblock( S ) )
    return -1;

  blake2s_increment_counter( S, ( uint32_t )S->buflen );
  blake2s_set_lastblock( S );
  memset( S->buf + S->buflen, 0, BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */
  blake2s_compress( S, S->buf );

  for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */
    store32( buffer + sizeof( S->h[i] ) * i, S->h[i] );

  /* Copy the configured digest length, not the caller's buffer capacity:
     outlen is only known to be >= S->outlen and may exceed sizeof( buffer ),
     which would read past the end of the temporary. */
  memcpy( out, buffer, S->outlen );
  secure_zero_memory( buffer, sizeof( buffer ) );
  return 0;
}
|
||||
|
||||
int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
|
||||
{
|
||||
blake2s_state S[1];
|
||||
|
||||
/* Verify parameters */
|
||||
if ( NULL == in && inlen > 0 ) return -1;
|
||||
|
||||
if ( NULL == out ) return -1;
|
||||
|
||||
if ( NULL == key && keylen > 0) return -1;
|
||||
|
||||
if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
|
||||
|
||||
if( keylen > BLAKE2S_KEYBYTES ) return -1;
|
||||
|
||||
if( keylen > 0 )
|
||||
{
|
||||
if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if( blake2s_init( S, outlen ) < 0 ) return -1;
|
||||
}
|
||||
|
||||
blake2s_update( S, ( const uint8_t * )in, inlen );
|
||||
blake2s_final( S, out, outlen );
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if defined(SUPERCOP)
|
||||
int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
|
||||
{
|
||||
return blake2s( out, BLAKE2S_OUTBYTES, in, inlen, NULL, 0 );
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(BLAKE2S_SELFTEST)
|
||||
#include <string.h>
|
||||
#include "blake2-kat.h"
|
||||
int main( void )
|
||||
{
|
||||
uint8_t key[BLAKE2S_KEYBYTES];
|
||||
uint8_t buf[BLAKE2_KAT_LENGTH];
|
||||
size_t i, step;
|
||||
|
||||
for( i = 0; i < BLAKE2S_KEYBYTES; ++i )
|
||||
key[i] = ( uint8_t )i;
|
||||
|
||||
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
|
||||
buf[i] = ( uint8_t )i;
|
||||
|
||||
/* Test simple API */
|
||||
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
|
||||
{
|
||||
uint8_t hash[BLAKE2S_OUTBYTES];
|
||||
blake2s( hash, BLAKE2S_OUTBYTES, buf, i, key, BLAKE2S_KEYBYTES );
|
||||
|
||||
if( 0 != memcmp( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) )
|
||||
{
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
/* Test streaming API */
|
||||
for(step = 1; step < BLAKE2S_BLOCKBYTES; ++step) {
|
||||
for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) {
|
||||
uint8_t hash[BLAKE2S_OUTBYTES];
|
||||
blake2s_state S;
|
||||
uint8_t * p = buf;
|
||||
size_t mlen = i;
|
||||
int err = 0;
|
||||
|
||||
if( (err = blake2s_init_key(&S, BLAKE2S_OUTBYTES, key, BLAKE2S_KEYBYTES)) < 0 ) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
while (mlen >= step) {
|
||||
if ( (err = blake2s_update(&S, p, step)) < 0 ) {
|
||||
goto fail;
|
||||
}
|
||||
mlen -= step;
|
||||
p += step;
|
||||
}
|
||||
if ( (err = blake2s_update(&S, p, mlen)) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
if ( (err = blake2s_final(&S, hash, BLAKE2S_OUTBYTES)) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (0 != memcmp(hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES)) {
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
puts( "ok" );
|
||||
return 0;
|
||||
fail:
|
||||
puts("error");
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
@@ -1,359 +0,0 @@
|
||||
/*
|
||||
BLAKE2 reference source code package - reference C implementations
|
||||
|
||||
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
|
||||
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
|
||||
your option. The terms of these licenses can be found at:
|
||||
|
||||
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
- OpenSSL license : https://www.openssl.org/source/license.html
|
||||
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
More information about the BLAKE2 hash function can be found at
|
||||
https://blake2.net.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
#include "blake2.h"
|
||||
#include "blake2-impl.h"
|
||||
|
||||
#define PARALLELISM_DEGREE 8
|
||||
|
||||
/*
|
||||
blake2s_init_param defaults to setting the expected output length
|
||||
from the digest_length parameter block field.
|
||||
|
||||
In some cases, however, we do not want this, as the output length
|
||||
of these instances is given by inner_length instead.
|
||||
*/
|
||||
/* Initialise a leaf state from parameter block P, then replace the state's
   output length with P->inner_length. Returns blake2s_init_param's result;
   outlen is overwritten regardless of that result. */
static int blake2sp_init_leaf_param( blake2s_state *S, const blake2s_param *P )
{
  const int status = blake2s_init_param( S, P );

  S->outlen = P->inner_length;
  return status;
}
|
||||
|
||||
/* Build the parameter block for the leaf with index `offset` (node depth 0,
   fanout PARALLELISM_DEGREE, tree depth 2) and initialise S from it.
   outlen and keylen are stored truncated to one byte each. */
static int blake2sp_init_leaf( blake2s_state *S, size_t outlen, size_t keylen, uint64_t offset )
{
  blake2s_param params[1];

  /* Fields that are always zero for a leaf */
  memset( params->salt, 0, sizeof( params->salt ) );
  memset( params->personal, 0, sizeof( params->personal ) );
  store32( &params->leaf_length, 0 );
  store16( &params->xof_length, 0 );
  params->node_depth = 0;

  /* Fixed tree geometry */
  params->fanout = PARALLELISM_DEGREE;
  params->depth = 2;
  params->inner_length = BLAKE2S_OUTBYTES;

  /* Per-call values */
  params->digest_length = (uint8_t)outlen;
  params->key_length = (uint8_t)keylen;
  store32( &params->node_offset, offset );

  return blake2sp_init_leaf_param( S, params );
}
|
||||
|
||||
/* Build the parameter block for the root node (node depth 1, node offset 0,
   fanout PARALLELISM_DEGREE, tree depth 2) and initialise S from it.
   outlen and keylen are stored truncated to one byte each. */
static int blake2sp_init_root( blake2s_state *S, size_t outlen, size_t keylen )
{
  blake2s_param params[1];

  /* Fields that are always zero for the root */
  memset( params->salt, 0, sizeof( params->salt ) );
  memset( params->personal, 0, sizeof( params->personal ) );
  store32( &params->leaf_length, 0 );
  store32( &params->node_offset, 0 );
  store16( &params->xof_length, 0 );

  /* Fixed tree geometry */
  params->fanout = PARALLELISM_DEGREE;
  params->depth = 2;
  params->node_depth = 1;
  params->inner_length = BLAKE2S_OUTBYTES;

  /* Per-call values */
  params->digest_length = (uint8_t)outlen;
  params->key_length = (uint8_t)keylen;

  return blake2s_init_param( S, params );
}
|
||||
|
||||
|
||||
/* Prepare S for unkeyed streaming BLAKE2sp with an outlen-byte digest.
   Returns 0 on success, -1 if outlen is 0 or exceeds BLAKE2S_OUTBYTES, or
   if initialising the root or any leaf fails. */
int blake2sp_init( blake2sp_state *S, size_t outlen )
{
  size_t leaf;

  if( outlen == 0 || outlen > BLAKE2S_OUTBYTES )
    return -1;

  S->outlen = outlen;
  S->buflen = 0;
  memset( S->buf, 0, sizeof( S->buf ) );

  if( blake2sp_init_root( S->R, outlen, 0 ) < 0 )
    return -1;

  for( leaf = 0; leaf < PARALLELISM_DEGREE; ++leaf )
  {
    if( blake2sp_init_leaf( S->S[leaf], outlen, 0, leaf ) < 0 )
      return -1;
  }

  /* Flag the root and the final leaf as last nodes */
  S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
  S->R->last_node = 1;
  return 0;
}
|
||||
|
||||
/* Prepare S for keyed streaming BLAKE2sp with an outlen-byte digest.
   Returns 0 on success, -1 if outlen is 0 or exceeds BLAKE2S_OUTBYTES, if
   key is NULL, if keylen is 0 or exceeds BLAKE2S_KEYBYTES, or if
   initialising the root or any leaf fails. */
int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen )
{
  uint8_t key_block[BLAKE2S_BLOCKBYTES];
  size_t leaf;

  if( outlen == 0 || outlen > BLAKE2S_OUTBYTES )
    return -1;

  if( key == NULL || keylen == 0 || keylen > BLAKE2S_KEYBYTES )
    return -1;

  S->outlen = outlen;
  S->buflen = 0;
  memset( S->buf, 0, sizeof( S->buf ) );

  if( blake2sp_init_root( S->R, outlen, keylen ) < 0 )
    return -1;

  for( leaf = 0; leaf < PARALLELISM_DEGREE; ++leaf )
  {
    if( blake2sp_init_leaf( S->S[leaf], outlen, keylen, leaf ) < 0 )
      return -1;
  }

  /* Flag the root and the final leaf as last nodes */
  S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
  S->R->last_node = 1;

  /* Every leaf absorbs the key, zero-padded to one full block */
  memset( key_block, 0, sizeof( key_block ) );
  memcpy( key_block, key, keylen );

  for( leaf = 0; leaf < PARALLELISM_DEGREE; ++leaf )
    blake2s_update( S->S[leaf], key_block, sizeof( key_block ) );

  secure_zero_memory( key_block, sizeof( key_block ) ); /* Burn the key from stack */
  return 0;
}
|
||||
|
||||
|
||||
/* Absorb inlen bytes from pin into the streaming BLAKE2sp state.

   Input is dealt out to the leaves one BLAKE2S_BLOCKBYTES block at a time in
   round-robin order. Anything short of a full stripe (PARALLELISM_DEGREE
   blocks) is kept in S->buf for a later call or for blake2sp_final.
   Always returns 0.

   NOTE(review): in the OpenMP build each thread takes its leaf index from
   omp_get_thread_num(), so the result appears to rely on the runtime
   actually providing PARALLELISM_DEGREE threads - confirm. */
int blake2sp_update( blake2sp_state *S, const void *pin, size_t inlen )
{
  const size_t stripe = PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
  const unsigned char *in = (const unsigned char *)pin;
  size_t buffered = S->buflen;
  const size_t room = sizeof( S->buf ) - buffered;
  size_t i;

  /* Complete a partially filled buffer and hand one block to each leaf */
  if( buffered != 0 && inlen >= room )
  {
    memcpy( S->buf + buffered, in, room );

    for( i = 0; i < PARALLELISM_DEGREE; ++i )
      blake2s_update( S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES );

    in += room;
    inlen -= room;
    buffered = 0;
  }

  /* Each leaf walks the input in strides of one stripe, starting at its
     own block within the first stripe */
#if defined(_OPENMP)
  #pragma omp parallel shared(S), num_threads(PARALLELISM_DEGREE)
#else
  for( i = 0; i < PARALLELISM_DEGREE; ++i )
#endif
  {
#if defined(_OPENMP)
    size_t i = omp_get_thread_num();
#endif
    const unsigned char *lane = in + i * BLAKE2S_BLOCKBYTES;
    size_t pending = inlen;

    while( pending >= stripe )
    {
      blake2s_update( S->S[i], lane, BLAKE2S_BLOCKBYTES );
      lane += stripe;
      pending -= stripe;
    }
  }

  /* Skip the full stripes consumed above and buffer the tail */
  in += inlen - inlen % stripe;
  inlen %= stripe;

  if( inlen > 0 )
    memcpy( S->buf + buffered, in, inlen );

  S->buflen = buffered + inlen;
  return 0;
}
|
||||
|
||||
|
||||
/* Finish a streaming BLAKE2sp computation and write S->outlen digest bytes
   to out. Returns -1 if out is NULL or outlen is smaller than S->outlen;
   otherwise returns the result of finalising the root node. */
int blake2sp_final( blake2sp_state *S, void *out, size_t outlen )
{
  uint8_t leaf_digest[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES];
  size_t i;

  if( out == NULL )
    return -1;

  if( outlen < S->outlen )
    return -1;

  /* Flush each leaf's share of the buffered tail, then finalise the leaf */
  for( i = 0; i < PARALLELISM_DEGREE; ++i )
  {
    const size_t start = i * BLAKE2S_BLOCKBYTES;

    if( S->buflen > start )
    {
      size_t avail = S->buflen - start;

      if( avail > BLAKE2S_BLOCKBYTES )
        avail = BLAKE2S_BLOCKBYTES;

      blake2s_update( S->S[i], S->buf + start, avail );
    }

    blake2s_final( S->S[i], leaf_digest[i], BLAKE2S_OUTBYTES );
  }

  /* The root hashes the leaf digests in index order */
  for( i = 0; i < PARALLELISM_DEGREE; ++i )
    blake2s_update( S->R, leaf_digest[i], BLAKE2S_OUTBYTES );

  return blake2s_final( S->R, out, S->outlen );
}
|
||||
|
||||
|
||||
int blake2sp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
|
||||
{
|
||||
uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES];
|
||||
blake2s_state S[PARALLELISM_DEGREE][1];
|
||||
blake2s_state FS[1];
|
||||
size_t i;
|
||||
|
||||
/* Verify parameters */
|
||||
if ( NULL == in && inlen > 0 ) return -1;
|
||||
|
||||
if ( NULL == out ) return -1;
|
||||
|
||||
if ( NULL == key && keylen > 0) return -1;
|
||||
|
||||
if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
|
||||
|
||||
if( keylen > BLAKE2S_KEYBYTES ) return -1;
|
||||
|
||||
for( i = 0; i < PARALLELISM_DEGREE; ++i )
|
||||
if( blake2sp_init_leaf( S[i], outlen, keylen, i ) < 0 ) return -1;
|
||||
|
||||
S[PARALLELISM_DEGREE - 1]->last_node = 1; /* mark last node */
|
||||
|
||||
if( keylen > 0 )
|
||||
{
|
||||
uint8_t block[BLAKE2S_BLOCKBYTES];
|
||||
memset( block, 0, BLAKE2S_BLOCKBYTES );
|
||||
memcpy( block, key, keylen );
|
||||
|
||||
for( i = 0; i < PARALLELISM_DEGREE; ++i )
|
||||
blake2s_update( S[i], block, BLAKE2S_BLOCKBYTES );
|
||||
|
||||
secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */
|
||||
}
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel shared(S,hash), num_threads(PARALLELISM_DEGREE)
|
||||
#else
|
||||
|
||||
for( i = 0; i < PARALLELISM_DEGREE; ++i )
|
||||
#endif
|
||||
{
|
||||
#if defined(_OPENMP)
|
||||
size_t i = omp_get_thread_num();
|
||||
#endif
|
||||
size_t inlen__ = inlen;
|
||||
const unsigned char *in__ = ( const unsigned char * )in;
|
||||
in__ += i * BLAKE2S_BLOCKBYTES;
|
||||
|
||||
while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES )
|
||||
{
|
||||
blake2s_update( S[i], in__, BLAKE2S_BLOCKBYTES );
|
||||
in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
|
||||
inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
|
||||
}
|
||||
|
||||
if( inlen__ > i * BLAKE2S_BLOCKBYTES )
|
||||
{
|
||||
const size_t left = inlen__ - i * BLAKE2S_BLOCKBYTES;
|
||||
const size_t len = left <= BLAKE2S_BLOCKBYTES ? left : BLAKE2S_BLOCKBYTES;
|
||||
blake2s_update( S[i], in__, len );
|
||||
}
|
||||
|
||||
blake2s_final( S[i], hash[i], BLAKE2S_OUTBYTES );
|
||||
}
|
||||
|
||||
if( blake2sp_init_root( FS, outlen, keylen ) < 0 )
|
||||
return -1;
|
||||
|
||||
FS->last_node = 1;
|
||||
|
||||
for( i = 0; i < PARALLELISM_DEGREE; ++i )
|
||||
blake2s_update( FS, hash[i], BLAKE2S_OUTBYTES );
|
||||
|
||||
return blake2s_final( FS, out, outlen );
|
||||
}
|
||||
|
||||
|
||||
|
||||
#if defined(BLAKE2SP_SELFTEST)
|
||||
#include <string.h>
|
||||
#include "blake2-kat.h"
|
||||
int main( void )
|
||||
{
|
||||
uint8_t key[BLAKE2S_KEYBYTES];
|
||||
uint8_t buf[BLAKE2_KAT_LENGTH];
|
||||
size_t i, step;
|
||||
|
||||
for( i = 0; i < BLAKE2S_KEYBYTES; ++i )
|
||||
key[i] = ( uint8_t )i;
|
||||
|
||||
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
|
||||
buf[i] = ( uint8_t )i;
|
||||
|
||||
/* Test simple API */
|
||||
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
|
||||
{
|
||||
uint8_t hash[BLAKE2S_OUTBYTES];
|
||||
blake2sp( hash, BLAKE2S_OUTBYTES, buf, i, key, BLAKE2S_KEYBYTES );
|
||||
|
||||
if( 0 != memcmp( hash, blake2sp_keyed_kat[i], BLAKE2S_OUTBYTES ) )
|
||||
{
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
/* Test streaming API */
|
||||
for(step = 1; step < BLAKE2S_BLOCKBYTES; ++step) {
|
||||
for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) {
|
||||
uint8_t hash[BLAKE2S_OUTBYTES];
|
||||
blake2sp_state S;
|
||||
uint8_t * p = buf;
|
||||
size_t mlen = i;
|
||||
int err = 0;
|
||||
|
||||
if( (err = blake2sp_init_key(&S, BLAKE2S_OUTBYTES, key, BLAKE2S_KEYBYTES)) < 0 ) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
while (mlen >= step) {
|
||||
if ( (err = blake2sp_update(&S, p, step)) < 0 ) {
|
||||
goto fail;
|
||||
}
|
||||
mlen -= step;
|
||||
p += step;
|
||||
}
|
||||
if ( (err = blake2sp_update(&S, p, mlen)) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
if ( (err = blake2sp_final(&S, hash, BLAKE2S_OUTBYTES)) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (0 != memcmp(hash, blake2sp_keyed_kat[i], BLAKE2S_OUTBYTES)) {
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
puts( "ok" );
|
||||
return 0;
|
||||
fail:
|
||||
puts("error");
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
@@ -1,241 +0,0 @@
|
||||
/*
|
||||
BLAKE2 reference source code package - reference C implementations
|
||||
|
||||
Copyright 2016, JP Aumasson <jeanphilippe.aumasson@gmail.com>.
|
||||
Copyright 2016, Samuel Neves <sneves@dei.uc.pt>.
|
||||
|
||||
You may use this under the terms of the CC0, the OpenSSL Licence, or
|
||||
the Apache Public License 2.0, at your option. The terms of these
|
||||
licenses can be found at:
|
||||
|
||||
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
- OpenSSL license : https://www.openssl.org/source/license.html
|
||||
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
More information about the BLAKE2 hash function can be found at
|
||||
https://blake2.net.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "blake2.h"
|
||||
#include "blake2-impl.h"
|
||||
|
||||
/*
 * Initialise an unkeyed BLAKE2Xb state producing `outlen` bytes.
 * Thin wrapper: forwards to blake2xb_init_key with no key material and
 * returns whatever that call returns.
 */
int blake2xb_init( blake2xb_state *S, const size_t outlen )
{
  const void *const no_key = NULL;

  return blake2xb_init_key( S, outlen, no_key, 0 );
}
|
||||
|
||||
/*
 * Initialise a BLAKE2Xb state, optionally keyed.
 *
 * S       state to initialise
 * outlen  requested XOF output length; must be in 1..0xFFFFFFFF
 * key     key bytes, or NULL when keylen is 0
 * keylen  key length, at most BLAKE2B_KEYBYTES
 *
 * Returns 0 on success, -1 on an invalid argument or if the underlying
 * BLAKE2b parameter initialisation fails.
 */
int blake2xb_init_key( blake2xb_state *S, const size_t outlen, const void *key, size_t keylen)
{
  blake2b_param *const param = S->P;

  if( outlen == 0 || outlen > 0xFFFFFFFFUL )
    return -1;

  /* A present key must fit; an absent key must come with a zero length. */
  if( ( NULL == key ) ? ( keylen > 0 ) : ( keylen > BLAKE2B_KEYBYTES ) )
    return -1;

  /* Root-node parameter block: sequential mode with the XOF length recorded. */
  param->digest_length = BLAKE2B_OUTBYTES;
  param->key_length    = keylen;
  param->fanout        = 1;
  param->depth         = 1;
  param->node_depth    = 0;
  param->inner_length  = 0;
  store32( &param->leaf_length, 0 );
  store32( &param->node_offset, 0 );
  store32( &param->xof_length, outlen );
  memset( param->reserved, 0, sizeof( param->reserved ) );
  memset( param->salt,     0, sizeof( param->salt ) );
  memset( param->personal, 0, sizeof( param->personal ) );

  if( blake2b_init_param( S->S, param ) < 0 )
    return -1;

  if( keylen == 0 )
    return 0;

  /* Keyed mode: absorb the key zero-padded to a full block, then wipe the copy. */
  {
    uint8_t padded[BLAKE2B_BLOCKBYTES];

    memset( padded, 0, sizeof( padded ) );
    memcpy( padded, key, keylen );
    blake2b_update( S->S, padded, sizeof( padded ) );
    secure_zero_memory( padded, sizeof( padded ) );
  }

  return 0;
}
|
||||
|
||||
/*
 * Absorb `inlen` bytes from `in` into the BLAKE2Xb state.
 * Delegates directly to blake2b_update on the embedded root state and
 * returns its result.
 */
int blake2xb_update( blake2xb_state *S, const void *in, size_t inlen )
{
  blake2b_state *const root_state = S->S;

  return blake2b_update( root_state, in, inlen );
}
|
||||
|
||||
/*
 * Finish a BLAKE2Xb computation and write `outlen` bytes of output to `out`.
 *
 * The root BLAKE2b hash is finalised first; the output is then produced in
 * BLAKE2B_OUTBYTES-sized pieces, each one a BLAKE2b hash of the root hash
 * under a parameter block whose node_offset is the piece index.
 *
 * outlen must equal the length given at init time, unless that length was
 * 0xFFFFFFFF ("unknown"), in which case any non-zero outlen is accepted.
 *
 * Returns 0 on success, -1 on a NULL `out`, a length mismatch, or a failure
 * of any underlying BLAKE2b call. The temporaries holding the root hash and
 * the per-piece state are wiped on every exit path taken after they may have
 * been written, including the error paths.
 */
int blake2xb_final( blake2xb_state *S, void *out, size_t outlen) {
  blake2b_state C[1];
  blake2b_param P[1];
  uint32_t xof_length = load32(&S->P->xof_length);
  uint8_t root[BLAKE2B_BLOCKBYTES];
  size_t i;
  int rc = -1;

  if (NULL == out) {
    return -1;
  }

  /* outlen must match the output size defined in xof_length, */
  /* unless it was -1, in which case anything goes except 0. */
  if (xof_length == 0xFFFFFFFFUL) {
    if (outlen == 0) {
      return -1;
    }
  } else {
    if (outlen != xof_length) {
      return -1;
    }
  }

  /* Finalize the root hash */
  if (blake2b_final(S->S, root, BLAKE2B_OUTBYTES) < 0) {
    goto cleanup;
  }

  /* Set common block structure values */
  /* Copy values from parent instance, and only change the ones below */
  memcpy(P, S->P, sizeof(blake2b_param));
  P->key_length = 0;
  P->fanout = 0;
  P->depth = 0;
  store32(&P->leaf_length, BLAKE2B_OUTBYTES);
  P->inner_length = BLAKE2B_OUTBYTES;
  P->node_depth = 0;

  for (i = 0; outlen > 0; ++i) {
    const size_t block_size = (outlen < BLAKE2B_OUTBYTES) ? outlen : BLAKE2B_OUTBYTES;

    /* Initialize the state for output piece i */
    P->digest_length = (uint8_t)block_size;
    store32(&P->node_offset, (uint32_t)i);
    if (blake2b_init_param(C, P) < 0) {
      goto cleanup;
    }

    /* Absorb the root hash, then emit this piece of the output */
    if (blake2b_update(C, root, BLAKE2B_OUTBYTES) < 0) {
      goto cleanup;
    }
    if (blake2b_final(C, (uint8_t *)out + i * BLAKE2B_OUTBYTES, block_size) < 0) {
      goto cleanup;
    }
    outlen -= block_size;
  }

  rc = 0;

cleanup:
  /* Wipe intermediates on success and failure alike. */
  secure_zero_memory(root, sizeof(root));
  secure_zero_memory(P, sizeof(P));
  secure_zero_memory(C, sizeof(C));
  /* Put blake2xb in an invalid state? cf. blake2s_is_lastblock */
  return rc;
}
|
||||
|
||||
int blake2xb(void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen)
|
||||
{
|
||||
blake2xb_state S[1];
|
||||
|
||||
/* Verify parameters */
|
||||
if (NULL == in && inlen > 0)
|
||||
return -1;
|
||||
|
||||
if (NULL == out)
|
||||
return -1;
|
||||
|
||||
if (NULL == key && keylen > 0)
|
||||
return -1;
|
||||
|
||||
if (keylen > BLAKE2B_KEYBYTES)
|
||||
return -1;
|
||||
|
||||
if (outlen == 0)
|
||||
return -1;
|
||||
|
||||
/* Initialize the root block structure */
|
||||
if (blake2xb_init_key(S, outlen, key, keylen) < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Absorb the input message */
|
||||
blake2xb_update(S, in, inlen);
|
||||
|
||||
/* Compute the root node of the tree and the final hash using the counter construction */
|
||||
return blake2xb_final(S, out, outlen);
|
||||
}
|
||||
|
||||
#if defined(BLAKE2XB_SELFTEST)
|
||||
#include <string.h>
|
||||
#include "blake2-kat.h"
|
||||
int main( void )
|
||||
{
|
||||
uint8_t key[BLAKE2B_KEYBYTES];
|
||||
uint8_t buf[BLAKE2_KAT_LENGTH];
|
||||
size_t i, step, outlen;
|
||||
|
||||
for( i = 0; i < BLAKE2B_KEYBYTES; ++i ) {
|
||||
key[i] = ( uint8_t )i;
|
||||
}
|
||||
|
||||
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) {
|
||||
buf[i] = ( uint8_t )i;
|
||||
}
|
||||
|
||||
/* Testing length of outputs rather than inputs */
|
||||
/* (Test of input lengths mostly covered by blake2b tests) */
|
||||
|
||||
/* Test simple API */
|
||||
for( outlen = 1; outlen <= BLAKE2_KAT_LENGTH; ++outlen )
|
||||
{
|
||||
uint8_t hash[BLAKE2_KAT_LENGTH] = {0};
|
||||
if( blake2xb( hash, outlen, buf, BLAKE2_KAT_LENGTH, key, BLAKE2B_KEYBYTES ) < 0 ) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if( 0 != memcmp( hash, blake2xb_keyed_kat[outlen-1], outlen ) )
|
||||
{
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
/* Test streaming API */
|
||||
for(step = 1; step < BLAKE2B_BLOCKBYTES; ++step) {
|
||||
for (outlen = 1; outlen <= BLAKE2_KAT_LENGTH; ++outlen) {
|
||||
uint8_t hash[BLAKE2_KAT_LENGTH];
|
||||
blake2xb_state S;
|
||||
uint8_t * p = buf;
|
||||
size_t mlen = BLAKE2_KAT_LENGTH;
|
||||
int err = 0;
|
||||
|
||||
if( (err = blake2xb_init_key(&S, outlen, key, BLAKE2B_KEYBYTES)) < 0 ) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
while (mlen >= step) {
|
||||
if ( (err = blake2xb_update(&S, p, step)) < 0 ) {
|
||||
goto fail;
|
||||
}
|
||||
mlen -= step;
|
||||
p += step;
|
||||
}
|
||||
if ( (err = blake2xb_update(&S, p, mlen)) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
if ( (err = blake2xb_final(&S, hash, outlen)) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (0 != memcmp(hash, blake2xb_keyed_kat[outlen-1], outlen)) {
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
puts( "ok" );
|
||||
return 0;
|
||||
fail:
|
||||
puts("error");
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
@@ -1,239 +0,0 @@
|
||||
/*
|
||||
BLAKE2 reference source code package - reference C implementations
|
||||
|
||||
Copyright 2016, JP Aumasson <jeanphilippe.aumasson@gmail.com>.
|
||||
Copyright 2016, Samuel Neves <sneves@dei.uc.pt>.
|
||||
|
||||
You may use this under the terms of the CC0, the OpenSSL Licence, or
|
||||
the Apache Public License 2.0, at your option. The terms of these
|
||||
licenses can be found at:
|
||||
|
||||
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
- OpenSSL license : https://www.openssl.org/source/license.html
|
||||
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
More information about the BLAKE2 hash function can be found at
|
||||
https://blake2.net.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "blake2.h"
|
||||
#include "blake2-impl.h"
|
||||
|
||||
/*
 * Initialise an unkeyed BLAKE2Xs state producing `outlen` bytes.
 * Thin wrapper: forwards to blake2xs_init_key with no key material and
 * returns whatever that call returns.
 */
int blake2xs_init( blake2xs_state *S, const size_t outlen )
{
  const void *const no_key = NULL;

  return blake2xs_init_key( S, outlen, no_key, 0 );
}
|
||||
|
||||
/*
 * Initialise a BLAKE2Xs state, optionally keyed.
 *
 * S       state to initialise
 * outlen  requested XOF output length; must be in 1..0xFFFF
 * key     key bytes, or NULL when keylen is 0
 * keylen  key length, at most BLAKE2S_KEYBYTES
 *
 * Returns 0 on success, -1 on an invalid argument or if the underlying
 * BLAKE2s parameter initialisation fails.
 */
int blake2xs_init_key( blake2xs_state *S, const size_t outlen, const void *key, size_t keylen )
{
  blake2s_param *const param = S->P;

  if( outlen == 0 || outlen > 0xFFFFUL )
    return -1;

  /* A present key must fit; an absent key must come with a zero length. */
  if( ( NULL == key ) ? ( keylen > 0 ) : ( keylen > BLAKE2S_KEYBYTES ) )
    return -1;

  /* Root-node parameter block: sequential mode with the XOF length recorded. */
  param->digest_length = BLAKE2S_OUTBYTES;
  param->key_length    = keylen;
  param->fanout        = 1;
  param->depth         = 1;
  param->node_depth    = 0;
  param->inner_length  = 0;
  store32( &param->leaf_length, 0 );
  store32( &param->node_offset, 0 );
  store16( &param->xof_length, outlen );
  memset( param->salt,     0, sizeof( param->salt ) );
  memset( param->personal, 0, sizeof( param->personal ) );

  if( blake2s_init_param( S->S, param ) < 0 )
    return -1;

  if( keylen == 0 )
    return 0;

  /* Keyed mode: absorb the key zero-padded to a full block, then wipe the copy. */
  {
    uint8_t padded[BLAKE2S_BLOCKBYTES];

    memset( padded, 0, sizeof( padded ) );
    memcpy( padded, key, keylen );
    blake2s_update( S->S, padded, sizeof( padded ) );
    secure_zero_memory( padded, sizeof( padded ) );
  }

  return 0;
}
|
||||
|
||||
/*
 * Absorb `inlen` bytes from `in` into the BLAKE2Xs state.
 * Delegates directly to blake2s_update on the embedded root state and
 * returns its result.
 */
int blake2xs_update( blake2xs_state *S, const void *in, size_t inlen )
{
  blake2s_state *const root_state = S->S;

  return blake2s_update( root_state, in, inlen );
}
|
||||
|
||||
/*
 * Finish a BLAKE2Xs computation and write `outlen` bytes of output to `out`.
 *
 * The root BLAKE2s hash is finalised first; the output is then produced in
 * BLAKE2S_OUTBYTES-sized pieces, each one a BLAKE2s hash of the root hash
 * under a parameter block whose node_offset is the piece index.
 *
 * outlen must equal the length given at init time, unless that length was
 * 0xFFFF ("unknown"), in which case any non-zero outlen is accepted.
 *
 * Returns 0 on success, -1 on a NULL `out`, a length mismatch, or a failure
 * of any underlying BLAKE2s call. The temporaries holding the root hash and
 * the per-piece state are wiped on every exit path taken after they may have
 * been written, including the error paths.
 */
int blake2xs_final(blake2xs_state *S, void *out, size_t outlen) {
  blake2s_state C[1];
  blake2s_param P[1];
  uint16_t xof_length = load16(&S->P->xof_length);
  uint8_t root[BLAKE2S_BLOCKBYTES];
  size_t i;
  int rc = -1;

  if (NULL == out) {
    return -1;
  }

  /* outlen must match the output size defined in xof_length, */
  /* unless it was -1, in which case anything goes except 0. */
  if (xof_length == 0xFFFFUL) {
    if (outlen == 0) {
      return -1;
    }
  } else {
    if (outlen != xof_length) {
      return -1;
    }
  }

  /* Finalize the root hash */
  if (blake2s_final(S->S, root, BLAKE2S_OUTBYTES) < 0) {
    goto cleanup;
  }

  /* Set common block structure values */
  /* Copy values from parent instance, and only change the ones below */
  memcpy(P, S->P, sizeof(blake2s_param));
  P->key_length = 0;
  P->fanout = 0;
  P->depth = 0;
  store32(&P->leaf_length, BLAKE2S_OUTBYTES);
  P->inner_length = BLAKE2S_OUTBYTES;
  P->node_depth = 0;

  for (i = 0; outlen > 0; ++i) {
    const size_t block_size = (outlen < BLAKE2S_OUTBYTES) ? outlen : BLAKE2S_OUTBYTES;

    /* Initialize the state for output piece i */
    P->digest_length = (uint8_t)block_size;
    store32(&P->node_offset, (uint32_t)i);
    if (blake2s_init_param(C, P) < 0) {
      goto cleanup;
    }

    /* Absorb the root hash, then emit this piece of the output */
    if (blake2s_update(C, root, BLAKE2S_OUTBYTES) < 0) {
      goto cleanup;
    }
    if (blake2s_final(C, (uint8_t *)out + i * BLAKE2S_OUTBYTES, block_size) < 0) {
      goto cleanup;
    }
    outlen -= block_size;
  }

  rc = 0;

cleanup:
  /* Wipe intermediates on success and failure alike. */
  secure_zero_memory(root, sizeof(root));
  secure_zero_memory(P, sizeof(P));
  secure_zero_memory(C, sizeof(C));
  /* Put blake2xs in an invalid state? cf. blake2s_is_lastblock */
  return rc;
}
|
||||
|
||||
int blake2xs(void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen)
|
||||
{
|
||||
blake2xs_state S[1];
|
||||
|
||||
/* Verify parameters */
|
||||
if (NULL == in && inlen > 0)
|
||||
return -1;
|
||||
|
||||
if (NULL == out)
|
||||
return -1;
|
||||
|
||||
if (NULL == key && keylen > 0)
|
||||
return -1;
|
||||
|
||||
if (keylen > BLAKE2S_KEYBYTES)
|
||||
return -1;
|
||||
|
||||
if (outlen == 0)
|
||||
return -1;
|
||||
|
||||
/* Initialize the root block structure */
|
||||
if (blake2xs_init_key(S, outlen, key, keylen) < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Absorb the input message */
|
||||
blake2xs_update(S, in, inlen);
|
||||
|
||||
/* Compute the root node of the tree and the final hash using the counter construction */
|
||||
return blake2xs_final(S, out, outlen);
|
||||
}
|
||||
|
||||
#if defined(BLAKE2XS_SELFTEST)
|
||||
#include <string.h>
|
||||
#include "blake2-kat.h"
|
||||
int main( void )
|
||||
{
|
||||
uint8_t key[BLAKE2S_KEYBYTES];
|
||||
uint8_t buf[BLAKE2_KAT_LENGTH];
|
||||
size_t i, step, outlen;
|
||||
|
||||
for( i = 0; i < BLAKE2S_KEYBYTES; ++i ) {
|
||||
key[i] = ( uint8_t )i;
|
||||
}
|
||||
|
||||
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) {
|
||||
buf[i] = ( uint8_t )i;
|
||||
}
|
||||
|
||||
/* Testing length of outputs rather than inputs */
|
||||
/* (Test of input lengths mostly covered by blake2s tests) */
|
||||
|
||||
/* Test simple API */
|
||||
for( outlen = 1; outlen <= BLAKE2_KAT_LENGTH; ++outlen )
|
||||
{
|
||||
uint8_t hash[BLAKE2_KAT_LENGTH] = {0};
|
||||
if( blake2xs( hash, outlen, buf, BLAKE2_KAT_LENGTH, key, BLAKE2S_KEYBYTES ) < 0 ) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if( 0 != memcmp( hash, blake2xs_keyed_kat[outlen-1], outlen ) )
|
||||
{
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
/* Test streaming API */
|
||||
for(step = 1; step < BLAKE2S_BLOCKBYTES; ++step) {
|
||||
for (outlen = 1; outlen <= BLAKE2_KAT_LENGTH; ++outlen) {
|
||||
uint8_t hash[BLAKE2_KAT_LENGTH];
|
||||
blake2xs_state S;
|
||||
uint8_t * p = buf;
|
||||
size_t mlen = BLAKE2_KAT_LENGTH;
|
||||
int err = 0;
|
||||
|
||||
if( (err = blake2xs_init_key(&S, outlen, key, BLAKE2S_KEYBYTES)) < 0 ) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
while (mlen >= step) {
|
||||
if ( (err = blake2xs_update(&S, p, step)) < 0 ) {
|
||||
goto fail;
|
||||
}
|
||||
mlen -= step;
|
||||
p += step;
|
||||
}
|
||||
if ( (err = blake2xs_update(&S, p, mlen)) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
if ( (err = blake2xs_final(&S, hash, outlen)) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (0 != memcmp(hash, blake2xs_keyed_kat[outlen-1], outlen)) {
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
puts( "ok" );
|
||||
return 0;
|
||||
fail:
|
||||
puts("error");
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
#define lapi_c
|
||||
|
||||
#define LUA_CORE
|
||||
|
||||
#include "lprefix.h"
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
#define lcode_c
|
||||
|
||||
#define LUA_CORE
|
||||
|
||||
#include "lprefix.h"
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
#define lctype_c
|
||||
|
||||
#define LUA_CORE
|
||||
|
||||
#include "lprefix.h"
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
#define ldebug_c
|
||||
|
||||
#define LUA_CORE
|
||||
|
||||
#include "lprefix.h"
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
#define ldo_c
|
||||
|
||||
#define LUA_CORE
|
||||
|
||||
#include "lprefix.h"
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
#define ldump_c
|
||||
|
||||
#define LUA_CORE
|
||||
|
||||
#include "lprefix.h"
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
#define lfunc_c
|
||||
|
||||
#define LUA_CORE
|
||||
|
||||
#include "lprefix.h"
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
#define lgc_c
|
||||
|
||||
#define LUA_CORE
|
||||
|
||||
#include "lprefix.h"
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
#define llex_c
|
||||
|
||||
#define LUA_CORE
|
||||
|
||||
#include "lprefix.h"
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
#define lmem_c
|
||||
|
||||
#define LUA_CORE
|
||||
|
||||
#include "lprefix.h"
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
#define lobject_c
|
||||
|
||||
#define LUA_CORE
|
||||
|
||||
#include "lprefix.h"
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
#define lopcodes_c
|
||||
|
||||
#define LUA_CORE
|
||||
|
||||
#include "lprefix.h"
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
#define lparser_c
|
||||
|
||||
#define LUA_CORE
|
||||
|
||||
#include "lprefix.h"
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
#define lstate_c
|
||||
|
||||
#define LUA_CORE
|
||||
|
||||
#include "lprefix.h"
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
#define lstring_c
|
||||
|
||||
#define LUA_CORE
|
||||
|
||||
#include "lprefix.h"
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
#define ltable_c
|
||||
|
||||
#define LUA_CORE
|
||||
|
||||
#include "lprefix.h"
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
#define ltm_c
|
||||
|
||||
#define LUA_CORE
|
||||
|
||||
#include "lprefix.h"
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
#define lundump_c
|
||||
|
||||
#define LUA_CORE
|
||||
|
||||
#include "lprefix.h"
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
#define lvm_c
|
||||
|
||||
#define LUA_CORE
|
||||
|
||||
#include "lprefix.h"
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
#define lzio_c
|
||||
|
||||
#define LUA_CORE
|
||||
|
||||
#include "lprefix.h"
|
||||
|
||||
|
||||
@@ -1,1403 +0,0 @@
|
||||
# CMakeLists.txt
|
||||
#
|
||||
# This file enables PCRE2 to be built with the CMake configuration and build
|
||||
# tool. Download CMake in source or binary form from http://www.cmake.org/
|
||||
# Converted to support PCRE2 from the original PCRE file, August 2014.
|
||||
#
|
||||
# Original listfile by Christian Ehrlicher <Ch.Ehrlicher@gmx.de>
|
||||
# Refined and expanded by Daniel Richard G. <skunk@iSKUNK.ORG>
|
||||
# 2007-09-14 mod by Sheri so 7.4 supported configuration options can be entered
|
||||
# 2007-09-19 Adjusted by PH to retain previous default settings
|
||||
# 2007-12-26 (a) On UNIX, use names libpcre instead of just pcre
|
||||
# (b) Ensure pcretest and pcregrep link with the local library,
|
||||
# not a previously-installed one.
|
||||
# (c) Add PCRE_SUPPORT_LIBREADLINE, PCRE_SUPPORT_LIBZ, and
|
||||
# PCRE_SUPPORT_LIBBZ2.
|
||||
# 2008-01-20 Brought up to date to include several new features by Christian
|
||||
# Ehrlicher.
|
||||
# 2008-01-22 Sheri added options for backward compatibility of library names
|
||||
# when building with minGW:
|
||||
# if "ON", NON_STANDARD_LIB_PREFIX causes shared libraries to
|
||||
# be built without "lib" as prefix. (The libraries will be named
|
||||
# pcre.dll, pcreposix.dll and pcrecpp.dll).
|
||||
# if "ON", NON_STANDARD_LIB_SUFFIX causes shared libraries to
|
||||
# be built with suffix of "-0.dll". (The libraries will be named
|
||||
# libpcre-0.dll, libpcreposix-0.dll and libpcrecpp-0.dll - same names
|
||||
# built by default with Configure and Make).
|
||||
# 2008-01-23 PH removed the automatic build of pcredemo.
|
||||
# 2008-04-22 PH modified READLINE support so it finds NCURSES when needed.
|
||||
# 2008-07-03 PH updated for revised UCP property support (change of files)
|
||||
# 2009-03-23 PH applied Steven Van Ingelgem's patch to change the name
|
||||
# CMAKE_BINARY_DIR to PROJECT_BINARY_DIR so that it works when PCRE
|
||||
# is included within another project.
|
||||
# 2009-03-23 PH applied a modified version of Steven Van Ingelgem's patches to
|
||||
# add options to stop the building of pcregrep and the tests, and
|
||||
# to disable the final configuration report.
|
||||
# 2009-04-11 PH applied Christian Ehrlicher's patch to show compiler flags that
|
||||
# are set by specifying a release type.
|
||||
# 2010-01-02 PH added test for stdint.h
|
||||
# 2010-03-02 PH added test for inttypes.h
|
||||
# 2011-08-01 PH added PCREGREP_BUFSIZE
|
||||
# 2011-08-22 PH added PCRE_SUPPORT_JIT
|
||||
# 2011-09-06 PH modified WIN32 ADD_TEST line as suggested by Sergey Cherepanov
|
||||
# 2011-09-06 PH added PCRE_SUPPORT_PCREGREP_JIT
|
||||
# 2011-10-04 Sheri added support for including coff data in windows shared libraries
|
||||
# compiled with MINGW if pcre.rc and/or pcreposix.rc are placed in
|
||||
# the source dir by the user prior to building
|
||||
# 2011-10-04 Sheri changed various add_test's to use exes' location built instead
|
||||
# of DEBUG location only (likely only matters in MSVC)
|
||||
# 2011-10-04 Sheri added scripts to provide needed variables to RunTest and
|
||||
# RunGrepTest (used for UNIX and Msys)
|
||||
# 2011-10-04 Sheri added scripts to provide needed variables and to execute
|
||||
# RunTest.bat in Win32 (for effortless testing with "make test")
|
||||
# 2011-10-04 Sheri Increased minimum required cmake version
|
||||
# 2012-01-06 PH removed pcre_info.c and added pcre_string_utils.c
|
||||
# 2012-01-10 Zoltan Herczeg added libpcre16 support
|
||||
# 2012-01-13 Stephen Kelly added out of source build support
|
||||
# 2012-01-17 PH applied Stephen Kelly's patch to parse the version data out
|
||||
# of the configure.ac file
|
||||
# 2012-02-26 PH added support for libedit
|
||||
# 2012-09-06 PH added support for PCRE_EBCDIC_NL25
|
||||
# 2012-09-08 ChPe added PCRE32 support
|
||||
# 2012-10-23 PH added support for VALGRIND and GCOV
|
||||
# 2012-12-08 PH added patch from Daniel Richard G to quash some MSVC warnings
|
||||
# 2013-07-01 PH realized that the "support" for GCOV was a total nonsense and
|
||||
# so it has been removed.
|
||||
# 2013-10-08 PH got rid of the "source" command, which is a bash-ism (use ".")
|
||||
# 2013-11-05 PH added support for PARENS_NEST_LIMIT
|
||||
# 2014-08-29 PH converted the file for PCRE2 (which has no C++).
|
||||
# 2015-04-24 PH added support for PCRE2_DEBUG
|
||||
# 2015-07-16 PH updated for new pcre2_find_bracket source module
|
||||
# 2015-08-24 PH correct C_FLAGS setting (patch from Roy Ivy III)
|
||||
# 2015-10-16 PH added support for never-backslash-C
|
||||
# 2016-03-01 PH applied Chris Wilson's patch for MSVC static
|
||||
# 2016-06-24 PH applied Chris Wilson's second patch, putting the first under
|
||||
# a new option instead of being unconditional.
|
||||
# 2016-10-05 PH fixed a typo (PCRE should be PCRE2) in above patch
|
||||
# fix by David Gaussmann
|
||||
# 2016-10-07 PH added PCREGREP_MAX_BUFSIZE
|
||||
# 2017-03-11 PH turned HEAP_MATCH_RECURSE into a NO-OP for 10.30
|
||||
# 2017-04-08 PH added HEAP_LIMIT
|
||||
# 2017-06-15 ZH added SUPPORT_JIT_SEALLOC support
|
||||
# 2018-06-19 PH added checks for stdint.h and inttypes.h (later removed)
|
||||
# 2018-06-27 PH added Daniel's patch to increase the stack for MSVC
|
||||
# 2018-11-14 PH removed unnecessary checks for stdint.h and inttypes.h
|
||||
# 2018-11-16 PH added PCRE2GREP_SUPPORT_CALLOUT_FORK support and tidied
|
||||
# 2019-02-16 PH hacked to avoid CMP0026 policy issue (see comments below)
|
||||
# 2020-03-16 PH renamed dftables as pcre2_dftables (as elsewhere)
|
||||
# 2020-03-24 PH changed CMAKE_MODULE_PATH definition to add, not replace
|
||||
# 2020-04-08 Carlo added function check for secure_getenv, fixed strerror
|
||||
# 2020-04-16 enh added check for __attribute__((uninitialized))
|
||||
# 2020-04-25 PH applied patches from Uwe Korn to support pkg-config and
|
||||
# library versioning.
|
||||
# 2020-04-25 Carlo added function check for mkostemp used in ProtExecAllocator
|
||||
# 2020-04-28 PH added function check for memfd_create based on Carlo's patch
|
||||
# 2020-05-25 PH added a check for Intel CET
|
||||
# 2020-12-03 PH altered the definition of pcre2test as suggested by Daniel
|
||||
# 2021-06-29 JWSB added the option to build static library with PIC.
|
||||
# 2021-07-05 JWSB modified such that both the static and shared library can be
|
||||
# built in one go.
|
||||
# 2021-08-28 PH increased minimum version
|
||||
# 2021-08-28 PH added test for realpath()
|
||||
# 2022-12-10 PH added support for pcre2posix_test
|
||||
# 2023-01-15 Carlo added C99 as the minimum required
|
||||
# 2023-08-06 PH added support for setting variable length lookbehind maximum
|
||||
|
||||
################################################################################
|
||||
# We have used `gersemi` for auto-formatting our CMake files.
|
||||
# Applied to all CMake files using:
|
||||
# > pip3 install gersemi
|
||||
# > gersemi --in-place --line-length 120 --indent 2 \
|
||||
# ./CMakeLists.txt ./cmake/*.cmake ./cmake/*.cmake.in
|
||||
################################################################################
|
||||
|
||||
# Increased minimum to 3.15 to allow use of string(REPEAT).
|
||||
cmake_minimum_required(VERSION 3.15 FATAL_ERROR)
|
||||
project(PCRE2 C)
|
||||
set(CMAKE_C_STANDARD 99)
|
||||
set(CMAKE_C_STANDARD_REQUIRED TRUE)
|
||||
|
||||
set(CMAKE_C_VISIBILITY_PRESET hidden)
|
||||
cmake_policy(SET CMP0063 NEW)
|
||||
|
||||
# Set policy CMP0026 to avoid warnings for the use of LOCATION in
|
||||
# GET_TARGET_PROPERTY. This should no longer be required.
|
||||
# CMAKE_POLICY(SET CMP0026 OLD)
|
||||
|
||||
# With a recent cmake, you can provide a rootdir to look for non
|
||||
# standard installed library dependencies, but to do so, the policy
|
||||
# needs to be set to new (by uncommenting the following)
|
||||
# CMAKE_POLICY(SET CMP0074 NEW)
|
||||
|
||||
# For FindReadline.cmake. This was changed to allow setting CMAKE_MODULE_PATH
|
||||
# on the command line.
|
||||
# SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
||||
|
||||
include_directories(${PROJECT_SOURCE_DIR}/src)
|
||||
|
||||
# external packages
|
||||
find_package(BZip2)
|
||||
find_package(ZLIB)
|
||||
find_package(Readline)
|
||||
find_package(Editline)
|
||||
|
||||
# Configuration checks
|
||||
|
||||
include(CheckCSourceCompiles)
|
||||
include(CheckFunctionExists)
|
||||
include(CheckSymbolExists)
|
||||
include(CheckIncludeFile)
|
||||
include(CheckTypeSize)
|
||||
include(GNUInstallDirs) # for CMAKE_INSTALL_LIBDIR
|
||||
|
||||
check_include_file(assert.h HAVE_ASSERT_H)
|
||||
check_include_file(dirent.h HAVE_DIRENT_H)
|
||||
check_include_file(sys/stat.h HAVE_SYS_STAT_H)
|
||||
check_include_file(sys/types.h HAVE_SYS_TYPES_H)
|
||||
check_include_file(unistd.h HAVE_UNISTD_H)
|
||||
check_include_file(windows.h HAVE_WINDOWS_H)
|
||||
|
||||
check_symbol_exists(bcopy "strings.h" HAVE_BCOPY)
|
||||
check_symbol_exists(memfd_create "sys/mman.h" HAVE_MEMFD_CREATE)
|
||||
check_symbol_exists(memmove "string.h" HAVE_MEMMOVE)
|
||||
check_symbol_exists(secure_getenv "stdlib.h" HAVE_SECURE_GETENV)
|
||||
check_symbol_exists(strerror "string.h" HAVE_STRERROR)
|
||||
|
||||
check_c_source_compiles(
|
||||
[=[
|
||||
#include <stdlib.h>
|
||||
#include <limits.h>
|
||||
int main(int c, char *v[]) { char buf[PATH_MAX]; realpath(v[c], buf); return 0; }
|
||||
]=]
|
||||
HAVE_REALPATH
|
||||
)
|
||||
|
||||
set(ORIG_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
|
||||
if(NOT MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "XL")
|
||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror")
|
||||
endif()
|
||||
|
||||
check_c_source_compiles(
|
||||
"int main(void) { char buf[128] __attribute__((uninitialized)); (void)buf; return 0; }"
|
||||
HAVE_ATTRIBUTE_UNINITIALIZED
|
||||
)
|
||||
|
||||
check_c_source_compiles(
|
||||
[=[
|
||||
extern __attribute__ ((visibility ("default"))) int f(void);
|
||||
int main(void) { return f(); }
|
||||
int f(void) { return 42; }
|
||||
]=]
|
||||
HAVE_VISIBILITY
|
||||
)
|
||||
|
||||
set(CMAKE_REQUIRED_FLAGS ${ORIG_CMAKE_REQUIRED_FLAGS})
|
||||
|
||||
check_c_source_compiles("int main(void) { __assume(1); return 0; }" HAVE_BUILTIN_ASSUME)
|
||||
|
||||
check_c_source_compiles(
|
||||
[=[
|
||||
#include <stddef.h>
|
||||
int main(void) { int a,b; size_t m; __builtin_mul_overflow(a,b,&m); return 0; }
|
||||
]=]
|
||||
HAVE_BUILTIN_MUL_OVERFLOW
|
||||
)
|
||||
|
||||
check_c_source_compiles(
|
||||
"int main(int c, char *v[]) { if (c) __builtin_unreachable(); return (int)(*v[0]); }"
|
||||
HAVE_BUILTIN_UNREACHABLE
|
||||
)
|
||||
|
||||
if(HAVE_VISIBILITY)
|
||||
set(PCRE2_EXPORT [=[__attribute__ ((visibility ("default")))]=])
|
||||
else()
|
||||
set(PCRE2_EXPORT)
|
||||
endif()
|
||||
|
||||
# Check whether Intel CET is enabled, and if so, adjust compiler flags. This
|
||||
# code was written by PH, trying to imitate the logic from the autotools
|
||||
# configuration.
|
||||
|
||||
check_c_source_compiles(
|
||||
[=[
|
||||
#ifndef __CET__
|
||||
#error CET is not enabled
|
||||
#endif
|
||||
int main() { return 0; }
|
||||
]=]
|
||||
INTEL_CET_ENABLED
|
||||
)
|
||||
|
||||
# When the probe above found __CET__ defined, add -mshstk to the C flags.
if(INTEL_CET_ENABLED)
  string(APPEND CMAKE_C_FLAGS " -mshstk")
endif()
|
||||
|
||||
# User-configurable options
|
||||
#
|
||||
# Note: CMakeSetup displays these in alphabetical order, regardless of
|
||||
# the order we use here.
|
||||
|
||||
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries.")
|
||||
|
||||
option(BUILD_STATIC_LIBS "Build static libraries." ON)
|
||||
|
||||
option(PCRE2_BUILD_PCRE2_8 "Build 8 bit PCRE2 library" ON)
|
||||
|
||||
option(PCRE2_BUILD_PCRE2_16 "Build 16 bit PCRE2 library" OFF)
|
||||
|
||||
option(PCRE2_BUILD_PCRE2_32 "Build 32 bit PCRE2 library" OFF)
|
||||
|
||||
option(PCRE2_STATIC_PIC "Build the static library with the option position independent code enabled." OFF)
|
||||
|
||||
set(PCRE2_DEBUG "IfDebugBuild" CACHE STRING "Include debugging code")
|
||||
set_property(CACHE PCRE2_DEBUG PROPERTY STRINGS "IfDebugBuild" "ON" "OFF")
|
||||
|
||||
option(PCRE2_DISABLE_PERCENT_ZT "Disable the use of %zu and %td (rarely needed)" OFF)
|
||||
|
||||
set(
|
||||
PCRE2_EBCDIC
|
||||
OFF
|
||||
CACHE BOOL
|
||||
"Use EBCDIC coding instead of ASCII. (This is rarely used outside of mainframe systems.)"
|
||||
)
|
||||
|
||||
set(PCRE2_EBCDIC_NL25 OFF CACHE BOOL "Use 0x25 as EBCDIC NL character instead of 0x15; implies EBCDIC.")
|
||||
|
||||
set(
|
||||
PCRE2_LINK_SIZE
|
||||
"2"
|
||||
CACHE STRING
|
||||
"Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details."
|
||||
)
|
||||
|
||||
set(
|
||||
PCRE2_PARENS_NEST_LIMIT
|
||||
"250"
|
||||
CACHE STRING
|
||||
"Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details."
|
||||
)
|
||||
|
||||
set(
|
||||
PCRE2_HEAP_LIMIT
|
||||
"20000000"
|
||||
CACHE STRING
|
||||
"Default limit on heap memory (kibibytes). See HEAP_LIMIT in config.h.in for details."
|
||||
)
|
||||
|
||||
set(PCRE2_MAX_VARLOOKBEHIND "255" CACHE STRING "Default limit on variable lookbehinds.")
|
||||
|
||||
set(
|
||||
PCRE2_MATCH_LIMIT
|
||||
"10000000"
|
||||
CACHE STRING
|
||||
"Default limit on internal looping. See MATCH_LIMIT in config.h.in for details."
|
||||
)
|
||||
|
||||
set(
|
||||
PCRE2_MATCH_LIMIT_DEPTH
|
||||
"MATCH_LIMIT"
|
||||
CACHE STRING
|
||||
"Default limit on internal depth of search. See MATCH_LIMIT_DEPTH in config.h.in for details."
|
||||
)
|
||||
|
||||
set(
|
||||
PCRE2GREP_BUFSIZE
|
||||
"20480"
|
||||
CACHE STRING
|
||||
"Buffer starting size parameter for pcre2grep. See PCRE2GREP_BUFSIZE in config.h.in for details."
|
||||
)
|
||||
|
||||
set(
|
||||
PCRE2GREP_MAX_BUFSIZE
|
||||
"1048576"
|
||||
CACHE STRING
|
||||
"Buffer maximum size parameter for pcre2grep. See PCRE2GREP_MAX_BUFSIZE in config.h.in for details."
|
||||
)
|
||||
|
||||
set(PCRE2_NEWLINE "LF" CACHE STRING "What to recognize as a newline (one of CR, LF, CRLF, ANY, ANYCRLF, NUL).")
|
||||
|
||||
set(PCRE2_HEAP_MATCH_RECURSE OFF CACHE BOOL "Obsolete option: do not use")
|
||||
|
||||
set(PCRE2_SUPPORT_JIT OFF CACHE BOOL "Enable support for Just-in-time compiling.")
|
||||
|
||||
# The SELinux-compatible JIT allocator is only offered as a cache option when
# the system name matches Linux or NetBSD; elsewhere the variable is pinned to
# the false constant IGNORE.
#
# CMAKE_SYSTEM_NAME is passed by name so that if() dereferences it exactly
# once, and the pattern is quoted so it is always a single regex argument.
if(CMAKE_SYSTEM_NAME MATCHES "Linux|NetBSD")
  set(PCRE2_SUPPORT_JIT_SEALLOC OFF CACHE BOOL "Enable SELinux compatible execmem allocator in JIT (experimental).")
else()
  set(PCRE2_SUPPORT_JIT_SEALLOC IGNORE)
endif()
|
||||
|
||||
set(PCRE2GREP_SUPPORT_JIT ON CACHE BOOL "Enable use of Just-in-time compiling in pcre2grep.")
|
||||
|
||||
set(PCRE2GREP_SUPPORT_CALLOUT ON CACHE BOOL "Enable callout string support in pcre2grep.")
|
||||
|
||||
set(PCRE2GREP_SUPPORT_CALLOUT_FORK ON CACHE BOOL "Enable callout string fork support in pcre2grep.")
|
||||
|
||||
set(PCRE2_SUPPORT_UNICODE ON CACHE BOOL "Enable support for Unicode and UTF-8/UTF-16/UTF-32 encoding.")
|
||||
|
||||
set(
|
||||
PCRE2_SUPPORT_BSR_ANYCRLF
|
||||
OFF
|
||||
CACHE BOOL
|
||||
"ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks"
|
||||
)
|
||||
|
||||
set(PCRE2_NEVER_BACKSLASH_C OFF CACHE BOOL "If ON, backslash-C (upper case C) is locked out.")
|
||||
|
||||
set(PCRE2_SUPPORT_VALGRIND OFF CACHE BOOL "Enable Valgrind support.")
|
||||
|
||||
option(PCRE2_SHOW_REPORT "Show the final configuration report" ON)
|
||||
option(PCRE2_BUILD_PCRE2GREP "Build pcre2grep" ON)
|
||||
option(PCRE2_BUILD_TESTS "Build the tests" ON)
|
||||
|
||||
set(
|
||||
PCRE2_INSTALL_CMAKEDIR
|
||||
"${CMAKE_INSTALL_LIBDIR}/cmake/pcre2"
|
||||
CACHE STRING
|
||||
"Path used during CMake install for placing PCRE2's CMake config files, relative to the installation root (prefix)"
|
||||
)
|
||||
|
||||
if(MINGW)
|
||||
option(
|
||||
NON_STANDARD_LIB_PREFIX
|
||||
"ON=Shared libraries built in mingw will be named pcre2.dll, etc., instead of libpcre2.dll, etc."
|
||||
OFF
|
||||
)
|
||||
|
||||
option(
|
||||
NON_STANDARD_LIB_SUFFIX
|
||||
"ON=Shared libraries built in mingw will be named libpcre2-0.dll, etc., instead of libpcre2.dll, etc."
|
||||
OFF
|
||||
)
|
||||
endif()
|
||||
|
||||
if(MSVC)
|
||||
option(PCRE2_STATIC_RUNTIME "ON=Compile against the static runtime (/MT)." OFF)
|
||||
option(INSTALL_MSVC_PDB "ON=Install .pdb files built by MSVC, if generated" OFF)
|
||||
endif()
|
||||
|
||||
# bzip2 lib
|
||||
if(BZIP2_FOUND)
|
||||
option(PCRE2_SUPPORT_LIBBZ2 "Enable support for linking pcre2grep with libbz2." ON)
|
||||
endif()
|
||||
if(PCRE2_SUPPORT_LIBBZ2)
|
||||
include_directories(${BZIP2_INCLUDE_DIR})
|
||||
endif()
|
||||
|
||||
# zlib
|
||||
if(ZLIB_FOUND)
|
||||
option(PCRE2_SUPPORT_LIBZ "Enable support for linking pcre2grep with libz." ON)
|
||||
endif()
|
||||
if(PCRE2_SUPPORT_LIBZ)
|
||||
include_directories(${ZLIB_INCLUDE_DIR})
|
||||
endif()
|
||||
|
||||
# editline lib
|
||||
# libedit support: the option is only declared when libedit was found. If the
# user requested it anyway (e.g. on the command line) without libedit being
# available, configuration stops with an explanatory error.
if(EDITLINE_FOUND)
  option(PCRE2_SUPPORT_LIBEDIT "Enable support for linking pcre2test with libedit." OFF)
  if(PCRE2_SUPPORT_LIBEDIT)
    include_directories(${EDITLINE_INCLUDE_DIR})
  endif()
elseif(PCRE2_SUPPORT_LIBEDIT)
  message(
    FATAL_ERROR
    " libedit not found, set EDITLINE_INCLUDE_DIR to a compatible header\n"
    " or set Editline_ROOT to a full libedit installed tree, as needed\n"
    " Might need to enable policy CMP0074 in CMakeLists.txt"
  )
endif()
|
||||
|
||||
# readline lib
|
||||
if(READLINE_FOUND)
|
||||
option(PCRE2_SUPPORT_LIBREADLINE "Enable support for linking pcre2test with libreadline." ON)
|
||||
endif()
|
||||
if(PCRE2_SUPPORT_LIBREADLINE)
|
||||
include_directories(${READLINE_INCLUDE_DIR})
|
||||
endif()
|
||||
|
||||
# Prepare build configuration
|
||||
|
||||
# Refuse to configure when no library flavour (shared/static) is selected.
if(NOT (BUILD_SHARED_LIBS OR BUILD_STATIC_LIBS))
  message(FATAL_ERROR "At least one of BUILD_SHARED_LIBS or BUILD_STATIC_LIBS must be enabled.")
endif()

# Refuse to configure when no code unit width (8/16/32) is selected.
if(NOT (PCRE2_BUILD_PCRE2_8 OR PCRE2_BUILD_PCRE2_16 OR PCRE2_BUILD_PCRE2_32))
  message(
    FATAL_ERROR
    "At least one of PCRE2_BUILD_PCRE2_8, PCRE2_BUILD_PCRE2_16 or PCRE2_BUILD_PCRE2_32 must be enabled"
  )
endif()
|
||||
|
||||
if(PCRE2_BUILD_PCRE2_8)
|
||||
set(SUPPORT_PCRE2_8 1)
|
||||
endif()
|
||||
|
||||
if(PCRE2_BUILD_PCRE2_16)
|
||||
set(SUPPORT_PCRE2_16 1)
|
||||
endif()
|
||||
|
||||
if(PCRE2_BUILD_PCRE2_32)
|
||||
set(SUPPORT_PCRE2_32 1)
|
||||
endif()
|
||||
|
||||
if(PCRE2_BUILD_PCRE2GREP AND NOT PCRE2_BUILD_PCRE2_8)
|
||||
message(STATUS "** PCRE2_BUILD_PCRE2_8 must be enabled for the pcre2grep program")
|
||||
set(PCRE2_BUILD_PCRE2GREP OFF)
|
||||
endif()
|
||||
|
||||
# libreadline and libedit are mutually exclusive; the conflict is only
# reported when libreadline was actually found.
if(PCRE2_SUPPORT_LIBREADLINE AND PCRE2_SUPPORT_LIBEDIT AND READLINE_FOUND)
  message(
    FATAL_ERROR
    " Only one of the readline compatible libraries can be enabled.\n"
    " Disable libreadline with -DPCRE2_SUPPORT_LIBREADLINE=OFF"
  )
endif()
|
||||
|
||||
if(PCRE2_SUPPORT_BSR_ANYCRLF)
|
||||
set(BSR_ANYCRLF 1)
|
||||
endif()
|
||||
|
||||
if(PCRE2_NEVER_BACKSLASH_C)
|
||||
set(NEVER_BACKSLASH_C 1)
|
||||
endif()
|
||||
|
||||
if(PCRE2_SUPPORT_UNICODE)
|
||||
set(SUPPORT_UNICODE 1)
|
||||
endif()
|
||||
|
||||
if(PCRE2_SUPPORT_JIT)
|
||||
set(SUPPORT_JIT 1)
|
||||
if(UNIX)
|
||||
find_package(Threads REQUIRED)
|
||||
if(CMAKE_USE_PTHREADS_INIT)
|
||||
set(REQUIRE_PTHREAD 1)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Configure the SELinux-compatible JIT allocator when it was requested.
#
# The allocator needs mkostemp(), which is probed with _GNU_SOURCE defined.
# The probe result is stored in the variable REQUIRED. If the symbol is
# missing, the feature is switched off; if it is present on a system other
# than Linux/NetBSD, configuration fails.
if(PCRE2_SUPPORT_JIT_SEALLOC)
  set(CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE)
  check_symbol_exists(mkostemp stdlib.h REQUIRED)
  unset(CMAKE_REQUIRED_DEFINITIONS)

  # Variables are named, not expanded, so if() dereferences them exactly once.
  if(REQUIRED)
    if(CMAKE_SYSTEM_NAME MATCHES "Linux|NetBSD")
      add_compile_definitions(_GNU_SOURCE)
      set(SLJIT_PROT_EXECUTABLE_ALLOCATOR 1)
    else()
      message(FATAL_ERROR "Your configuration is not supported")
    endif()
  else()
    set(PCRE2_SUPPORT_JIT_SEALLOC OFF)
  endif()
endif()
|
||||
|
||||
if(PCRE2GREP_SUPPORT_JIT)
|
||||
set(SUPPORT_PCRE2GREP_JIT 1)
|
||||
endif()
|
||||
|
||||
if(PCRE2GREP_SUPPORT_CALLOUT)
|
||||
set(SUPPORT_PCRE2GREP_CALLOUT 1)
|
||||
if(PCRE2GREP_SUPPORT_CALLOUT_FORK)
|
||||
set(SUPPORT_PCRE2GREP_CALLOUT_FORK 1)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(PCRE2_SUPPORT_VALGRIND)
|
||||
set(SUPPORT_VALGRIND 1)
|
||||
endif()
|
||||
|
||||
if(PCRE2_DISABLE_PERCENT_ZT)
|
||||
set(DISABLE_PERCENT_ZT 1)
|
||||
endif()
|
||||
|
||||
# This next one used to reference ${READLINE_LIBRARY},
# but I was advised to add the NCURSES test as well, along with
# some modifications to cmake/FindReadline.cmake which should
# make it possible to override the default if necessary. PH
|
||||
|
||||
if(PCRE2_SUPPORT_LIBREADLINE)
|
||||
set(SUPPORT_LIBREADLINE 1)
|
||||
set(PCRE2TEST_LIBS ${READLINE_LIBRARY} ${NCURSES_LIBRARY})
|
||||
endif()
|
||||
|
||||
# libedit is a plug-compatible alternative to libreadline
|
||||
|
||||
if(PCRE2_SUPPORT_LIBEDIT)
|
||||
set(SUPPORT_LIBEDIT 1)
|
||||
set(PCRE2TEST_LIBS ${EDITLINE_LIBRARY})
|
||||
endif()
|
||||
|
||||
if(PCRE2_SUPPORT_LIBZ)
|
||||
set(SUPPORT_LIBZ 1)
|
||||
set(PCRE2GREP_LIBS ${PCRE2GREP_LIBS} ${ZLIB_LIBRARIES})
|
||||
endif()
|
||||
|
||||
if(PCRE2_SUPPORT_LIBBZ2)
|
||||
set(SUPPORT_LIBBZ2 1)
|
||||
set(PCRE2GREP_LIBS ${PCRE2GREP_LIBS} ${BZIP2_LIBRARIES})
|
||||
endif()
|
||||
|
||||
# Translate the PCRE2_NEWLINE setting into the numeric NEWLINE_DEFAULT code:
#   CR=1, LF=2, CRLF=3, ANY=4, ANYCRLF=5, NUL=6
# Any other value leaves NEWLINE_DEFAULT empty and aborts configuration.
set(NEWLINE_DEFAULT "")

if(PCRE2_NEWLINE STREQUAL "CR")
  set(NEWLINE_DEFAULT "1")
elseif(PCRE2_NEWLINE STREQUAL "LF")
  set(NEWLINE_DEFAULT "2")
elseif(PCRE2_NEWLINE STREQUAL "CRLF")
  set(NEWLINE_DEFAULT "3")
elseif(PCRE2_NEWLINE STREQUAL "ANY")
  set(NEWLINE_DEFAULT "4")
elseif(PCRE2_NEWLINE STREQUAL "ANYCRLF")
  set(NEWLINE_DEFAULT "5")
elseif(PCRE2_NEWLINE STREQUAL "NUL")
  set(NEWLINE_DEFAULT "6")
endif()

if(NEWLINE_DEFAULT STREQUAL "")
  # The list of accepted values includes NUL, which is handled above.
  message(
    FATAL_ERROR
    "The PCRE2_NEWLINE variable must be set to one of the following values: \"LF\", \"CR\", \"CRLF\", \"ANY\", \"ANYCRLF\", \"NUL\"."
  )
endif()
|
||||
|
||||
# EBCDIC is enabled either directly or implicitly by the NL25 variant.
if(PCRE2_EBCDIC OR PCRE2_EBCDIC_NL25)
  set(EBCDIC 1)
endif()

if(PCRE2_EBCDIC_NL25)
  set(EBCDIC_NL25 1)
endif()
|
||||
|
||||
# Output files
|
||||
|
||||
configure_file(config-cmake.h.in ${PROJECT_BINARY_DIR}/config.h @ONLY)
|
||||
|
||||
# Parse version numbers and date out of configure.ac
|
||||
|
||||
file(
|
||||
STRINGS
|
||||
${PROJECT_SOURCE_DIR}/configure.ac
|
||||
configure_lines
|
||||
LIMIT_COUNT
|
||||
50 # Read only the first 50 lines of the file
|
||||
)
|
||||
|
||||
set(
|
||||
SEARCHED_VARIABLES
|
||||
"pcre2_major"
|
||||
"pcre2_minor"
|
||||
"pcre2_prerelease"
|
||||
"pcre2_date"
|
||||
"libpcre2_posix_version"
|
||||
"libpcre2_8_version"
|
||||
"libpcre2_16_version"
|
||||
"libpcre2_32_version"
|
||||
)
|
||||
foreach(configure_line ${configure_lines})
|
||||
foreach(substitution_variable ${SEARCHED_VARIABLES})
|
||||
string(TOUPPER ${substitution_variable} substitution_variable_upper)
|
||||
if(NOT ${substitution_variable_upper})
|
||||
string(REGEX MATCH "m4_define\\(${substitution_variable}, *\\[(.*)\\]" MATCHED_STRING ${configure_line})
|
||||
if(CMAKE_MATCH_1)
|
||||
set(${substitution_variable_upper} ${CMAKE_MATCH_1})
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
endforeach()
|
||||
|
||||
# parse_lib_version(<variable_prefix>)
#
# Splits the colon-separated "<current>:<revision>:<age>" string held in
# <variable_prefix>_VERSION and defines, in the caller's scope (this is a
# macro, so no new scope is created):
#   <prefix>_VERSION_LIST                 the three fields as a CMake list
#   <prefix>_VERSION_CURRENT              field 0
#   <prefix>_VERSION_REVISION             field 1
#   <prefix>_VERSION_AGE                  field 2
#   <prefix>_SOVERSION                    current - age
#   <prefix>_MACHO_COMPATIBILITY_VERSION  current + 1
#   <prefix>_MACHO_CURRENT_VERSION        "<current + 1>.<revision>"
#   <prefix>_VERSION                      overwritten with "<soversion>.<age>.<revision>"
#
# Example: with LIBFOO_VERSION set to "13:2:5", parse_lib_version(LIBFOO)
# yields LIBFOO_SOVERSION=8, LIBFOO_MACHO_CURRENT_VERSION=14.2 and
# LIBFOO_VERSION=8.5.2.
macro(PARSE_LIB_VERSION variable_prefix)
  string(REPLACE ":" ";" ${variable_prefix}_VERSION_LIST "${${variable_prefix}_VERSION}")
  list(GET ${variable_prefix}_VERSION_LIST 0 ${variable_prefix}_VERSION_CURRENT)
  list(GET ${variable_prefix}_VERSION_LIST 1 ${variable_prefix}_VERSION_REVISION)
  list(GET ${variable_prefix}_VERSION_LIST 2 ${variable_prefix}_VERSION_AGE)

  math(EXPR ${variable_prefix}_SOVERSION "${${variable_prefix}_VERSION_CURRENT} - ${${variable_prefix}_VERSION_AGE}")
  math(EXPR ${variable_prefix}_MACHO_COMPATIBILITY_VERSION "${${variable_prefix}_VERSION_CURRENT} + 1")
  math(EXPR ${variable_prefix}_MACHO_CURRENT_VERSION "${${variable_prefix}_VERSION_CURRENT} + 1")

  # Append the revision; the value must end right after the revision number
  # (no trailing brace) so it is a well-formed "X.Y" version string.
  set(
    ${variable_prefix}_MACHO_CURRENT_VERSION
    "${${variable_prefix}_MACHO_CURRENT_VERSION}.${${variable_prefix}_VERSION_REVISION}"
  )
  set(
    ${variable_prefix}_VERSION
    "${${variable_prefix}_SOVERSION}.${${variable_prefix}_VERSION_AGE}.${${variable_prefix}_VERSION_REVISION}"
  )
endmacro()
|
||||
|
||||
parse_lib_version(LIBPCRE2_POSIX)
|
||||
parse_lib_version(LIBPCRE2_8)
|
||||
parse_lib_version(LIBPCRE2_16)
|
||||
parse_lib_version(LIBPCRE2_32)
|
||||
|
||||
configure_file(src/pcre2.h.in ${PROJECT_BINARY_DIR}/pcre2.h @ONLY)
|
||||
|
||||
# Make sure to not link debug libs
|
||||
# against release libs and vice versa
|
||||
if(WIN32)
|
||||
set(CMAKE_DEBUG_POSTFIX "d")
|
||||
endif()
|
||||
|
||||
# Character table generation
|
||||
|
||||
# pcre2_chartables.c is either copied from the pre-built .dist file (default)
# or regenerated at build time by the pcre2_dftables helper program.
option(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF)
if(NOT PCRE2_REBUILD_CHARTABLES)
  configure_file(${PROJECT_SOURCE_DIR}/src/pcre2_chartables.c.dist ${PROJECT_BINARY_DIR}/pcre2_chartables.c COPYONLY)
else()
  add_executable(pcre2_dftables src/pcre2_dftables.c)
  add_custom_command(
    OUTPUT ${PROJECT_BINARY_DIR}/pcre2_chartables.c
    COMMAND pcre2_dftables ${PROJECT_BINARY_DIR}/pcre2_chartables.c
    DEPENDS pcre2_dftables
    COMMENT "Generating character tables (pcre2_chartables.c) for current locale"
    VERBATIM
  )
endif()
|
||||
|
||||
# Source code
|
||||
|
||||
set(PCRE2_HEADERS ${PROJECT_BINARY_DIR}/pcre2.h)
|
||||
|
||||
set(
|
||||
PCRE2_SOURCES
|
||||
src/pcre2_auto_possess.c
|
||||
${PROJECT_BINARY_DIR}/pcre2_chartables.c
|
||||
src/pcre2_chkdint.c
|
||||
src/pcre2_compile.c
|
||||
src/pcre2_compile_class.c
|
||||
src/pcre2_config.c
|
||||
src/pcre2_context.c
|
||||
src/pcre2_convert.c
|
||||
src/pcre2_dfa_match.c
|
||||
src/pcre2_error.c
|
||||
src/pcre2_extuni.c
|
||||
src/pcre2_find_bracket.c
|
||||
src/pcre2_jit_compile.c
|
||||
src/pcre2_maketables.c
|
||||
src/pcre2_match.c
|
||||
src/pcre2_match_data.c
|
||||
src/pcre2_newline.c
|
||||
src/pcre2_ord2utf.c
|
||||
src/pcre2_pattern_info.c
|
||||
src/pcre2_script_run.c
|
||||
src/pcre2_serialize.c
|
||||
src/pcre2_string_utils.c
|
||||
src/pcre2_study.c
|
||||
src/pcre2_substitute.c
|
||||
src/pcre2_substring.c
|
||||
src/pcre2_tables.c
|
||||
src/pcre2_ucd.c
|
||||
src/pcre2_valid_utf.c
|
||||
src/pcre2_xclass.c
|
||||
)
|
||||
|
||||
set(PCRE2POSIX_HEADERS src/pcre2posix.h)
|
||||
set(PCRE2POSIX_SOURCES src/pcre2posix.c)
|
||||
|
||||
# MinGW shared builds: compile the Windows resource scripts (when present)
# with windres and add the resulting objects to the library sources.
#
# Only the real object file is listed under OUTPUT. "PRE-LINK" is not a
# keyword of the OUTPUT signature, so it would be taken as a second output
# file that is never produced. The .rc file is declared as a dependency so
# the object is rebuilt when it changes.
if(MINGW AND BUILD_SHARED_LIBS)
  if(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
    add_custom_command(
      OUTPUT ${PROJECT_SOURCE_DIR}/pcre2.o
      COMMAND windres
      ARGS pcre2.rc pcre2.o
      DEPENDS ${PROJECT_SOURCE_DIR}/pcre2.rc
      WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
      COMMENT "Using pcre2 coff info in mingw build"
      VERBATIM
    )
    list(APPEND PCRE2_SOURCES ${PROJECT_SOURCE_DIR}/pcre2.o)
  endif()

  if(EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
    add_custom_command(
      OUTPUT ${PROJECT_SOURCE_DIR}/pcre2posix.o
      COMMAND windres
      ARGS pcre2posix.rc pcre2posix.o
      DEPENDS ${PROJECT_SOURCE_DIR}/pcre2posix.rc
      WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
      COMMENT "Using pcre2posix coff info in mingw build"
      VERBATIM
    )
    list(APPEND PCRE2POSIX_SOURCES ${PROJECT_SOURCE_DIR}/pcre2posix.o)
  endif()
endif()
|
||||
|
||||
if(MSVC AND BUILD_SHARED_LIBS)
|
||||
if(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
|
||||
set(PCRE2_SOURCES ${PCRE2_SOURCES} pcre2.rc)
|
||||
endif()
|
||||
|
||||
if(EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
|
||||
set(PCRE2POSIX_SOURCES ${PCRE2POSIX_SOURCES} pcre2posix.rc)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Fix static compilation with MSVC: https://bugs.exim.org/show_bug.cgi?id=1681
|
||||
# This code was taken from the CMake wiki, not from WebM.
|
||||
|
||||
# With MSVC and PCRE2_STATIC_RUNTIME, rewrite every /MD occurrence in the C
# flag variables to /MT so the static runtime library is used.
if(MSVC AND PCRE2_STATIC_RUNTIME)
  message(STATUS "** MSVC and PCRE2_STATIC_RUNTIME: modifying compiler flags to use static runtime library")
  foreach(
    c_flags_name
    IN ITEMS
    CMAKE_C_FLAGS
    CMAKE_C_FLAGS_DEBUG
    CMAKE_C_FLAGS_RELEASE
    CMAKE_C_FLAGS_MINSIZEREL
    CMAKE_C_FLAGS_RELWITHDEBINFO
  )
    # "/MD" has no regex metacharacters, so a plain replace is sufficient.
    string(REPLACE "/MD" "/MT" ${c_flags_name} "${${c_flags_name}}")
  endforeach()
endif()
|
||||
|
||||
# Build setup
|
||||
|
||||
add_compile_definitions(HAVE_CONFIG_H)
|
||||
|
||||
if(PCRE2_DEBUG STREQUAL "IfDebugBuild")
|
||||
add_compile_definitions("$<$<CONFIG:Debug>:PCRE2_DEBUG>")
|
||||
elseif(PCRE2_DEBUG)
|
||||
add_compile_definitions("PCRE2_DEBUG")
|
||||
endif()
|
||||
|
||||
if(MSVC)
|
||||
add_compile_definitions(_CRT_SECURE_NO_DEPRECATE _CRT_SECURE_NO_WARNINGS)
|
||||
endif()
|
||||
|
||||
set(CMAKE_INCLUDE_CURRENT_DIR 1)
|
||||
|
||||
set(TARGETS)
|
||||
|
||||
# 8-bit library
|
||||
|
||||
# Targets for the 8-bit code unit width, plus the POSIX wrapper library that
# is built on top of the 8-bit library.
if(PCRE2_BUILD_PCRE2_8)
  if(BUILD_STATIC_LIBS)
    # --- static core library ---
    add_library(pcre2-8-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
    set_target_properties(
      pcre2-8-static
      PROPERTIES
        COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
        MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_8_MACHO_COMPATIBILITY_VERSION}"
        MACHO_CURRENT_VERSION "${LIBPCRE2_8_MACHO_CURRENT_VERSION}"
        VERSION ${LIBPCRE2_8_VERSION}
        SOVERSION ${LIBPCRE2_8_SOVERSION}
    )
    # Keep this after set_target_properties(): that call assigns
    # COMPILE_DEFINITIONS outright, whereas this one adds to it.
    target_compile_definitions(pcre2-8-static PUBLIC PCRE2_STATIC)
    target_include_directories(pcre2-8-static PUBLIC ${PROJECT_BINARY_DIR})
    if(REQUIRE_PTHREAD)
      target_link_libraries(pcre2-8-static Threads::Threads)
    endif()

    # --- static POSIX wrapper ---
    add_library(pcre2-posix-static STATIC ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES})
    set_target_properties(
      pcre2-posix-static
      PROPERTIES
        COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
        MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_POSIX_MACHO_COMPATIBILITY_VERSION}"
        MACHO_CURRENT_VERSION "${LIBPCRE2_POSIX_MACHO_CURRENT_VERSION}"
        VERSION ${LIBPCRE2_POSIX_VERSION}
        SOVERSION ${LIBPCRE2_POSIX_SOVERSION}
    )
    target_link_libraries(pcre2-posix-static pcre2-8-static)
    target_include_directories(pcre2-posix-static PUBLIC ${PROJECT_SOURCE_DIR}/src)

    list(APPEND TARGETS pcre2-8-static pcre2-posix-static)

    # Under MSVC the static libraries keep a "-static" suffix in their file
    # names; everywhere else they use the plain library names.
    if(NOT MSVC)
      set_target_properties(pcre2-8-static PROPERTIES OUTPUT_NAME pcre2-8)
      set_target_properties(pcre2-posix-static PROPERTIES OUTPUT_NAME pcre2-posix)
    else()
      set_target_properties(pcre2-8-static PROPERTIES OUTPUT_NAME pcre2-8-static)
      set_target_properties(pcre2-posix-static PROPERTIES OUTPUT_NAME pcre2-posix-static)
    endif()
    if(PCRE2_STATIC_PIC)
      set_target_properties(pcre2-8-static pcre2-posix-static PROPERTIES POSITION_INDEPENDENT_CODE 1)
    endif()
  endif()

  if(BUILD_SHARED_LIBS)
    # --- shared core library ---
    add_library(pcre2-8-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
    set_target_properties(
      pcre2-8-shared
      PROPERTIES
        COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
        MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_8_MACHO_COMPATIBILITY_VERSION}"
        MACHO_CURRENT_VERSION "${LIBPCRE2_8_MACHO_CURRENT_VERSION}"
        VERSION ${LIBPCRE2_8_VERSION}
        SOVERSION ${LIBPCRE2_8_SOVERSION}
        OUTPUT_NAME pcre2-8
    )
    target_include_directories(pcre2-8-shared PUBLIC ${PROJECT_BINARY_DIR})
    if(REQUIRE_PTHREAD)
      target_link_libraries(pcre2-8-shared Threads::Threads)
    endif()

    # --- shared POSIX wrapper ---
    add_library(pcre2-posix-shared SHARED ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES})
    set_target_properties(
      pcre2-posix-shared
      PROPERTIES
        COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
        MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_POSIX_MACHO_COMPATIBILITY_VERSION}"
        MACHO_CURRENT_VERSION "${LIBPCRE2_POSIX_MACHO_CURRENT_VERSION}"
        VERSION ${LIBPCRE2_POSIX_VERSION}
        SOVERSION ${LIBPCRE2_POSIX_SOVERSION}
        OUTPUT_NAME pcre2-posix
    )
    target_include_directories(pcre2-posix-shared PUBLIC ${PROJECT_SOURCE_DIR}/src)
    # Added after the COMPILE_DEFINITIONS assignment above so it is not lost.
    set(PCRE2POSIX_CFLAG "-DPCRE2POSIX_SHARED")
    target_compile_definitions(pcre2-posix-shared PUBLIC ${PCRE2POSIX_CFLAG})
    target_link_libraries(pcre2-posix-shared pcre2-8-shared)

    list(APPEND TARGETS pcre2-8-shared pcre2-posix-shared)

    # New PDB entries go in front of the existing ones, POSIX wrapper first.
    set(
      DLL_PDB_FILES
      $<TARGET_PDB_FILE_DIR:pcre2-posix-shared>/pcre2-posix.pdb
      $<TARGET_PDB_FILE_DIR:pcre2-8-shared>/pcre2-8.pdb
      ${DLL_PDB_FILES}
    )
    set(
      DLL_PDB_DEBUG_FILES
      $<TARGET_PDB_FILE_DIR:pcre2-posix-shared>/pcre2-posixd.pdb
      $<TARGET_PDB_FILE_DIR:pcre2-8-shared>/pcre2-8d.pdb
      ${DLL_PDB_DEBUG_FILES}
    )

    if(MINGW)
      if(NON_STANDARD_LIB_PREFIX)
        set_target_properties(pcre2-8-shared pcre2-posix-shared PROPERTIES PREFIX "")
      endif()
      if(NON_STANDARD_LIB_SUFFIX)
        set_target_properties(pcre2-8-shared pcre2-posix-shared PROPERTIES SUFFIX "-0.dll")
      endif()
    endif()
  endif()

  # The unsuffixed names alias the static flavour when it is built, and the
  # shared flavour otherwise.
  if(NOT BUILD_STATIC_LIBS)
    add_library(pcre2-8 ALIAS pcre2-8-shared)
    add_library(pcre2-posix ALIAS pcre2-posix-shared)
  else()
    add_library(pcre2-8 ALIAS pcre2-8-static)
    add_library(pcre2-posix ALIAS pcre2-posix-static)
  endif()
endif()
|
||||
|
||||
# 16-bit library
|
||||
|
||||
# Targets for the 16-bit code unit width.
#
# The Mach-O version properties use the LIBPCRE2_16_* values, matching the
# VERSION/SOVERSION properties of the same targets.
if(PCRE2_BUILD_PCRE2_16)
  if(BUILD_STATIC_LIBS)
    add_library(pcre2-16-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
    target_include_directories(pcre2-16-static PUBLIC ${PROJECT_BINARY_DIR})
    set_target_properties(
      pcre2-16-static
      PROPERTIES
        UNITY_BUILD OFF
        COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16
        MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_16_MACHO_COMPATIBILITY_VERSION}"
        MACHO_CURRENT_VERSION "${LIBPCRE2_16_MACHO_CURRENT_VERSION}"
        VERSION ${LIBPCRE2_16_VERSION}
        SOVERSION ${LIBPCRE2_16_SOVERSION}
    )
    # Must stay after set_target_properties(): that call assigns
    # COMPILE_DEFINITIONS outright, whereas this one adds to it.
    target_compile_definitions(pcre2-16-static PUBLIC PCRE2_STATIC)
    if(REQUIRE_PTHREAD)
      target_link_libraries(pcre2-16-static Threads::Threads)
    endif()
    set(TARGETS ${TARGETS} pcre2-16-static)

    # Under MSVC the static library keeps a "-static" suffix in its file name.
    if(MSVC)
      set_target_properties(pcre2-16-static PROPERTIES OUTPUT_NAME pcre2-16-static)
    else()
      set_target_properties(pcre2-16-static PROPERTIES OUTPUT_NAME pcre2-16)
    endif()
    if(PCRE2_STATIC_PIC)
      set_target_properties(pcre2-16-static PROPERTIES POSITION_INDEPENDENT_CODE 1)
    endif()
  endif()

  if(BUILD_SHARED_LIBS)
    add_library(pcre2-16-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
    target_include_directories(pcre2-16-shared PUBLIC ${PROJECT_BINARY_DIR})
    set_target_properties(
      pcre2-16-shared
      PROPERTIES
        UNITY_BUILD OFF
        COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16
        MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_16_MACHO_COMPATIBILITY_VERSION}"
        MACHO_CURRENT_VERSION "${LIBPCRE2_16_MACHO_CURRENT_VERSION}"
        VERSION ${LIBPCRE2_16_VERSION}
        SOVERSION ${LIBPCRE2_16_SOVERSION}
        OUTPUT_NAME pcre2-16
    )
    if(REQUIRE_PTHREAD)
      target_link_libraries(pcre2-16-shared Threads::Threads)
    endif()
    set(TARGETS ${TARGETS} pcre2-16-shared)
    # New PDB entries are placed in front of the existing ones.
    set(DLL_PDB_FILES $<TARGET_PDB_FILE_DIR:pcre2-16-shared>/pcre2-16.pdb ${DLL_PDB_FILES})
    set(DLL_PDB_DEBUG_FILES $<TARGET_PDB_FILE_DIR:pcre2-16-shared>/pcre2-16d.pdb ${DLL_PDB_DEBUG_FILES})

    if(MINGW)
      if(NON_STANDARD_LIB_PREFIX)
        set_target_properties(pcre2-16-shared PROPERTIES PREFIX "")
      endif()
      if(NON_STANDARD_LIB_SUFFIX)
        set_target_properties(pcre2-16-shared PROPERTIES SUFFIX "-0.dll")
      endif()
    endif()
  endif()

  # The unsuffixed name aliases the static flavour when it is built, and the
  # shared flavour otherwise.
  if(BUILD_STATIC_LIBS)
    add_library(pcre2-16 ALIAS pcre2-16-static)
  else()
    add_library(pcre2-16 ALIAS pcre2-16-shared)
  endif()
endif()
|
||||
|
||||
# 32-bit library
|
||||
|
||||
# Targets for the 32-bit code unit width.
if(PCRE2_BUILD_PCRE2_32)
  if(BUILD_STATIC_LIBS)
    add_library(pcre2-32-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
    set_target_properties(
      pcre2-32-static
      PROPERTIES
        UNITY_BUILD OFF
        COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32
        MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}"
        MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}"
        VERSION ${LIBPCRE2_32_VERSION}
        SOVERSION ${LIBPCRE2_32_SOVERSION}
    )
    # Must stay after set_target_properties(): that call assigns
    # COMPILE_DEFINITIONS outright, whereas this one adds to it.
    target_compile_definitions(pcre2-32-static PUBLIC PCRE2_STATIC)
    target_include_directories(pcre2-32-static PUBLIC ${PROJECT_BINARY_DIR})
    if(REQUIRE_PTHREAD)
      target_link_libraries(pcre2-32-static Threads::Threads)
    endif()
    list(APPEND TARGETS pcre2-32-static)

    # Under MSVC the static library keeps a "-static" suffix in its file name.
    if(NOT MSVC)
      set_target_properties(pcre2-32-static PROPERTIES OUTPUT_NAME pcre2-32)
    else()
      set_target_properties(pcre2-32-static PROPERTIES OUTPUT_NAME pcre2-32-static)
    endif()
    if(PCRE2_STATIC_PIC)
      set_target_properties(pcre2-32-static PROPERTIES POSITION_INDEPENDENT_CODE 1)
    endif()
  endif()

  if(BUILD_SHARED_LIBS)
    add_library(pcre2-32-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
    set_target_properties(
      pcre2-32-shared
      PROPERTIES
        UNITY_BUILD OFF
        COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32
        MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}"
        MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}"
        VERSION ${LIBPCRE2_32_VERSION}
        SOVERSION ${LIBPCRE2_32_SOVERSION}
        OUTPUT_NAME pcre2-32
    )
    target_include_directories(pcre2-32-shared PUBLIC ${PROJECT_BINARY_DIR})
    if(REQUIRE_PTHREAD)
      target_link_libraries(pcre2-32-shared Threads::Threads)
    endif()
    list(APPEND TARGETS pcre2-32-shared)
    # New PDB entries are placed in front of the existing ones.
    set(DLL_PDB_FILES $<TARGET_PDB_FILE_DIR:pcre2-32-shared>/pcre2-32.pdb ${DLL_PDB_FILES})
    set(DLL_PDB_DEBUG_FILES $<TARGET_PDB_FILE_DIR:pcre2-32-shared>/pcre2-32d.pdb ${DLL_PDB_DEBUG_FILES})

    if(MINGW)
      if(NON_STANDARD_LIB_PREFIX)
        set_target_properties(pcre2-32-shared PROPERTIES PREFIX "")
      endif()
      if(NON_STANDARD_LIB_SUFFIX)
        set_target_properties(pcre2-32-shared PROPERTIES SUFFIX "-0.dll")
      endif()
    endif()
  endif()

  # The unsuffixed name aliases the static flavour when it is built, and the
  # shared flavour otherwise.
  if(NOT BUILD_STATIC_LIBS)
    add_library(pcre2-32 ALIAS pcre2-32-shared)
  else()
    add_library(pcre2-32 ALIAS pcre2-32-static)
  endif()
endif()
|
||||
|
||||
# Generate pkg-config files
|
||||
|
||||
set(PACKAGE_VERSION "${PCRE2_MAJOR}.${PCRE2_MINOR}")
|
||||
set(prefix ${CMAKE_INSTALL_PREFIX})
|
||||
set(exec_prefix "\${prefix}")
|
||||
set(libdir "\${exec_prefix}/${CMAKE_INSTALL_LIBDIR}")
|
||||
set(includedir "\${prefix}/include")
|
||||
if(WIN32 AND (CMAKE_BUILD_TYPE MATCHES Debug))
|
||||
set(LIB_POSTFIX ${CMAKE_DEBUG_POSTFIX})
|
||||
endif()
|
||||
|
||||
# For each enabled code unit width, generate its pkg-config file, record it
# in pkg_config_files and set enable_pcre2_<width> to "yes" ("no" otherwise).
# The POSIX wrapper's .pc file is generated together with, and just before,
# the 8-bit one.
foreach(unit_width IN ITEMS 8 16 32)
  if(PCRE2_BUILD_PCRE2_${unit_width})
    if(unit_width EQUAL 8)
      configure_file(libpcre2-posix.pc.in libpcre2-posix.pc @ONLY)
      list(APPEND pkg_config_files "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-posix.pc")
    endif()
    configure_file(libpcre2-${unit_width}.pc.in libpcre2-${unit_width}.pc @ONLY)
    list(APPEND pkg_config_files "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-${unit_width}.pc")
    set(enable_pcre2_${unit_width} "yes")
  else()
    set(enable_pcre2_${unit_width} "no")
  endif()
endforeach()
|
||||
|
||||
configure_file(pcre2-config.in pcre2-config @ONLY NEWLINE_STYLE LF)
|
||||
|
||||
# Executables
|
||||
|
||||
# pcre2grep is built against the POSIX wrapper (the 8-bit library) plus any
# optional compression libraries collected in PCRE2GREP_LIBS.
if(PCRE2_BUILD_PCRE2GREP)
  add_executable(pcre2grep src/pcre2grep.c)
  set_target_properties(pcre2grep PROPERTIES COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
  target_link_libraries(pcre2grep pcre2-posix ${PCRE2GREP_LIBS})
  list(APPEND TARGETS pcre2grep)
endif()
|
||||
|
||||
# Testing
|
||||
|
||||
if(PCRE2_BUILD_TESTS)
|
||||
enable_testing()
|
||||
|
||||
set(PCRE2TEST_SOURCES src/pcre2test.c)
|
||||
|
||||
if(MSVC)
|
||||
# This is needed to avoid a stack overflow error in the standard tests. The
|
||||
# flag should be indicated with a forward-slash instead of a hyphen, but
|
||||
# then CMake treats it as a file path.
|
||||
set(PCRE2TEST_LINKER_FLAGS -STACK:2500000)
|
||||
endif()
|
||||
|
||||
add_executable(pcre2test ${PCRE2TEST_SOURCES})
|
||||
set(TARGETS ${TARGETS} pcre2test)
|
||||
if(PCRE2_BUILD_PCRE2_8)
|
||||
list(APPEND PCRE2TEST_LIBS pcre2-posix pcre2-8)
|
||||
endif()
|
||||
if(PCRE2_BUILD_PCRE2_16)
|
||||
list(APPEND PCRE2TEST_LIBS pcre2-16)
|
||||
endif()
|
||||
if(PCRE2_BUILD_PCRE2_32)
|
||||
list(APPEND PCRE2TEST_LIBS pcre2-32)
|
||||
endif()
|
||||
target_link_libraries(pcre2test ${PCRE2TEST_LIBS} ${PCRE2TEST_LINKER_FLAGS})
|
||||
|
||||
if(PCRE2_BUILD_PCRE2_8)
|
||||
add_executable(pcre2posix_test src/pcre2posix_test.c)
|
||||
target_link_libraries(pcre2posix_test pcre2-posix pcre2-8)
|
||||
endif()
|
||||
|
||||
if(PCRE2_SUPPORT_JIT)
|
||||
add_executable(pcre2_jit_test src/pcre2_jit_test.c)
|
||||
set(PCRE2_JIT_TEST_LIBS)
|
||||
if(PCRE2_BUILD_PCRE2_8)
|
||||
list(APPEND PCRE2_JIT_TEST_LIBS pcre2-8)
|
||||
endif()
|
||||
if(PCRE2_BUILD_PCRE2_16)
|
||||
list(APPEND PCRE2_JIT_TEST_LIBS pcre2-16)
|
||||
endif()
|
||||
if(PCRE2_BUILD_PCRE2_32)
|
||||
list(APPEND PCRE2_JIT_TEST_LIBS pcre2-32)
|
||||
endif()
|
||||
target_link_libraries(pcre2_jit_test ${PCRE2_JIT_TEST_LIBS})
|
||||
endif()
|
||||
|
||||
# =================================================
# Write out a CTest configuration file
#
# CTestCustom.ctest holds two MESSAGE() calls that point the user at the
# Testing/Temporary folder of the build tree.
file(
  WRITE
  ${PROJECT_BINARY_DIR}/CTestCustom.ctest
  "# This is a generated file.
MESSAGE(\"When testing is complete, review test output in the
\\\"${PROJECT_BINARY_DIR}/Testing/Temporary\\\" folder.\")
MESSAGE(\" \")
"
)

# Shell wrapper around RunTest. It sets srcdir and pcre2test, preferring the
# per-configuration subdirectory when CMAKE_CONFIG_TYPE is set in the
# environment, then sources RunTest from the source tree.
#
# Every path is wrapped in double quotes inside the generated script so that
# source or build directories containing spaces are not split into several
# words by the shell. $CMAKE_CONFIG_TYPE has no braces, so CMake leaves it
# alone and the shell expands it when the script runs.
file(
  WRITE
  ${PROJECT_BINARY_DIR}/pcre2_test.sh
  "#! /bin/sh
# This is a generated file.
srcdir=\"${PROJECT_SOURCE_DIR}\"
pcre2test=\"${PROJECT_BINARY_DIR}/pcre2test\"
test -z \"$CMAKE_CONFIG_TYPE\" || pcre2test=\"${PROJECT_BINARY_DIR}/$CMAKE_CONFIG_TYPE/pcre2test\"
. \"${PROJECT_SOURCE_DIR}/RunTest\"
if test \"$?\" != \"0\"; then exit 1; fi
# End
"
)

# The shell wrapper is only registered as a test on UNIX hosts.
if(UNIX)
  add_test(pcre2_test sh ${PROJECT_BINARY_DIR}/pcre2_test.sh)
endif()
|
||||
|
||||
if(PCRE2_BUILD_PCRE2GREP)
  # Shell wrapper around RunGrepTest. It sets srcdir, pcre2grep and pcre2test,
  # preferring the per-configuration subdirectory when CMAKE_CONFIG_TYPE is
  # set in the environment, then sources RunGrepTest from the source tree.
  #
  # Every path is wrapped in double quotes inside the generated script so that
  # directories containing spaces are not split into several words by the
  # shell. $CMAKE_CONFIG_TYPE has no braces, so CMake leaves it alone and the
  # shell expands it when the script runs.
  file(
    WRITE
    ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh
    "#! /bin/sh
# This is a generated file.
srcdir=\"${PROJECT_SOURCE_DIR}\"
pcre2grep=\"${PROJECT_BINARY_DIR}/pcre2grep\"
test -z \"$CMAKE_CONFIG_TYPE\" || pcre2grep=\"${PROJECT_BINARY_DIR}/$CMAKE_CONFIG_TYPE/pcre2grep\"
pcre2test=\"${PROJECT_BINARY_DIR}/pcre2test\"
test -z \"$CMAKE_CONFIG_TYPE\" || pcre2test=\"${PROJECT_BINARY_DIR}/$CMAKE_CONFIG_TYPE/pcre2test\"
. \"${PROJECT_SOURCE_DIR}/RunGrepTest\"
if test \"$?\" != \"0\"; then exit 1; fi
# End
"
  )

  # The shell wrapper is only registered as a test on UNIX hosts.
  if(UNIX)
    add_test(pcre2_grep_test sh ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh)
  endif()
endif()
|
||||
|
||||
if(WIN32)
  # The batch-file versions of the test runners get the source and build
  # directories converted to native path form.
  file(TO_NATIVE_PATH "${PROJECT_SOURCE_DIR}" winsrc)
  file(TO_NATIVE_PATH "${PROJECT_BINARY_DIR}" winbin)

  # Wrapper for RunTest.bat: sets srcdir and pcre2test (switching to the
  # %CMAKE_CONFIG_TYPE% subdirectory when that variable is set), calls the
  # runner and echoes a marker line when it succeeds.
  file(
    WRITE "${PROJECT_BINARY_DIR}/pcre2_test.bat"
    "\@REM This is a generated file.
\@echo off
setlocal
SET srcdir=\"${winsrc}\"
SET pcre2test=\"${winbin}\\pcre2test.exe\"
if not [%CMAKE_CONFIG_TYPE%]==[] SET pcre2test=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcre2test.exe\"
call %srcdir%\\RunTest.bat
if errorlevel 1 exit /b 1
echo RunTest.bat tests successfully completed
"
  )

  # The test passes when the marker line echoed by the wrapper is seen.
  add_test(NAME pcre2_test_bat COMMAND pcre2_test.bat)
  set_tests_properties(
    pcre2_test_bat
    PROPERTIES PASS_REGULAR_EXPRESSION "RunTest\\.bat tests successfully completed"
  )

  if(PCRE2_BUILD_PCRE2GREP)
    # Same scheme for RunGrepTest.bat, which also needs pcre2grep.
    file(
      WRITE "${PROJECT_BINARY_DIR}/pcre2_grep_test.bat"
      "\@REM This is a generated file.
\@echo off
setlocal
SET srcdir=\"${winsrc}\"
SET pcre2test=\"${winbin}\\pcre2test.exe\"
if not [%CMAKE_CONFIG_TYPE%]==[] SET pcre2test=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcre2test.exe\"
SET pcre2grep=\"${winbin}\\pcre2grep.exe\"
if not [%CMAKE_CONFIG_TYPE%]==[] SET pcre2grep=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcre2grep.exe\"
call %srcdir%\\RunGrepTest.bat
if errorlevel 1 exit /b 1
echo RunGrepTest.bat tests successfully completed
"
    )

    add_test(NAME pcre2_grep_test_bat COMMAND pcre2_grep_test.bat)
    set_tests_properties(
      pcre2_grep_test_bat
      PROPERTIES PASS_REGULAR_EXPRESSION "RunGrepTest\\.bat tests successfully completed"
    )
  endif()

  # When OSTYPE in the configuring environment is "msys", the sh versions of
  # the runners are registered in addition to the bat versions, so both are
  # run if make test is used in msys.
  if("$ENV{OSTYPE}" STREQUAL "msys")
    add_test(pcre2_test_sh sh.exe ${PROJECT_BINARY_DIR}/pcre2_test.sh)
    if(PCRE2_BUILD_PCRE2GREP)
      add_test(pcre2_grep_test sh.exe ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh)
    endif()
  endif()
endif()
|
||||
|
||||
# Register the stand-alone test programs with CTest. Both use the plain
# add_test(<name> <command>) form; according to the note that accompanied
# this code, it was changed to accommodate testing whichever location was
# just built.

# The JIT test exists only when JIT support is enabled.
if(PCRE2_SUPPORT_JIT)
  add_test(pcre2_jit_test pcre2_jit_test)
endif()

# The POSIX wrapper test exists only when the 8-bit library is built.
if(PCRE2_BUILD_PCRE2_8)
  add_test(pcre2posix_test pcre2posix_test)
endif()
|
||||
endif()
|
||||
|
||||
# Installation
#
# All destinations come from GNUInstallDirs. The library, man and doc rules
# already used CMAKE_INSTALL_LIBDIR / MANDIR / DATAROOTDIR; the binary and
# header rules now use CMAKE_INSTALL_BINDIR / INCLUDEDIR instead of the
# literals "bin" and "include". The defaults of those variables are "bin" and
# "include", so the layout is unchanged unless the user overrides them.
# Including the module again is harmless: it only fills in variables that are
# not already defined.
# NOTE(review): the pkg-config files and pcre2-config are generated outside
# this section - confirm they derive their paths from the same variables.
include(GNUInstallDirs)

set(CMAKE_INSTALL_ALWAYS 1)

install(
  TARGETS ${TARGETS}
  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
)
install(FILES ${pkg_config_files} DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
install(
  FILES "${CMAKE_CURRENT_BINARY_DIR}/pcre2-config"
  DESTINATION ${CMAKE_INSTALL_BINDIR}
  # Set 0755 permissions
  PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE
)

install(FILES ${PCRE2_HEADERS} ${PCRE2POSIX_HEADERS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})

# CMake config files.
# Both templates are expanded with @ONLY, so only @VAR@ references are
# substituted and ${...} text in the templates is left as written.
set(PCRE2_CONFIG_IN ${CMAKE_CURRENT_SOURCE_DIR}/cmake/pcre2-config.cmake.in)
set(PCRE2_CONFIG_OUT ${CMAKE_CURRENT_BINARY_DIR}/cmake/pcre2-config.cmake)
configure_file(${PCRE2_CONFIG_IN} ${PCRE2_CONFIG_OUT} @ONLY)
set(PCRE2_CONFIG_VERSION_IN ${CMAKE_CURRENT_SOURCE_DIR}/cmake/pcre2-config-version.cmake.in)
set(PCRE2_CONFIG_VERSION_OUT ${CMAKE_CURRENT_BINARY_DIR}/cmake/pcre2-config-version.cmake)
configure_file(${PCRE2_CONFIG_VERSION_IN} ${PCRE2_CONFIG_VERSION_OUT} @ONLY)
install(FILES ${PCRE2_CONFIG_OUT} ${PCRE2_CONFIG_VERSION_OUT} DESTINATION "${PCRE2_INSTALL_CMAKEDIR}")

# Documentation: HTML pages, plain-text documents plus the top-level project
# files, and the section 1 and section 3 man pages.
file(GLOB html ${PROJECT_SOURCE_DIR}/doc/html/*.html ${PROJECT_SOURCE_DIR}/doc/html/*.txt)
file(
  GLOB txts
  ${PROJECT_SOURCE_DIR}/doc/*.txt
  AUTHORS.md
  COPYING
  ChangeLog
  LICENCE.md
  NEWS
  README
  SECURITY.md
)
file(GLOB man1 ${PROJECT_SOURCE_DIR}/doc/*.1)
file(GLOB man3 ${PROJECT_SOURCE_DIR}/doc/*.3)

install(FILES ${man1} DESTINATION ${CMAKE_INSTALL_MANDIR}/man1)
install(FILES ${man3} DESTINATION ${CMAKE_INSTALL_MANDIR}/man3)
install(FILES ${txts} DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/doc/pcre2)
install(FILES ${html} DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/doc/pcre2/html)

# Optionally install the MSVC debug-symbol files next to the binaries, split
# by the configuration they belong to.
if(MSVC AND INSTALL_MSVC_PDB)
  install(FILES ${DLL_PDB_FILES} DESTINATION ${CMAKE_INSTALL_BINDIR} CONFIGURATIONS RelWithDebInfo)
  install(FILES ${DLL_PDB_DEBUG_FILES} DESTINATION ${CMAKE_INSTALL_BINDIR} CONFIGURATIONS Debug)
endif()
|
||||
|
||||
# Purely cosmetic: collapse whatever truthy/falsy value BUILD_STATIC_LIBS
# holds into a literal ON or OFF so the summary below prints it uniformly.
if(NOT BUILD_STATIC_LIBS)
  set(BUILD_STATIC_LIBS OFF)
else()
  set(BUILD_STATIC_LIBS ON)
endif()

# PCRE2_HEAP_MATCH_RECURSE is still accepted, but only triggers a warning.
if(PCRE2_HEAP_MATCH_RECURSE)
  message(WARNING "HEAP_MATCH_RECURSE is obsolete and does nothing.")
endif()
|
||||
|
||||
# Print a human-readable summary of the configuration. Nothing here changes
# the build; every line is a message(STATUS ...) call.
if(PCRE2_SHOW_REPORT)
  message(STATUS "")
  message(STATUS "")
  message(STATUS "PCRE2-${PCRE2_MAJOR}.${PCRE2_MINOR} configuration summary:")
  message(STATUS "")
  message(STATUS " Install prefix .................... : ${CMAKE_INSTALL_PREFIX}")
  message(STATUS " C compiler ........................ : ${CMAKE_C_COMPILER}")

  # CFSP is the separator between the common flags and the per-configuration
  # flags; it is only set when there are common flags to separate.
  if(CMAKE_C_FLAGS)
    set(CFSP " ")
  endif()
  if(CMAKE_CONFIGURATION_TYPES)
    # Multi-config generator: one line per configuration, padded with dots so
    # the colons line up with the other rows.
    foreach(config IN LISTS CMAKE_CONFIGURATION_TYPES)
      string(TOUPPER "${config}" buildtype)
      string(LENGTH " (${config})" buildtypelen)
      math(EXPR dotslen "18 - ${buildtypelen}")
      # A long configuration name makes the padding negative, which
      # string(REPEAT) rejects; clamp to zero and accept a misaligned row.
      if(dotslen LESS 0)
        set(dotslen 0)
      endif()
      string(REPEAT "." ${dotslen} dots)
      message(STATUS " C compiler flags (${config}) ${dots} : ${CMAKE_C_FLAGS}${CFSP}${CMAKE_C_FLAGS_${buildtype}}")
    endforeach()
  else()
    string(TOUPPER "${CMAKE_BUILD_TYPE}" buildtype)
    message(STATUS " C compiler flags .................. : ${CMAKE_C_FLAGS}${CFSP}${CMAKE_C_FLAGS_${buildtype}}")
  endif()

  message(STATUS "")
  if(CMAKE_CONFIGURATION_TYPES)
    message(STATUS " Build configurations .............. : ${CMAKE_CONFIGURATION_TYPES}")
  else()
    message(STATUS " Build type ........................ : ${CMAKE_BUILD_TYPE}")
  endif()
  message(STATUS " Build 8 bit PCRE2 library ......... : ${PCRE2_BUILD_PCRE2_8}")
  message(STATUS " Build 16 bit PCRE2 library ........ : ${PCRE2_BUILD_PCRE2_16}")
  message(STATUS " Build 32 bit PCRE2 library ........ : ${PCRE2_BUILD_PCRE2_32}")
  message(STATUS " Include debugging code ............ : ${PCRE2_DEBUG}")
  message(STATUS " Enable JIT compiling support ...... : ${PCRE2_SUPPORT_JIT}")
  message(STATUS " Use SELinux allocator in JIT ...... : ${PCRE2_SUPPORT_JIT_SEALLOC}")
  message(STATUS " Enable Unicode support ............ : ${PCRE2_SUPPORT_UNICODE}")
  message(STATUS " Newline char/sequence ............. : ${PCRE2_NEWLINE}")
  message(STATUS " \\R matches only ANYCRLF ........... : ${PCRE2_SUPPORT_BSR_ANYCRLF}")
  message(STATUS " \\C is disabled .................... : ${PCRE2_NEVER_BACKSLASH_C}")
  message(STATUS " EBCDIC coding ..................... : ${PCRE2_EBCDIC}")
  message(STATUS " EBCDIC coding with NL=0x25 ........ : ${PCRE2_EBCDIC_NL25}")
  message(STATUS " Rebuild char tables ............... : ${PCRE2_REBUILD_CHARTABLES}")
  message(STATUS " Internal link size ................ : ${PCRE2_LINK_SIZE}")
  message(STATUS " Maximum variable lookbehind ....... : ${PCRE2_MAX_VARLOOKBEHIND}")
  message(STATUS " Parentheses nest limit ............ : ${PCRE2_PARENS_NEST_LIMIT}")
  message(STATUS " Heap limit ........................ : ${PCRE2_HEAP_LIMIT}")
  message(STATUS " Match limit ....................... : ${PCRE2_MATCH_LIMIT}")
  message(STATUS " Match depth limit ................. : ${PCRE2_MATCH_LIMIT_DEPTH}")
  message(STATUS " Build shared libs ................. : ${BUILD_SHARED_LIBS}")
  message(STATUS " Build static libs ................. : ${BUILD_STATIC_LIBS}")
  message(STATUS " with PIC enabled ............... : ${PCRE2_STATIC_PIC}")
  message(STATUS " Build pcre2grep ................... : ${PCRE2_BUILD_PCRE2GREP}")
  message(STATUS " Enable JIT in pcre2grep ........... : ${PCRE2GREP_SUPPORT_JIT}")
  message(STATUS " Enable callouts in pcre2grep ...... : ${PCRE2GREP_SUPPORT_CALLOUT}")
  message(STATUS " Enable callout fork in pcre2grep .. : ${PCRE2GREP_SUPPORT_CALLOUT_FORK}")
  message(STATUS " Buffer size for pcre2grep ......... : ${PCRE2GREP_BUFSIZE}")
  message(STATUS " Build tests (implies pcre2test .... : ${PCRE2_BUILD_TESTS}")
  message(STATUS " and pcre2grep)")

  # For each optional library, report the support setting when the library
  # was found and a fixed "not found" text otherwise.
  if(ZLIB_FOUND)
    message(STATUS " Link pcre2grep with libz .......... : ${PCRE2_SUPPORT_LIBZ}")
  else()
    message(STATUS " Link pcre2grep with libz .......... : Library not found")
  endif()
  if(BZIP2_FOUND)
    message(STATUS " Link pcre2grep with libbz2 ........ : ${PCRE2_SUPPORT_LIBBZ2}")
  else()
    message(STATUS " Link pcre2grep with libbz2 ........ : Library not found")
  endif()
  if(EDITLINE_FOUND)
    message(STATUS " Link pcre2test with libeditline ... : ${PCRE2_SUPPORT_LIBEDIT}")
  else()
    message(STATUS " Link pcre2test with libeditline ... : Library not found")
  endif()
  if(READLINE_FOUND)
    message(STATUS " Link pcre2test with libreadline ... : ${PCRE2_SUPPORT_LIBREADLINE}")
  else()
    message(STATUS " Link pcre2test with libreadline ... : Library not found")
  endif()
  message(STATUS " Support Valgrind .................. : ${PCRE2_SUPPORT_VALGRIND}")
  if(PCRE2_DISABLE_PERCENT_ZT)
    message(STATUS " Use %zu and %td ................... : OFF")
  else()
    message(STATUS " Use %zu and %td ................... : AUTO")
  endif()

  if(MINGW AND BUILD_SHARED_LIBS)
    message(STATUS " Non-standard dll names (prefix) ... : ${NON_STANDARD_LIB_PREFIX}")
    message(STATUS " Non-standard dll names (suffix) ... : ${NON_STANDARD_LIB_SUFFIX}")
  endif()

  if(MSVC)
    message(STATUS " Install MSVC .pdb files ........... : ${INSTALL_MSVC_PDB}")
  endif()

  message(STATUS "")
endif()
|
||||
|
||||
# end CMakeLists.txt
|
||||
@@ -1,22 +0,0 @@
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
3. The name of the author may not be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
@@ -1,13 +0,0 @@
|
||||
# Modified from FindReadline.cmake (PH Feb 2012)
#
# Locates the editline header directory and library. Result variables:
#   EDITLINE_INCLUDE_DIR - directory containing readline.h
#   EDITLINE_LIBRARY     - the "edit" library
#   EDITLINE_FOUND       - set directly when both values are already known

if(NOT EDITLINE_INCLUDE_DIR OR NOT EDITLINE_LIBRARY)
  # At least one of the two is missing: search for both and let the standard
  # helper report the result.
  find_path(
    EDITLINE_INCLUDE_DIR
    NAMES readline.h
    PATH_SUFFIXES editline edit/readline
  )
  find_library(EDITLINE_LIBRARY NAMES edit)

  include(FindPackageHandleStandardArgs)
  find_package_handle_standard_args(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)

  mark_as_advanced(EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)
else()
  # Both locations are already set, so skip the search.
  set(EDITLINE_FOUND TRUE)
endif()
|
||||
@@ -1,27 +0,0 @@
|
||||
# from http://websvn.kde.org/trunk/KDE/kdeedu/cmake/modules/FindReadline.cmake
# http://websvn.kde.org/trunk/KDE/kdeedu/cmake/modules/COPYING-CMAKE-SCRIPTS
# --> BSD licensed
#
# GNU Readline library finder
#
# Result variables:
#   READLINE_INCLUDE_DIR - where readline/readline.h was found
#   READLINE_LIBRARY     - the readline library
#   READLINE_FOUND       - set directly when the include directory, the
#                          library and NCURSES_LIBRARY are all already given
if(NOT READLINE_INCLUDE_DIR OR NOT READLINE_LIBRARY OR NOT NCURSES_LIBRARY)
  find_path(
    READLINE_INCLUDE_DIR
    NAMES readline/readline.h
    PATHS /usr/include/readline
  )

  # 2008-04-22 The next clause used to read like this:
  #
  # FIND_LIBRARY(READLINE_LIBRARY NAMES readline)
  # FIND_LIBRARY(NCURSES_LIBRARY NAMES ncurses )
  # include(FindPackageHandleStandardArgs)
  # FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG NCURSES_LIBRARY READLINE_INCLUDE_DIR READLINE_LIBRARY )
  #
  # It was modified, on advice, so that an ncurses library is found if
  # required but not if one was explicitly given; that is, the default can be
  # overridden. PH

  find_library(READLINE_LIBRARY NAMES readline)

  include(FindPackageHandleStandardArgs)
  find_package_handle_standard_args(Readline DEFAULT_MSG READLINE_INCLUDE_DIR READLINE_LIBRARY)

  mark_as_advanced(READLINE_INCLUDE_DIR READLINE_LIBRARY)
else()
  # Everything was supplied up front, so skip the search.
  set(READLINE_FOUND TRUE)
endif()
|
||||
@@ -1,14 +0,0 @@
|
||||
# Version of this PCRE2 package; the @...@ placeholders are filled in when
# the file is configured. The patch component is always 0.
set(PACKAGE_VERSION_MAJOR @PCRE2_MAJOR@)
set(PACKAGE_VERSION_MINOR @PCRE2_MINOR@)
set(PACKAGE_VERSION_PATCH 0)
set(PACKAGE_VERSION @PCRE2_MAJOR@.@PCRE2_MINOR@.0)

# Check whether the requested PACKAGE_FIND_VERSION is compatible.
# The package is rejected when it is older than the request or when its major
# version is greater than the requested major version; otherwise it is
# compatible, and additionally an exact match when the versions are equal.
set(PACKAGE_VERSION_COMPATIBLE FALSE)
if(
  NOT (PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)
  AND NOT (PACKAGE_VERSION_MAJOR GREATER PACKAGE_FIND_VERSION_MAJOR)
)
  set(PACKAGE_VERSION_COMPATIBLE TRUE)
  if(PACKAGE_VERSION VERSION_EQUAL PACKAGE_FIND_VERSION)
    set(PACKAGE_VERSION_EXACT TRUE)
  endif()
endif()
|
||||
@@ -1,168 +0,0 @@
|
||||
# pcre2-config.cmake
|
||||
# ----------------
|
||||
#
|
||||
# Finds the PCRE2 library; specify the starting search path in PCRE2_ROOT.
|
||||
#
|
||||
# Static vs. shared
|
||||
# -----------------
|
||||
# To make use of the static library instead of the shared one, one needs
|
||||
# to set the variable PCRE2_USE_STATIC_LIBS to ON before calling find_package.
|
||||
# Example:
|
||||
# set(PCRE2_USE_STATIC_LIBS ON)
|
||||
# find_package(PCRE2 CONFIG COMPONENTS 8BIT)
|
||||
#
|
||||
# This will define the following variables:
|
||||
#
|
||||
# PCRE2_FOUND - True if the system has the PCRE2 library.
|
||||
# PCRE2_VERSION - The version of the PCRE2 library which was found.
|
||||
#
|
||||
# and the following imported targets:
|
||||
#
|
||||
# PCRE2::8BIT - The 8 bit PCRE2 library.
|
||||
# PCRE2::16BIT - The 16 bit PCRE2 library.
|
||||
# PCRE2::32BIT - The 32 bit PCRE2 library.
|
||||
# PCRE2::POSIX - The POSIX PCRE2 library.
|
||||
|
||||
# Settings recorded when PCRE2 itself was configured.
set(PCRE2_NON_STANDARD_LIB_PREFIX @NON_STANDARD_LIB_PREFIX@)
set(PCRE2_NON_STANDARD_LIB_SUFFIX @NON_STANDARD_LIB_SUFFIX@)

# Base library names. The static libraries carry a "-static" suffix in their
# name only when static libraries are requested under MSVC.
if(PCRE2_USE_STATIC_LIBS AND MSVC)
  set(PCRE2_8BIT_NAME pcre2-8-static)
  set(PCRE2_16BIT_NAME pcre2-16-static)
  set(PCRE2_32BIT_NAME pcre2-32-static)
  set(PCRE2_POSIX_NAME pcre2-posix-static)
else()
  set(PCRE2_8BIT_NAME pcre2-8)
  set(PCRE2_16BIT_NAME pcre2-16)
  set(PCRE2_32BIT_NAME pcre2-32)
  set(PCRE2_POSIX_NAME pcre2-posix)
endif()

find_path(PCRE2_INCLUDE_DIR NAMES pcre2.h DOC "PCRE2 include directory")

# File-name prefix and suffix wrapped around the base names.
if(PCRE2_USE_STATIC_LIBS)
  set(PCRE2_PREFIX ${CMAKE_STATIC_LIBRARY_PREFIX})
  set(PCRE2_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
else()
  # MinGW builds made with the non-standard naming options have no prefix
  # and a "-0.dll" suffix.
  if(MINGW AND PCRE2_NON_STANDARD_LIB_PREFIX)
    set(PCRE2_PREFIX "")
  else()
    set(PCRE2_PREFIX ${CMAKE_SHARED_LIBRARY_PREFIX})
  endif()

  if(MINGW AND PCRE2_NON_STANDARD_LIB_SUFFIX)
    set(PCRE2_SUFFIX "-0.dll")
  elseif(MSVC)
    # Under MSVC the static-library suffix is used for the shared case too.
    set(PCRE2_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
  else()
    set(PCRE2_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX})
  endif()
endif()

# Each search accepts the plain name and the same name with a "d" appended.
find_library(
  PCRE2_8BIT_LIBRARY
  NAMES
    ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}${PCRE2_SUFFIX}
    ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}d${PCRE2_SUFFIX}
  DOC "8 bit PCRE2 library"
)
find_library(
  PCRE2_16BIT_LIBRARY
  NAMES
    ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}${PCRE2_SUFFIX}
    ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}d${PCRE2_SUFFIX}
  DOC "16 bit PCRE2 library"
)
find_library(
  PCRE2_32BIT_LIBRARY
  NAMES
    ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}${PCRE2_SUFFIX}
    ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}d${PCRE2_SUFFIX}
  DOC "32 bit PCRE2 library"
)
find_library(
  PCRE2_POSIX_LIBRARY
  NAMES
    ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}${PCRE2_SUFFIX}
    ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}d${PCRE2_SUFFIX}
  DOC "8 bit POSIX PCRE2 library"
)

# Drop the helper variables that were only needed to build the names.
unset(PCRE2_NON_STANDARD_LIB_PREFIX)
unset(PCRE2_NON_STANDARD_LIB_SUFFIX)
unset(PCRE2_8BIT_NAME)
unset(PCRE2_16BIT_NAME)
unset(PCRE2_32BIT_NAME)
unset(PCRE2_POSIX_NAME)
|
||||
|
||||
# Set version
# The version is only reported when the header directory was located.
if(PCRE2_INCLUDE_DIR)
  set(PCRE2_VERSION "@PCRE2_MAJOR@.@PCRE2_MINOR@.0")
endif()

# Which components have been found.
# A component counts as found when its library variable holds a true value.
foreach(pcre2_kind IN ITEMS 8BIT 16BIT 32BIT POSIX)
  if(PCRE2_${pcre2_kind}_LIBRARY)
    set(PCRE2_${pcre2_kind}_FOUND TRUE)
  endif()
endforeach()
unset(pcre2_kind)
|
||||
|
||||
# Check if at least one component has been specified.
list(LENGTH PCRE2_FIND_COMPONENTS PCRE2_NCOMPONENTS)
if(PCRE2_NCOMPONENTS EQUAL 0)
  message(FATAL_ERROR "No components have been specified. This is not allowed. Please, specify at least one component.")
endif()
unset(PCRE2_NCOMPONENTS)

# When POSIX component has been specified make sure that also 8BIT component is specified.
# list(FIND) stores the position of the entry, or -1 when it is absent.
list(FIND PCRE2_FIND_COMPONENTS "8BIT" PCRE2_8BIT_COMPONENT)
list(FIND PCRE2_FIND_COMPONENTS "POSIX" PCRE2_POSIX_COMPONENT)

if(PCRE2_POSIX_COMPONENT GREATER -1 AND PCRE2_8BIT_COMPONENT EQUAL -1)
  message(
    FATAL_ERROR
    "The component POSIX is specified while the 8BIT one is not. This is not allowed. Please, also specify the 8BIT component."
  )
endif()

unset(PCRE2_8BIT_COMPONENT)
unset(PCRE2_POSIX_COMPONENT)
|
||||
|
||||
# Let the standard helper decide PCRE2_FOUND from the include directory and
# the per-component *_FOUND flags (HANDLE_COMPONENTS), reporting in config
# mode.
include(FindPackageHandleStandardArgs)
set(${CMAKE_FIND_PACKAGE_NAME}_CONFIG "${CMAKE_CURRENT_LIST_FILE}")
find_package_handle_standard_args(
  PCRE2
  FOUND_VAR PCRE2_FOUND
  REQUIRED_VARS PCRE2_INCLUDE_DIR
  HANDLE_COMPONENTS
  VERSION_VAR PCRE2_VERSION
  CONFIG_MODE
)

# Create one imported target per requested component and collect the library
# paths in PCRE2_LIBRARIES.
set(PCRE2_LIBRARIES)
if(PCRE2_FOUND)
  foreach(component ${PCRE2_FIND_COMPONENTS})
    # Only define the target once. Without this guard a second
    # find_package(PCRE2) call in the same directory scope fails, because
    # add_library() refuses to create a target whose name already exists.
    if(NOT TARGET PCRE2::${component})
      if(PCRE2_USE_STATIC_LIBS)
        add_library(PCRE2::${component} STATIC IMPORTED)
        # Consumers of the static library are compiled with PCRE2_STATIC
        # defined.
        target_compile_definitions(PCRE2::${component} INTERFACE PCRE2_STATIC)
      else()
        add_library(PCRE2::${component} SHARED IMPORTED)
      endif()
      set_target_properties(
        PCRE2::${component}
        PROPERTIES
          IMPORTED_LOCATION "${PCRE2_${component}_LIBRARY}"
          IMPORTED_IMPLIB "${PCRE2_${component}_LIBRARY}"
          INTERFACE_INCLUDE_DIRECTORIES "${PCRE2_INCLUDE_DIR}"
      )
      # The POSIX wrapper is linked together with the 8-bit library.
      if(component STREQUAL "POSIX")
        set_target_properties(
          PCRE2::${component}
          PROPERTIES INTERFACE_LINK_LIBRARIES "PCRE2::8BIT" LINK_LIBRARIES "PCRE2::8BIT"
        )
      endif()
    endif()

    list(APPEND PCRE2_LIBRARIES ${PCRE2_${component}_LIBRARY})
    mark_as_advanced(PCRE2_${component}_LIBRARY)
  endforeach()
endif()

mark_as_advanced(PCRE2_INCLUDE_DIR)
|
||||
@@ -1,58 +0,0 @@
|
||||
/* config.h for CMake builds */
|
||||
|
||||
#cmakedefine HAVE_ASSERT_H 1
|
||||
#cmakedefine HAVE_BUILTIN_ASSUME 1
|
||||
#cmakedefine HAVE_BUILTIN_MUL_OVERFLOW 1
|
||||
#cmakedefine HAVE_BUILTIN_UNREACHABLE 1
|
||||
#cmakedefine HAVE_ATTRIBUTE_UNINITIALIZED 1
|
||||
#cmakedefine HAVE_DIRENT_H 1
|
||||
#cmakedefine HAVE_SYS_STAT_H 1
|
||||
#cmakedefine HAVE_SYS_TYPES_H 1
|
||||
#cmakedefine HAVE_UNISTD_H 1
|
||||
#cmakedefine HAVE_WINDOWS_H 1
|
||||
|
||||
#cmakedefine HAVE_BCOPY 1
|
||||
#cmakedefine HAVE_MEMFD_CREATE 1
|
||||
#cmakedefine HAVE_MEMMOVE 1
|
||||
#cmakedefine HAVE_SECURE_GETENV 1
|
||||
#cmakedefine HAVE_STRERROR 1
|
||||
|
||||
#cmakedefine SUPPORT_PCRE2_8 1
|
||||
#cmakedefine SUPPORT_PCRE2_16 1
|
||||
#cmakedefine SUPPORT_PCRE2_32 1
|
||||
#cmakedefine DISABLE_PERCENT_ZT 1
|
||||
|
||||
#cmakedefine SUPPORT_LIBBZ2 1
|
||||
#cmakedefine SUPPORT_LIBEDIT 1
|
||||
#cmakedefine SUPPORT_LIBREADLINE 1
|
||||
#cmakedefine SUPPORT_LIBZ 1
|
||||
|
||||
#cmakedefine SUPPORT_JIT 1
|
||||
#cmakedefine SLJIT_PROT_EXECUTABLE_ALLOCATOR 1
|
||||
#cmakedefine SUPPORT_PCRE2GREP_JIT 1
|
||||
#cmakedefine SUPPORT_PCRE2GREP_CALLOUT 1
|
||||
#cmakedefine SUPPORT_PCRE2GREP_CALLOUT_FORK 1
|
||||
#cmakedefine SUPPORT_UNICODE 1
|
||||
#cmakedefine SUPPORT_VALGRIND 1
|
||||
|
||||
#cmakedefine BSR_ANYCRLF 1
|
||||
#cmakedefine EBCDIC 1
|
||||
#cmakedefine EBCDIC_NL25 1
|
||||
#cmakedefine HEAP_MATCH_RECURSE 1
|
||||
#cmakedefine NEVER_BACKSLASH_C 1
|
||||
|
||||
#define PCRE2_EXPORT @PCRE2_EXPORT@
|
||||
#define LINK_SIZE @PCRE2_LINK_SIZE@
|
||||
#define HEAP_LIMIT @PCRE2_HEAP_LIMIT@
|
||||
#define MATCH_LIMIT @PCRE2_MATCH_LIMIT@
|
||||
#define MATCH_LIMIT_DEPTH @PCRE2_MATCH_LIMIT_DEPTH@
|
||||
#define MAX_VARLOOKBEHIND @PCRE2_MAX_VARLOOKBEHIND@
|
||||
#define NEWLINE_DEFAULT @NEWLINE_DEFAULT@
|
||||
#define PARENS_NEST_LIMIT @PCRE2_PARENS_NEST_LIMIT@
|
||||
#define PCRE2GREP_BUFSIZE @PCRE2GREP_BUFSIZE@
|
||||
#define PCRE2GREP_MAX_BUFSIZE @PCRE2GREP_MAX_BUFSIZE@
|
||||
|
||||
#define MAX_NAME_SIZE 128
|
||||
#define MAX_NAME_COUNT 10000
|
||||
|
||||
/* end config.h for CMake builds */
|
||||
@@ -1,1228 +0,0 @@
|
||||
dnl Process this file with autoconf to produce a configure script.
|
||||
|
||||
dnl NOTE FOR MAINTAINERS: Do not use minor version numbers 08 or 09 because
|
||||
dnl the leading zeros may cause them to be treated as invalid octal constants
|
||||
dnl if a PCRE2 user writes code that uses PCRE2_MINOR as a number. There is now
|
||||
dnl a check further down that throws an error if 08 or 09 are used.
|
||||
|
||||
dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might
|
||||
dnl be defined as -RC2, for example. For real releases, it should be empty.
|
||||
|
||||
m4_define(pcre2_major, [10])
|
||||
m4_define(pcre2_minor, [45])
|
||||
m4_define(pcre2_prerelease, [])
|
||||
m4_define(pcre2_date, [2025-02-05])
|
||||
|
||||
# Libtool shared library interface versions (current:revision:age)
|
||||
m4_define(libpcre2_8_version, [14:0:14])
|
||||
m4_define(libpcre2_16_version, [14:0:14])
|
||||
m4_define(libpcre2_32_version, [14:0:14])
|
||||
m4_define(libpcre2_posix_version, [3:6:0])
|
||||
|
||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||
# 50 lines of this file. Please update that if the variables above are moved.
|
||||
|
||||
AC_PREREQ([2.60])
|
||||
AC_INIT([PCRE2],pcre2_major.pcre2_minor[]pcre2_prerelease,[],[pcre2])
|
||||
AC_CONFIG_SRCDIR([src/pcre2.h.in])
|
||||
AM_INIT_AUTOMAKE([dist-bzip2 dist-zip foreign])
|
||||
ifelse(pcre2_prerelease, [-DEV],
|
||||
[dnl For development builds, ./configure is not checked in to Git, so we are
|
||||
dnl happy to have it regenerated as needed.
|
||||
AM_MAINTAINER_MODE([enable])],
|
||||
[dnl For a release build (or RC), the ./configure script we ship in the
|
||||
dnl tarball (and check in to the Git tag) should not be regenerated
|
||||
dnl implicitly. This is important if users want to check out a release tag
|
||||
dnl using Git.
|
||||
AM_MAINTAINER_MODE])
|
||||
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
||||
AC_CONFIG_HEADERS(src/config.h)
|
||||
|
||||
# This was added at the suggestion of libtoolize (03-Jan-10)
|
||||
AC_CONFIG_MACRO_DIR([m4])
|
||||
|
||||
# The default CFLAGS in Autoconf are "-g -O2" for gcc and just "-g" for any
|
||||
# other compiler. There doesn't seem to be a standard way of getting rid of the
|
||||
# -g (which I don't think is needed for a production library). This fudge seems
|
||||
# to achieve the necessary. First, we remember the externally set values of
|
||||
# CFLAGS. Then call the AC_PROG_CC macro to find the compiler - if CFLAGS is
|
||||
# not set, it will be set to Autoconf's defaults. Afterwards, if the original
|
||||
# values were not set, remove the -g from the Autoconf defaults.
|
||||
|
||||
remember_set_CFLAGS="$CFLAGS"
|
||||
|
||||
m4_version_prereq(2.70, [AC_PROG_CC], [AC_PROG_CC_C99])
|
||||
AM_PROG_CC_C_O
|
||||
AC_USE_SYSTEM_EXTENSIONS
|
||||
|
||||
if test "x$remember_set_CFLAGS" = "x"
|
||||
then
|
||||
if test "$CFLAGS" = "-g -O2"
|
||||
then
|
||||
CFLAGS="-O2"
|
||||
elif test "$CFLAGS" = "-g"
|
||||
then
|
||||
CFLAGS=""
|
||||
fi
|
||||
fi
|
||||
|
||||
# This is a new thing required to stop a warning from automake 1.12
|
||||
m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
|
||||
|
||||
# Check for a 64-bit integer type
|
||||
AC_TYPE_INT64_T
|
||||
|
||||
AC_PROG_INSTALL
|
||||
LT_INIT([win32-dll])
|
||||
AC_PROG_LN_S
|
||||
|
||||
AC_SYS_LARGEFILE
|
||||
|
||||
# Check for GCC visibility feature
|
||||
|
||||
PCRE2_VISIBILITY
|
||||
|
||||
# Check for Clang __attribute__((uninitialized)) feature
#
# Compiles a tiny program that declares a buffer with the attribute and
# defines HAVE_ATTRIBUTE_UNINITIALIZED when that succeeds. CFLAGS is saved
# first and restored afterwards, so the optional -Werror only affects this
# one probe.

AC_MSG_CHECKING([for __attribute__((uninitialized))])
AC_LANG_PUSH([C])
tmp_CFLAGS=$CFLAGS
# WORKING_WERROR is set outside this part of the file. It is compared as a
# quoted string so that an empty or unset value simply skips -Werror instead
# of making the test command fail with a syntax error.
if test "x$WORKING_WERROR" = "x1"; then
  CFLAGS="$CFLAGS -Werror"
fi
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,
                   [[char buf[128] __attribute__((uninitialized));(void)buf]])],
                  [pcre2_cc_cv_attribute_uninitialized=yes],
                  [pcre2_cc_cv_attribute_uninitialized=no])
AC_MSG_RESULT([$pcre2_cc_cv_attribute_uninitialized])
if test "$pcre2_cc_cv_attribute_uninitialized" = yes; then
  AC_DEFINE([HAVE_ATTRIBUTE_UNINITIALIZED], 1, [Define this if your compiler
            supports __attribute__((uninitialized))])
fi
CFLAGS=$tmp_CFLAGS
AC_LANG_POP([C])
|
||||
|
||||
# Check for the assume() builtin
|
||||
|
||||
AC_MSG_CHECKING([for __assume()])
|
||||
AC_LANG_PUSH([C])
|
||||
AC_LINK_IFELSE([AC_LANG_PROGRAM([[]], [[__assume(1)]])],
|
||||
[pcre2_cc_cv_builtin_assume=yes],
|
||||
[pcre2_cc_cv_builtin_assume=no])
|
||||
AC_MSG_RESULT([$pcre2_cc_cv_builtin_assume])
|
||||
if test "$pcre2_cc_cv_builtin_assume" = yes; then
|
||||
AC_DEFINE([HAVE_BUILTIN_ASSUME], 1,
|
||||
[Define this if your compiler provides __assume()])
|
||||
fi
|
||||
AC_LANG_POP([C])
|
||||
|
||||
# Check for the mul_overflow() builtin
|
||||
|
||||
AC_MSG_CHECKING([for __builtin_mul_overflow()])
|
||||
AC_LANG_PUSH([C])
|
||||
AC_LINK_IFELSE([AC_LANG_PROGRAM([[
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#include <stddef.h>
|
||||
|
||||
int a, b;
|
||||
size_t m;
|
||||
]], [[__builtin_mul_overflow(a, b, &m)]])],
|
||||
[pcre2_cc_cv_builtin_mul_overflow=yes],
|
||||
[pcre2_cc_cv_builtin_mul_overflow=no])
|
||||
AC_MSG_RESULT([$pcre2_cc_cv_builtin_mul_overflow])
|
||||
if test "$pcre2_cc_cv_builtin_mul_overflow" = yes; then
|
||||
AC_DEFINE([HAVE_BUILTIN_MUL_OVERFLOW], 1,
|
||||
[Define this if your compiler provides __builtin_mul_overflow()])
|
||||
fi
|
||||
AC_LANG_POP([C])
|
||||
|
||||
# Check for the unreachable() builtin
|
||||
|
||||
AC_MSG_CHECKING([for __builtin_unreachable()])
|
||||
AC_LANG_PUSH([C])
|
||||
AC_LINK_IFELSE([AC_LANG_PROGRAM([[int r;]], [[if (r) __builtin_unreachable()]])],
|
||||
[pcre2_cc_cv_builtin_unreachable=yes],
|
||||
[pcre2_cc_cv_builtin_unreachable=no])
|
||||
AC_MSG_RESULT([$pcre2_cc_cv_builtin_unreachable])
|
||||
if test "$pcre2_cc_cv_builtin_unreachable" = yes; then
|
||||
AC_DEFINE([HAVE_BUILTIN_UNREACHABLE], 1,
|
||||
[Define this if your compiler provides __builtin_unreachable()])
|
||||
fi
|
||||
AC_LANG_POP([C])
|
||||
|
||||
# Versioning
#
# Copy the version components defined at the top of this file into shell
# variables and export them to the generated files with AC_SUBST.

PCRE2_MAJOR="pcre2_major"
PCRE2_MINOR="pcre2_minor"
PCRE2_PRERELEASE="pcre2_prerelease"
PCRE2_DATE="pcre2_date"

# Refuse minor versions 08 and 09, which would read as invalid octal
# constants. Two test commands joined with || are used because the -o
# operator of test is not portable across shells.
if test "$PCRE2_MINOR" = "08" || test "$PCRE2_MINOR" = "09"
then
  echo "***"
  echo "*** Minor version number $PCRE2_MINOR must not be used. ***"
  echo "*** Use only 00 to 07 or 10 onwards, to avoid octal issues. ***"
  echo "***"
  exit 1
fi

AC_SUBST(PCRE2_MAJOR)
AC_SUBST(PCRE2_MINOR)
AC_SUBST(PCRE2_PRERELEASE)
AC_SUBST(PCRE2_DATE)
|
||||
|
||||
# Set a more sensible default value for $(htmldir).
|
||||
if test "x$htmldir" = 'x${docdir}'
|
||||
then
|
||||
htmldir='${docdir}/html'
|
||||
fi
|
||||
|
||||
# Force an error for PCRE1 size options
|
||||
AC_ARG_ENABLE(pcre8,,,enable_pcre8=no)
|
||||
AC_ARG_ENABLE(pcre16,,,enable_pcre16=no)
|
||||
AC_ARG_ENABLE(pcre32,,,enable_pcre32=no)
|
||||
|
||||
if test "$enable_pcre8$enable_pcre16$enable_pcre32" != "nonono"
|
||||
then
|
||||
echo "** ERROR: Use --[[en|dis]]able-pcre2-[[8|16|32]], not --[[en|dis]]able-pcre[[8|16|32]]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Handle --disable-pcre2-8 (enabled by default)
|
||||
AC_ARG_ENABLE(pcre2-8,
|
||||
AS_HELP_STRING([--disable-pcre2-8],
|
||||
[disable 8 bit character support]),
|
||||
, enable_pcre2_8=unset)
|
||||
AC_SUBST(enable_pcre2_8)
|
||||
|
||||
# Handle --enable-pcre2-16 (disabled by default)
|
||||
AC_ARG_ENABLE(pcre2-16,
|
||||
AS_HELP_STRING([--enable-pcre2-16],
|
||||
[enable 16 bit character support]),
|
||||
, enable_pcre2_16=unset)
|
||||
AC_SUBST(enable_pcre2_16)
|
||||
|
||||
# Handle --enable-pcre2-32 (disabled by default)
|
||||
AC_ARG_ENABLE(pcre2-32,
|
||||
AS_HELP_STRING([--enable-pcre2-32],
|
||||
[enable 32 bit character support]),
|
||||
, enable_pcre2_32=unset)
|
||||
AC_SUBST(enable_pcre2_32)
|
||||
|
||||
# Handle --enable-debug (disabled by default)
|
||||
AC_ARG_ENABLE(debug,
|
||||
AS_HELP_STRING([--enable-debug],
|
||||
[enable debugging code]),
|
||||
, enable_debug=no)
|
||||
|
||||
# Handle --enable-jit (disabled by default)
|
||||
AC_ARG_ENABLE(jit,
|
||||
AS_HELP_STRING([--enable-jit],
|
||||
[enable Just-In-Time compiling support]),
|
||||
, enable_jit=no)
|
||||
|
||||
# Resolve --enable-jit=auto. A probe that includes sljitConfigCPU.h with
# SLJIT_CONFIG_AUTO defined is compiled; the probe hits #error when the header
# reports SLJIT_CONFIG_UNSUPPORTED, so enable_jit becomes "yes" when the probe
# compiles and "no" otherwise.
if test "$enable_jit" = "auto"; then
  AC_LANG([C])
  # Use the standard message macros so the result is recorded in config.log
  # and is suppressed by --quiet like every other check.
  AC_MSG_CHECKING([for JIT support on this hardware])
  # The probe is compiled with only the source directory on the include path;
  # the caller's CPPFLAGS are restored afterwards.
  SAVE_CPPFLAGS=$CPPFLAGS
  CPPFLAGS="-I$srcdir"
  AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
  #define SLJIT_CONFIG_AUTO 1
  #include "deps/sljit/sljit_src/sljitConfigCPU.h"
  #if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
  #error unsupported
  #endif]])], [enable_jit=yes], [enable_jit=no])
  CPPFLAGS=$SAVE_CPPFLAGS
  AC_MSG_RESULT([$enable_jit])
fi
|
||||
|
||||
# Handle --enable-jit-sealloc (disabled by default and only experimental)
|
||||
case $host_os in
|
||||
linux* | netbsd*)
|
||||
AC_ARG_ENABLE(jit-sealloc,
|
||||
AS_HELP_STRING([--enable-jit-sealloc],
|
||||
[enable SELinux compatible execmem allocator in JIT (experimental)]),
|
||||
,enable_jit_sealloc=no)
|
||||
;;
|
||||
*)
|
||||
enable_jit_sealloc=unsupported
|
||||
;;
|
||||
esac
|
||||
|
||||
# Handle --disable-pcre2grep-jit (enabled by default)
|
||||
AC_ARG_ENABLE(pcre2grep-jit,
|
||||
AS_HELP_STRING([--disable-pcre2grep-jit],
|
||||
[disable JIT support in pcre2grep]),
|
||||
, enable_pcre2grep_jit=yes)
|
||||
|
||||
# Handle --disable-pcre2grep-callout (enabled by default)
|
||||
AC_ARG_ENABLE(pcre2grep-callout,
|
||||
AS_HELP_STRING([--disable-pcre2grep-callout],
|
||||
[disable callout script support in pcre2grep]),
|
||||
, enable_pcre2grep_callout=yes)
|
||||
|
||||
# Handle --disable-pcre2grep-callout-fork (enabled by default)
|
||||
AC_ARG_ENABLE(pcre2grep-callout-fork,
|
||||
AS_HELP_STRING([--disable-pcre2grep-callout-fork],
|
||||
[disable callout script fork support in pcre2grep]),
|
||||
, enable_pcre2grep_callout_fork=yes)
|
||||
|
||||
# Handle --enable-rebuild-chartables
|
||||
AC_ARG_ENABLE(rebuild-chartables,
|
||||
AS_HELP_STRING([--enable-rebuild-chartables],
|
||||
[rebuild character tables in current locale]),
|
||||
, enable_rebuild_chartables=no)
|
||||
|
||||
# Handle --disable-unicode (enabled by default)
|
||||
AC_ARG_ENABLE(unicode,
|
||||
AS_HELP_STRING([--disable-unicode],
|
||||
[disable Unicode support]),
|
||||
, enable_unicode=unset)
|
||||
|
||||
# Handle newline options
|
||||
ac_pcre2_newline=lf
|
||||
AC_ARG_ENABLE(newline-is-cr,
|
||||
AS_HELP_STRING([--enable-newline-is-cr],
|
||||
[use CR as newline character]),
|
||||
ac_pcre2_newline=cr)
|
||||
AC_ARG_ENABLE(newline-is-lf,
|
||||
AS_HELP_STRING([--enable-newline-is-lf],
|
||||
[use LF as newline character (default)]),
|
||||
ac_pcre2_newline=lf)
|
||||
AC_ARG_ENABLE(newline-is-crlf,
|
||||
AS_HELP_STRING([--enable-newline-is-crlf],
|
||||
[use CRLF as newline sequence]),
|
||||
ac_pcre2_newline=crlf)
|
||||
AC_ARG_ENABLE(newline-is-anycrlf,
|
||||
AS_HELP_STRING([--enable-newline-is-anycrlf],
|
||||
[use CR, LF, or CRLF as newline sequence]),
|
||||
ac_pcre2_newline=anycrlf)
|
||||
AC_ARG_ENABLE(newline-is-any,
|
||||
AS_HELP_STRING([--enable-newline-is-any],
|
||||
[use any valid Unicode newline sequence]),
|
||||
ac_pcre2_newline=any)
|
||||
AC_ARG_ENABLE(newline-is-nul,
|
||||
AS_HELP_STRING([--enable-newline-is-nul],
|
||||
[use NUL (binary zero) as newline character]),
|
||||
ac_pcre2_newline=nul)
|
||||
enable_newline="$ac_pcre2_newline"
|
||||
|
||||
# Handle --enable-bsr-anycrlf
|
||||
AC_ARG_ENABLE(bsr-anycrlf,
|
||||
AS_HELP_STRING([--enable-bsr-anycrlf],
|
||||
[\R matches only CR, LF, CRLF by default]),
|
||||
, enable_bsr_anycrlf=no)
|
||||
|
||||
# Handle --enable-never-backslash-C
|
||||
AC_ARG_ENABLE(never-backslash-C,
|
||||
AS_HELP_STRING([--enable-never-backslash-C],
|
||||
[use of \C causes an error]),
|
||||
, enable_never_backslash_C=no)
|
||||
|
||||
# Handle --enable-ebcdic
|
||||
AC_ARG_ENABLE(ebcdic,
|
||||
AS_HELP_STRING([--enable-ebcdic],
|
||||
[assume EBCDIC coding rather than ASCII; incompatible with --enable-unicode; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]),
|
||||
, enable_ebcdic=no)
|
||||
|
||||
# Handle --enable-ebcdic-nl25
|
||||
AC_ARG_ENABLE(ebcdic-nl25,
|
||||
AS_HELP_STRING([--enable-ebcdic-nl25],
|
||||
[set EBCDIC code for NL to 0x25 instead of 0x15; it implies --enable-ebcdic]),
|
||||
, enable_ebcdic_nl25=no)
|
||||
|
||||
# Handle --enable-pcre2grep-libz
|
||||
AC_ARG_ENABLE(pcre2grep-libz,
|
||||
AS_HELP_STRING([--enable-pcre2grep-libz],
|
||||
[link pcre2grep with libz to handle .gz files]),
|
||||
, enable_pcre2grep_libz=no)
|
||||
|
||||
# Handle --enable-pcre2grep-libbz2
|
||||
AC_ARG_ENABLE(pcre2grep-libbz2,
|
||||
AS_HELP_STRING([--enable-pcre2grep-libbz2],
|
||||
[link pcre2grep with libbz2 to handle .bz2 files]),
|
||||
, enable_pcre2grep_libbz2=no)
|
||||
|
||||
# Handle --with-pcre2grep-bufsize=N
|
||||
AC_ARG_WITH(pcre2grep-bufsize,
|
||||
AS_HELP_STRING([--with-pcre2grep-bufsize=N],
|
||||
[pcre2grep initial buffer size (default=20480, minimum=8192)]),
|
||||
, with_pcre2grep_bufsize=20480)
|
||||
|
||||
# Handle --with-pcre2grep-max-bufsize=N
|
||||
AC_ARG_WITH(pcre2grep-max-bufsize,
|
||||
AS_HELP_STRING([--with-pcre2grep-max-bufsize=N],
|
||||
[pcre2grep maximum buffer size (default=1048576, minimum=8192)]),
|
||||
, with_pcre2grep_max_bufsize=1048576)
|
||||
|
||||
# Handle --enable-pcre2test-libedit
|
||||
AC_ARG_ENABLE(pcre2test-libedit,
|
||||
AS_HELP_STRING([--enable-pcre2test-libedit],
|
||||
[link pcre2test with libedit]),
|
||||
, enable_pcre2test_libedit=no)
|
||||
|
||||
# Handle --enable-pcre2test-libreadline
|
||||
AC_ARG_ENABLE(pcre2test-libreadline,
|
||||
AS_HELP_STRING([--enable-pcre2test-libreadline],
|
||||
[link pcre2test with libreadline]),
|
||||
, enable_pcre2test_libreadline=no)
|
||||
|
||||
# Handle --with-link-size=N
|
||||
AC_ARG_WITH(link-size,
|
||||
AS_HELP_STRING([--with-link-size=N],
|
||||
[internal link size (2, 3, or 4 allowed; default=2)]),
|
||||
, with_link_size=2)
|
||||
|
||||
# Handle --with-max-varlookbehind=N
|
||||
AC_ARG_WITH(max-varlookbehind,
|
||||
AS_HELP_STRING([--with-max-varlookbehind=N],
|
||||
[maximum length of variable lookbehind (default=255)]),
|
||||
, with_max_varlookbehind=255)
|
||||
|
||||
# Handle --with-parens-nest-limit=N
|
||||
AC_ARG_WITH(parens-nest-limit,
|
||||
AS_HELP_STRING([--with-parens-nest-limit=N],
|
||||
[nested parentheses limit (default=250)]),
|
||||
, with_parens_nest_limit=250)
|
||||
|
||||
# Handle --with-heap-limit
|
||||
AC_ARG_WITH(heap-limit,
|
||||
AS_HELP_STRING([--with-heap-limit=N],
|
||||
[default limit on heap memory (kibibytes, default=20000000)]),
|
||||
, with_heap_limit=20000000)
|
||||
|
||||
# Handle --with-match-limit=N
|
||||
AC_ARG_WITH(match-limit,
|
||||
AS_HELP_STRING([--with-match-limit=N],
|
||||
[default limit on internal looping (default=10000000)]),
|
||||
, with_match_limit=10000000)
|
||||
|
||||
# Handle --with-match-limit-depth=N
|
||||
# Recognize old synonym --with-match-limit-recursion
|
||||
#
|
||||
# Note: In config.h, the default is to define MATCH_LIMIT_DEPTH symbolically as
|
||||
# MATCH_LIMIT, which in turn is defined to be some numeric value (e.g.
|
||||
# 10000000). MATCH_LIMIT_DEPTH can otherwise be set to some different numeric
|
||||
# value (or even the same numeric value as MATCH_LIMIT, though no longer
|
||||
# defined in terms of the latter).
|
||||
#
|
||||
AC_ARG_WITH(match-limit-depth,
|
||||
AS_HELP_STRING([--with-match-limit-depth=N],
|
||||
[default limit on match tree depth (default=MATCH_LIMIT)]),
|
||||
, with_match_limit_depth=MATCH_LIMIT)
|
||||
|
||||
AC_ARG_WITH(match-limit-recursion,,
|
||||
, with_match_limit_recursion=UNSET)
|
||||
|
||||
# Handle --enable-valgrind
|
||||
AC_ARG_ENABLE(valgrind,
|
||||
AS_HELP_STRING([--enable-valgrind],
|
||||
[enable valgrind support]),
|
||||
, enable_valgrind=no)
|
||||
|
||||
# Enable code coverage reports using gcov
|
||||
AC_ARG_ENABLE(coverage,
|
||||
AS_HELP_STRING([--enable-coverage],
|
||||
[enable code coverage reports using gcov]),
|
||||
, enable_coverage=no)
|
||||
|
||||
# Handle --enable-fuzz-support
|
||||
AC_ARG_ENABLE(fuzz_support,
|
||||
AS_HELP_STRING([--enable-fuzz-support],
|
||||
[enable fuzzer support]),
|
||||
, enable_fuzz_support=no)
|
||||
|
||||
# Handle --enable-diff-fuzz-support
|
||||
AC_ARG_ENABLE(diff_fuzz_support,
|
||||
AS_HELP_STRING([--enable-diff-fuzz-support],
|
||||
[enable differential fuzzer support]),
|
||||
, enable_diff_fuzz_support=no)
|
||||
|
||||
# Handle --disable-stack-for-recursion
|
||||
# This option became obsolete at release 10.30.
|
||||
AC_ARG_ENABLE(stack-for-recursion,,
|
||||
, enable_stack_for_recursion=yes)
|
||||
|
||||
# Original code
|
||||
# AC_ARG_ENABLE(stack-for-recursion,
|
||||
# AS_HELP_STRING([--disable-stack-for-recursion],
|
||||
# [don't use stack recursion when matching]),
|
||||
# , enable_stack_for_recursion=yes)
|
||||
|
||||
# Handle --disable-percent-zt (set as "auto" by default)
|
||||
AC_ARG_ENABLE(percent-zt,
|
||||
AS_HELP_STRING([--disable-percent-zt],
|
||||
[disable the use of z and t formatting modifiers]),
|
||||
, enable_percent_zt=auto)
|
||||
|
||||
# Set the default value for pcre2-8
|
||||
if test "x$enable_pcre2_8" = "xunset"
|
||||
then
|
||||
enable_pcre2_8=yes
|
||||
fi
|
||||
|
||||
# Set the default value for pcre2-16
|
||||
if test "x$enable_pcre2_16" = "xunset"
|
||||
then
|
||||
enable_pcre2_16=no
|
||||
fi
|
||||
|
||||
# Set the default value for pcre2-32
|
||||
if test "x$enable_pcre2_32" = "xunset"
|
||||
then
|
||||
enable_pcre2_32=no
|
||||
fi
|
||||
|
||||
# Make sure at least one library is selected
|
||||
if test "x$enable_pcre2_8$enable_pcre2_16$enable_pcre2_32" = "xnonono"
|
||||
then
|
||||
AC_MSG_ERROR([At least one of the 8, 16 or 32 bit libraries must be enabled])
|
||||
fi
|
||||
|
||||
# Unicode is enabled by default.
|
||||
if test "x$enable_unicode" = "xunset"
|
||||
then
|
||||
enable_unicode=yes
|
||||
fi
|
||||
|
||||
# Convert the newline identifier into the appropriate integer value. These must
|
||||
# agree with the PCRE2_NEWLINE_xxx values in pcre2.h.
|
||||
|
||||
case "$enable_newline" in
|
||||
cr) ac_pcre2_newline_value=1 ;;
|
||||
lf) ac_pcre2_newline_value=2 ;;
|
||||
crlf) ac_pcre2_newline_value=3 ;;
|
||||
any) ac_pcre2_newline_value=4 ;;
|
||||
anycrlf) ac_pcre2_newline_value=5 ;;
|
||||
nul) ac_pcre2_newline_value=6 ;;
|
||||
*)
|
||||
AC_MSG_ERROR([invalid argument "$enable_newline" to --enable-newline option])
|
||||
;;
|
||||
esac
|
||||
|
||||
# --enable-ebcdic-nl25 implies --enable-ebcdic
|
||||
if test "x$enable_ebcdic_nl25" = "xyes"; then
|
||||
enable_ebcdic=yes
|
||||
fi
|
||||
|
||||
# When EBCDIC is enabled the character tables are always rebuilt. EBCDIC
# cannot be combined with Unicode support, because PCRE2 cannot handle EBCDIC
# and UTF in the same build (it would need different character constants
# depending on the mode), and it cannot be combined with the 16-bit or 32-bit
# libraries.
if test "x$enable_ebcdic" = "xyes"; then
  enable_rebuild_chartables=yes
  if test "x$enable_unicode" = "xyes"; then
    AC_MSG_ERROR([support for EBCDIC and Unicode cannot be enabled at the same time])
  fi
  # Two separate test commands joined with || are used instead of "test -o",
  # which is obsolescent and not handled consistently by all shells.
  if test "x$enable_pcre2_16" = "xyes" || test "x$enable_pcre2_32" = "xyes"; then
    AC_MSG_ERROR([EBCDIC support is available only for the 8-bit library])
  fi
fi
|
||||
|
||||
# Check argument to --with-link-size
|
||||
case "$with_link_size" in
|
||||
2|3|4) ;;
|
||||
*)
|
||||
AC_MSG_ERROR([invalid argument "$with_link_size" to --with-link-size option])
|
||||
;;
|
||||
esac
|
||||
|
||||
AH_TOP([
|
||||
/* PCRE2 is written in Standard C, but there are a few non-standard things it
|
||||
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||
systems.
|
||||
|
||||
In environments that support the GNU autotools, config.h.in is converted into
|
||||
config.h by the "configure" script. In environments that use CMake,
|
||||
config-cmake.in is converted into config.h. If you are going to build PCRE2 "by
|
||||
hand" without using "configure" or CMake, you should copy the distributed
|
||||
config.h.generic to config.h, and edit the macro definitions to be the way you
|
||||
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
|
||||
so that config.h is included at the start of every source.
|
||||
|
||||
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
|
||||
but if you do, default values will be taken from config.h for non-boolean
|
||||
macros that are not defined on the command line.
|
||||
|
||||
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be
|
||||
defined (conventionally to 1) for TRUE, and not defined at all for FALSE. All
|
||||
such macros are listed as a commented #undef in config.h.generic. Macros such
|
||||
as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
|
||||
|
||||
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
|
||||
sure both macros are undefined; an emulation function will then be used. */])
|
||||
|
||||
# Checks for header files.
|
||||
AC_CHECK_HEADERS(assert.h limits.h sys/types.h sys/stat.h dirent.h)
|
||||
AC_CHECK_HEADERS([windows.h], [HAVE_WINDOWS_H=1])
|
||||
AC_CHECK_HEADERS([sys/wait.h], [HAVE_SYS_WAIT_H=1])
|
||||
|
||||
# Conditional compilation
|
||||
AM_CONDITIONAL(WITH_PCRE2_8, test "x$enable_pcre2_8" = "xyes")
|
||||
AM_CONDITIONAL(WITH_PCRE2_16, test "x$enable_pcre2_16" = "xyes")
|
||||
AM_CONDITIONAL(WITH_PCRE2_32, test "x$enable_pcre2_32" = "xyes")
|
||||
AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes")
|
||||
AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes")
|
||||
AM_CONDITIONAL(WITH_UNICODE, test "x$enable_unicode" = "xyes")
|
||||
AM_CONDITIONAL(WITH_VALGRIND, test "x$enable_valgrind" = "xyes")
|
||||
AM_CONDITIONAL(WITH_FUZZ_SUPPORT, test "x$enable_fuzz_support" = "xyes")
|
||||
AM_CONDITIONAL(WITH_DIFF_FUZZ_SUPPORT, test "x$enable_diff_fuzz_support" = "xyes")
|
||||
|
||||
# Fuzzer support cannot be combined with a disabled 8-bit library. Failures
# are reported with AC_MSG_ERROR, like the other option checks in this file,
# so the message goes to stderr and is recorded in config.log.
if test "$enable_fuzz_support" = "yes" && test "$enable_pcre2_8" = "no"; then
  AC_MSG_ERROR([Fuzzer support requires the 8-bit library])
fi

# Differential fuzzing is rejected when either fuzzer support or JIT support
# is switched off; otherwise SUPPORT_DIFF_FUZZ is defined.
if test "$enable_diff_fuzz_support" = "yes"; then
  if test "$enable_fuzz_support" = "no"; then
    AC_MSG_ERROR([Differential fuzzing support requires fuzzing support])
  fi
  if test "$enable_jit" = "no"; then
    AC_MSG_ERROR([Differential fuzzing support requires Just-in-Time compilation support])
  fi
  AC_DEFINE([SUPPORT_DIFF_FUZZ], [], [
    Define to any value to enable differential fuzzing support.])
fi
|
||||
|
||||
# Checks for typedefs, structures, and compiler characteristics.
|
||||
|
||||
AC_C_CONST
|
||||
AC_TYPE_SIZE_T
|
||||
|
||||
# Checks for library functions.
|
||||
|
||||
AC_CHECK_FUNCS(bcopy memfd_create memmove mkostemp secure_getenv strerror)
|
||||
AC_MSG_CHECKING([for realpath])
|
||||
AC_LINK_IFELSE([AC_LANG_PROGRAM([[
|
||||
#include <stdlib.h>
|
||||
#include <limits.h>
|
||||
]],[[
|
||||
char buffer[PATH_MAX];
|
||||
realpath(".", buffer);
|
||||
]])],
|
||||
[AC_MSG_RESULT([yes])
|
||||
AC_DEFINE([HAVE_REALPATH], 1,
|
||||
[Define to 1 if you have the `realpath' function.])
|
||||
],
|
||||
AC_MSG_RESULT([no]))
|
||||
|
||||
# Check for the availability of libz (aka zlib)
|
||||
|
||||
AC_CHECK_HEADERS([zlib.h], [HAVE_ZLIB_H=1])
|
||||
AC_CHECK_LIB([z], [gzopen], [HAVE_LIBZ=1])
|
||||
|
||||
# Check for the availability of libbz2. Originally we just used AC_CHECK_LIB,
|
||||
# as for libz. However, this had the following problem, diagnosed and fixed by
|
||||
# a user:
|
||||
#
|
||||
# - libbz2 uses the Pascal calling convention (WINAPI) for the functions
|
||||
# under Win32.
|
||||
# - The standard autoconf AC_CHECK_LIB fails to include "bzlib.h",
|
||||
# therefore missing the function definition.
|
||||
# - The compiler thus generates a "C" signature for the test function.
|
||||
# - The linker fails to find the "C" function.
|
||||
# - PCRE2 fails to configure if asked to do so against libbz2.
|
||||
#
|
||||
# Solution:
|
||||
#
|
||||
# - Replace the AC_CHECK_LIB test with a custom test.
|
||||
|
||||
AC_CHECK_HEADERS([bzlib.h], [HAVE_BZLIB_H=1])
|
||||
# Original test
|
||||
# AC_CHECK_LIB([bz2], [BZ2_bzopen], [HAVE_LIBBZ2=1])
|
||||
#
|
||||
# Custom test follows
|
||||
|
||||
# Custom link test for libbz2: link a small program that calls BZ2_bzopen
# with -lbz2 temporarily appended to LIBS, including bzlib.h when it was
# found so that the real declaration of the function is visible.
# HAVE_LIBBZ2 is set to 1 only when the link succeeds, and LIBS is restored
# afterwards in either case.
AC_MSG_CHECKING([for libbz2])
OLD_LIBS="$LIBS"
LIBS="$LIBS -lbz2"
AC_LINK_IFELSE([AC_LANG_PROGRAM([[
#ifdef HAVE_BZLIB_H
#include <bzlib.h>
#endif]],
[[return (int)BZ2_bzopen("conftest", "rb");]])],
[AC_MSG_RESULT([yes])
 HAVE_LIBBZ2=1],
[AC_MSG_RESULT([no])])
LIBS="$OLD_LIBS"
|
||||
|
||||
# Check for the availability of libreadline
|
||||
|
||||
if test "$enable_pcre2test_libreadline" = "yes"; then
|
||||
AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_H=1])
|
||||
AC_CHECK_HEADERS([readline/history.h], [HAVE_HISTORY_H=1])
|
||||
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lreadline"],
|
||||
[unset ac_cv_lib_readline_readline;
|
||||
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-ltinfo"],
|
||||
[unset ac_cv_lib_readline_readline;
|
||||
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lcurses"],
|
||||
[unset ac_cv_lib_readline_readline;
|
||||
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lncurses"],
|
||||
[unset ac_cv_lib_readline_readline;
|
||||
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lncursesw"],
|
||||
[unset ac_cv_lib_readline_readline;
|
||||
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-ltermcap"],
|
||||
[LIBREADLINE=""],
|
||||
[-ltermcap])],
|
||||
[-lncursesw])],
|
||||
[-lncurses])],
|
||||
[-lcurses])],
|
||||
[-ltinfo])])
|
||||
AC_SUBST(LIBREADLINE)
|
||||
if test -n "$LIBREADLINE"; then
|
||||
if test "$LIBREADLINE" != "-lreadline"; then
|
||||
echo "-lreadline needs $LIBREADLINE"
|
||||
LIBREADLINE="-lreadline $LIBREADLINE"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check for the availability of libedit. Different distributions put its
|
||||
# headers in different places. Try to cover the most common ones.
|
||||
|
||||
if test "$enable_pcre2test_libedit" = "yes"; then
|
||||
AC_CHECK_HEADERS([editline/readline.h edit/readline/readline.h readline.h], [
|
||||
HAVE_LIBEDIT_HEADER=1
|
||||
break
|
||||
])
|
||||
AC_CHECK_LIB([edit], [readline], [LIBEDIT="-ledit"])
|
||||
fi
|
||||
|
||||
PCRE2_STATIC_CFLAG=""
|
||||
if test "x$enable_shared" = "xno" ; then
|
||||
AC_DEFINE([PCRE2_STATIC], [1], [
|
||||
Define to any value if linking statically (TODO: make nice with Libtool)])
|
||||
PCRE2_STATIC_CFLAG="-DPCRE2_STATIC"
|
||||
fi
|
||||
AC_SUBST(PCRE2_STATIC_CFLAG)
|
||||
|
||||
PCRE2POSIX_CFLAG=""
|
||||
if test "x$enable_shared" = "xyes" ; then
|
||||
PCRE2POSIX_CFLAG="-DPCRE2POSIX_SHARED"
|
||||
fi
|
||||
AC_SUBST(PCRE2POSIX_CFLAG)
|
||||
|
||||
# Here is where PCRE2-specific defines are handled
|
||||
|
||||
if test "$enable_pcre2_8" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_PCRE2_8], [], [
|
||||
Define to any value to enable the 8 bit PCRE2 library.])
|
||||
fi
|
||||
|
||||
if test "$enable_pcre2_16" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_PCRE2_16], [], [
|
||||
Define to any value to enable the 16 bit PCRE2 library.])
|
||||
fi
|
||||
|
||||
if test "$enable_pcre2_32" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_PCRE2_32], [], [
|
||||
Define to any value to enable the 32 bit PCRE2 library.])
|
||||
fi
|
||||
|
||||
if test "$enable_debug" = "yes"; then
|
||||
AC_DEFINE([PCRE2_DEBUG], [], [
|
||||
Define to any value to include debugging code.])
|
||||
fi
|
||||
|
||||
if test "$enable_percent_zt" = "no"; then
|
||||
AC_DEFINE([DISABLE_PERCENT_ZT], [], [
|
||||
Define to any value to disable the use of the z and t modifiers in
|
||||
formatting settings such as %zu or %td (this is rarely needed).])
|
||||
else
|
||||
enable_percent_zt=auto
|
||||
fi
|
||||
|
||||
# Unless running under Windows, JIT support requires pthreads.
|
||||
|
||||
if test "$enable_jit" = "yes"; then
|
||||
if test "$HAVE_WINDOWS_H" != "1"; then
|
||||
AX_PTHREAD([], [AC_MSG_ERROR([JIT support requires pthreads])])
|
||||
CC="$PTHREAD_CC"
|
||||
CFLAGS="$PTHREAD_CFLAGS $CFLAGS"
|
||||
LIBS="$PTHREAD_LIBS $LIBS"
|
||||
fi
|
||||
AC_DEFINE([SUPPORT_JIT], [], [
|
||||
Define to any value to enable support for Just-In-Time compiling.])
|
||||
else
|
||||
enable_pcre2grep_jit="no"
|
||||
fi
|
||||
|
||||
if test "$enable_jit_sealloc" = "yes"; then
|
||||
AC_DEFINE([SLJIT_PROT_EXECUTABLE_ALLOCATOR], [1], [
|
||||
Define to any non-zero number to enable support for SELinux
|
||||
compatible executable memory allocator in JIT. Note that this
|
||||
will have no effect unless SUPPORT_JIT is also defined.])
|
||||
fi
|
||||
|
||||
if test "$enable_pcre2grep_jit" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_PCRE2GREP_JIT], [], [
|
||||
Define to any value to enable JIT support in pcre2grep. Note that this will
|
||||
have no effect unless SUPPORT_JIT is also defined.])
|
||||
fi
|
||||
|
||||
if test "$enable_pcre2grep_callout" = "yes"; then
|
||||
if test "$enable_pcre2grep_callout_fork" = "yes"; then
|
||||
if test "$HAVE_WINDOWS_H" != "1"; then
|
||||
if test "$HAVE_SYS_WAIT_H" != "1"; then
|
||||
AC_MSG_ERROR([Callout script support needs sys/wait.h.])
|
||||
fi
|
||||
fi
|
||||
AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT_FORK], [], [
|
||||
Define to any value to enable fork support in pcre2grep callout scripts.
|
||||
This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also
|
||||
defined.])
|
||||
fi
|
||||
AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT], [], [
|
||||
Define to any value to enable callout script support in pcre2grep.])
|
||||
else
|
||||
enable_pcre2grep_callout_fork="no"
|
||||
fi
|
||||
|
||||
if test "$enable_unicode" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_UNICODE], [], [
|
||||
Define to any value to enable support for Unicode and UTF encoding.
|
||||
This will work even in an EBCDIC environment, but it is incompatible
|
||||
with the EBCDIC macro. That is, PCRE2 can support *either* EBCDIC
|
||||
code *or* ASCII/Unicode, but not both at once.])
|
||||
fi
|
||||
|
||||
if test "$enable_pcre2grep_libz" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_LIBZ], [], [
|
||||
Define to any value to allow pcre2grep to be linked with libz, so that it is
|
||||
able to handle .gz files.])
|
||||
fi
|
||||
|
||||
if test "$enable_pcre2grep_libbz2" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_LIBBZ2], [], [
|
||||
Define to any value to allow pcre2grep to be linked with libbz2, so that it
|
||||
is able to handle .bz2 files.])
|
||||
fi
|
||||
|
||||
# Validate --with-pcre2grep-bufsize. Values below 8192 are raised to 8192
# with a warning. When the comparison itself fails (exit status greater than
# 1, e.g. for a non-numeric value) configure stops with an error. The value
# is quoted so that it always reaches "test" as a single operand.
if test "$with_pcre2grep_bufsize" -lt 8192 ; then
  AC_MSG_WARN([$with_pcre2grep_bufsize is too small for --with-pcre2grep-bufsize; using 8192])
  with_pcre2grep_bufsize="8192"
else
  if test $? -gt 1 ; then
    AC_MSG_ERROR([Bad value for --with-pcre2grep-bufsize])
  fi
fi

# Validate --with-pcre2grep-max-bufsize. The maximum is silently raised to
# the initial buffer size when it is smaller; a failed comparison (exit
# status greater than 1) is reported as an error.
if test "$with_pcre2grep_max_bufsize" -lt "$with_pcre2grep_bufsize" ; then
  with_pcre2grep_max_bufsize="$with_pcre2grep_bufsize"
else
  if test $? -gt 1 ; then
    AC_MSG_ERROR([Bad value for --with-pcre2grep-max-bufsize])
  fi
fi
|
||||
|
||||
AC_DEFINE_UNQUOTED([PCRE2GREP_BUFSIZE], [$with_pcre2grep_bufsize], [
|
||||
The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by
|
||||
pcre2grep to hold parts of the file it is searching. The buffer will be
|
||||
expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing very
|
||||
long lines. The actual amount of memory used by pcre2grep is three times this
|
||||
number, because it allows for the buffering of "before" and "after" lines.])
|
||||
|
||||
AC_DEFINE_UNQUOTED([PCRE2GREP_MAX_BUFSIZE], [$with_pcre2grep_max_bufsize], [
|
||||
The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer
|
||||
used by pcre2grep to hold parts of the file it is searching. The actual
|
||||
amount of memory used by pcre2grep is three times this number, because it
|
||||
allows for the buffering of "before" and "after" lines.])
|
||||
|
||||
if test "$enable_pcre2test_libedit" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_LIBEDIT], [], [
|
||||
Define to any value to allow pcre2test to be linked with libedit.])
|
||||
LIBREADLINE="$LIBEDIT"
|
||||
elif test "$enable_pcre2test_libreadline" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_LIBREADLINE], [], [
|
||||
Define to any value to allow pcre2test to be linked with libreadline.])
|
||||
fi
|
||||
|
||||
AC_DEFINE_UNQUOTED([NEWLINE_DEFAULT], [$ac_pcre2_newline_value], [
|
||||
The value of NEWLINE_DEFAULT determines the default newline character
|
||||
sequence. PCRE2 client programs can override this by selecting other values
|
||||
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY),
|
||||
5 (ANYCRLF), and 6 (NUL).])
|
||||
|
||||
if test "$enable_bsr_anycrlf" = "yes"; then
|
||||
AC_DEFINE([BSR_ANYCRLF], [], [
|
||||
By default, the \R escape sequence matches any Unicode line ending
|
||||
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
||||
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
|
||||
The build-time default can be overridden by the user of PCRE2 at runtime.])
|
||||
fi
|
||||
|
||||
if test "$enable_never_backslash_C" = "yes"; then
|
||||
AC_DEFINE([NEVER_BACKSLASH_C], [], [
|
||||
Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns.])
|
||||
fi
|
||||
|
||||
AC_DEFINE_UNQUOTED([LINK_SIZE], [$with_link_size], [
|
||||
The value of LINK_SIZE determines the number of bytes used to store
|
||||
links as offsets within the compiled regex. The default is 2, which
|
||||
allows for compiled patterns up to 65535 code units long. This covers the
|
||||
vast majority of cases. However, PCRE2 can also be compiled to use 3 or 4
|
||||
bytes instead. This allows for longer patterns in extreme cases.])
|
||||
|
||||
AC_DEFINE_UNQUOTED([MAX_VARLOOKBEHIND], [$with_max_varlookbehind], [
|
||||
The value of MAX_VARLOOKBEHIND specifies the default maximum length, in
|
||||
characters, for a variable-length lookbehind assertion.])
|
||||
|
||||
AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [
|
||||
The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
stack that is used while compiling a pattern.])
|
||||
|
||||
AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
|
||||
The value of MATCH_LIMIT determines the default number of times the
|
||||
pcre2_match() function can record a backtrack position during a single
|
||||
matching attempt. The value is also used to limit a loop counter in
|
||||
pcre2_dfa_match(). There is a runtime interface for setting a different
|
||||
limit. The limit exists in order to catch runaway regular expressions that
|
||||
take forever to determine that they do not match. The default is set very
|
||||
large so that it does not accidentally catch legitimate cases.])
|
||||
|
||||
# --with-match-limit-recursion is an obsolete synonym for --with-match-limit-depth
|
||||
|
||||
if test "$with_match_limit_recursion" != "UNSET"; then
|
||||
cat <<EOF
|
||||
|
||||
WARNING: --with-match-limit-recursion is an obsolete option. Please use
|
||||
--with-match-limit-depth in future. If both are set, --with-match-limit-depth
|
||||
will be used. See also --with-heap-limit.
|
||||
|
||||
EOF
|
||||
if test "$with_match_limit_depth" = "MATCH_LIMIT"; then
|
||||
with_match_limit_depth=$with_match_limit_recursion
|
||||
fi
|
||||
fi
|
||||
|
||||
AC_DEFINE_UNQUOTED([MATCH_LIMIT_DEPTH], [$with_match_limit_depth], [
|
||||
The above limit applies to all backtracks, whether or not they are nested. In
|
||||
some environments it is desirable to limit the nesting of backtracking (that
|
||||
is, the depth of tree that is searched) more strictly, in order to restrict
|
||||
the maximum amount of heap memory that is used. The value of
|
||||
MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it must
|
||||
be less than the value of MATCH_LIMIT. The default is to use the same value
|
||||
as MATCH_LIMIT. There is a runtime method for setting a different limit. In
|
||||
the case of pcre2_dfa_match(), this limit controls the depth of the internal
|
||||
nested function calls that are used for pattern recursions, lookarounds, and
|
||||
atomic groups.])
|
||||
|
||||
AC_DEFINE_UNQUOTED([HEAP_LIMIT], [$with_heap_limit], [
|
||||
This limits the amount of memory that may be used while matching
|
||||
a pattern. It applies to both pcre2_match() and pcre2_dfa_match(). It does
|
||||
not apply to JIT matching. The value is in kibibytes (units of 1024 bytes).])
|
||||
|
||||
AC_DEFINE([MAX_NAME_SIZE], [128], [
|
||||
This limit is parameterized just in case anybody ever wants to
|
||||
change it. Care must be taken if it is increased, because it guards
|
||||
against integer overflow caused by enormously large patterns.])
|
||||
|
||||
AC_DEFINE([MAX_NAME_COUNT], [10000], [
|
||||
This limit is parameterized just in case anybody ever wants to
|
||||
change it. Care must be taken if it is increased, because it guards
|
||||
against integer overflow caused by enormously large patterns.])
|
||||
|
||||
AH_VERBATIM([PCRE2_EXP_DEFN], [
|
||||
/* If you are compiling for a system other than a Unix-like system or
|
||||
Win32, and it needs some magic to be inserted before the definition
|
||||
of a function that is exported by the library, define this macro to
|
||||
contain the relevant magic. If you do not define this macro, a suitable
|
||||
__declspec value is used for Windows systems; in other environments
|
||||
a compiler relevant "extern" is used with any "visibility" related
|
||||
attributes from PCRE2_EXPORT included.
|
||||
This macro apears at the start of every exported function that is part
|
||||
of the external API. It does not appear on functions that are "external"
|
||||
in the C sense, but which are internal to the library. */
|
||||
#undef PCRE2_EXP_DEFN])
|
||||
|
||||
if test "$enable_ebcdic" = "yes"; then
|
||||
AC_DEFINE_UNQUOTED([EBCDIC], [], [
|
||||
If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||
character codes, define this macro to any value. When EBCDIC is set, PCRE2
|
||||
assumes that all input strings are in EBCDIC. If you do not define this
|
||||
macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
|
||||
is not possible to build a version of PCRE2 that supports both EBCDIC and
|
||||
UTF-8/16/32.])
|
||||
fi
|
||||
|
||||
if test "$enable_ebcdic_nl25" = "yes"; then
|
||||
AC_DEFINE_UNQUOTED([EBCDIC_NL25], [], [
|
||||
In an EBCDIC environment, define this macro to any value to arrange for
|
||||
the NL character to be 0x25 instead of the default 0x15. NL plays the role
|
||||
that LF does in an ASCII/Unicode environment.])
|
||||
fi
|
||||
|
||||
if test "$enable_valgrind" = "yes"; then
|
||||
AC_DEFINE_UNQUOTED([SUPPORT_VALGRIND], [], [
|
||||
Define to any value for valgrind support to find invalid memory reads.])
|
||||
fi
|
||||
|
||||
# Platform specific issues
|
||||
NO_UNDEFINED=
|
||||
EXPORT_ALL_SYMBOLS=
|
||||
case $host_os in
|
||||
cygwin* | mingw* )
|
||||
if test X"$enable_shared" = Xyes; then
|
||||
NO_UNDEFINED="-no-undefined"
|
||||
EXPORT_ALL_SYMBOLS="-Wl,--export-all-symbols"
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
|
||||
# The extra LDFLAGS for each particular library. The libpcre2*_version values
|
||||
# are m4 variables, assigned above.
|
||||
|
||||
EXTRA_LIBPCRE2_8_LDFLAGS="$EXTRA_LIBPCRE2_8_LDFLAGS \
|
||||
$NO_UNDEFINED -version-info libpcre2_8_version"
|
||||
|
||||
EXTRA_LIBPCRE2_16_LDFLAGS="$EXTRA_LIBPCRE2_16_LDFLAGS \
|
||||
$NO_UNDEFINED -version-info libpcre2_16_version"
|
||||
|
||||
EXTRA_LIBPCRE2_32_LDFLAGS="$EXTRA_LIBPCRE2_32_LDFLAGS \
|
||||
$NO_UNDEFINED -version-info libpcre2_32_version"
|
||||
|
||||
EXTRA_LIBPCRE2_POSIX_LDFLAGS="$EXTRA_LIBPCRE2_POSIX_LDFLAGS \
|
||||
$NO_UNDEFINED -version-info libpcre2_posix_version"
|
||||
|
||||
AC_SUBST(EXTRA_LIBPCRE2_8_LDFLAGS)
|
||||
AC_SUBST(EXTRA_LIBPCRE2_16_LDFLAGS)
|
||||
AC_SUBST(EXTRA_LIBPCRE2_32_LDFLAGS)
|
||||
AC_SUBST(EXTRA_LIBPCRE2_POSIX_LDFLAGS)
|
||||
|
||||
# When we run 'make distcheck', use these arguments. Turning off compiler
|
||||
# optimization makes it run faster.
|
||||
DISTCHECK_CONFIGURE_FLAGS="CFLAGS='' CXXFLAGS='' --enable-pcre2-16 --enable-pcre2-32 --enable-jit"
|
||||
AC_SUBST(DISTCHECK_CONFIGURE_FLAGS)
|
||||
|
||||
# Check that, if --enable-pcre2grep-libz or --enable-pcre2grep-libbz2 is
|
||||
# specified, the relevant library is available.
|
||||
|
||||
if test "$enable_pcre2grep_libz" = "yes"; then
|
||||
if test "$HAVE_ZLIB_H" != "1"; then
|
||||
echo "** Cannot --enable-pcre2grep-libz because zlib.h was not found"
|
||||
exit 1
|
||||
fi
|
||||
if test "$HAVE_LIBZ" != "1"; then
|
||||
echo "** Cannot --enable-pcre2grep-libz because libz was not found"
|
||||
exit 1
|
||||
fi
|
||||
LIBZ="-lz"
|
||||
fi
|
||||
AC_SUBST(LIBZ)
|
||||
|
||||
if test "$enable_pcre2grep_libbz2" = "yes"; then
|
||||
if test "$HAVE_BZLIB_H" != "1"; then
|
||||
echo "** Cannot --enable-pcre2grep-libbz2 because bzlib.h was not found"
|
||||
exit 1
|
||||
fi
|
||||
if test "$HAVE_LIBBZ2" != "1"; then
|
||||
echo "** Cannot --enable-pcre2grep-libbz2 because libbz2 was not found"
|
||||
exit 1
|
||||
fi
|
||||
LIBBZ2="-lbz2"
|
||||
fi
|
||||
AC_SUBST(LIBBZ2)
|
||||
|
||||
# Similarly for --enable-pcre2test-libedit and --enable-pcre2test-libreadline
|
||||
|
||||
if test "$enable_pcre2test_libedit" = "yes"; then
|
||||
if test "$enable_pcre2test_libreadline" = "yes"; then
|
||||
echo "** Cannot use both --enable-pcre2test-libedit and --enable-pcre2test-readline"
|
||||
exit 1
|
||||
fi
|
||||
if test -z "$HAVE_LIBEDIT_HEADER"; then
|
||||
echo "** Cannot --enable-pcre2test-libedit because neither editline/readline.h,"
|
||||
echo "** edit/readline/readline.h nor a compatible header was found."
|
||||
exit 1
|
||||
fi
|
||||
if test -z "$LIBEDIT"; then
|
||||
echo "** Cannot --enable-pcre2test-libedit because libedit library was not found."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if test "$enable_pcre2test_libreadline" = "yes"; then
|
||||
if test "$HAVE_READLINE_H" != "1"; then
|
||||
echo "** Cannot --enable-pcre2test-readline because readline/readline.h was not found."
|
||||
exit 1
|
||||
fi
|
||||
if test "$HAVE_HISTORY_H" != "1"; then
|
||||
echo "** Cannot --enable-pcre2test-readline because readline/history.h was not found."
|
||||
exit 1
|
||||
fi
|
||||
if test -z "$LIBREADLINE"; then
|
||||
echo "** Cannot --enable-pcre2test-readline because readline library was not found."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Handle valgrind support
|
||||
|
||||
if test "$enable_valgrind" = "yes"; then
|
||||
m4_ifdef([PKG_CHECK_MODULES],
|
||||
[PKG_CHECK_MODULES([VALGRIND],[valgrind])],
|
||||
[AC_MSG_ERROR([pkg-config not supported])])
|
||||
fi
|
||||
|
||||
# Handle code coverage reporting support
|
||||
if test "$enable_coverage" = "yes"; then
|
||||
if test "x$GCC" != "xyes"; then
|
||||
AC_MSG_ERROR([Code coverage reports can only be generated when using GCC])
|
||||
fi
|
||||
|
||||
# ccache is incompatible with gcov
|
||||
AC_PATH_PROG([SHTOOL],[shtool],[false])
|
||||
case `$SHTOOL path $CC` in
|
||||
*ccache*) cc_ccache=yes;;
|
||||
*) cc_ccache=no;;
|
||||
esac
|
||||
|
||||
if test "$cc_ccache" = "yes"; then
|
||||
if test -z "$CCACHE_DISABLE" -o "$CCACHE_DISABLE" != "1"; then
|
||||
AC_MSG_ERROR([must export CCACHE_DISABLE=1 to disable ccache for code coverage])
|
||||
fi
|
||||
fi
|
||||
|
||||
AC_ARG_VAR([LCOV],[the ltp lcov program])
|
||||
AC_PATH_PROG([LCOV],[lcov],[false])
|
||||
if test "x$LCOV" = "xfalse"; then
|
||||
AC_MSG_ERROR([lcov not found])
|
||||
fi
|
||||
|
||||
AC_ARG_VAR([GENHTML],[the ltp genhtml program])
|
||||
AC_PATH_PROG([GENHTML],[genhtml],[false])
|
||||
if test "x$GENHTML" = "xfalse"; then
|
||||
AC_MSG_ERROR([genhtml not found])
|
||||
fi
|
||||
|
||||
# Set flags needed for gcov
|
||||
GCOV_CFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
|
||||
GCOV_CXXFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
|
||||
GCOV_LIBS="-lgcov"
|
||||
AC_SUBST([GCOV_CFLAGS])
|
||||
AC_SUBST([GCOV_CXXFLAGS])
|
||||
AC_SUBST([GCOV_LIBS])
|
||||
fi # enable_coverage
|
||||
|
||||
AM_CONDITIONAL([WITH_GCOV],[test "x$enable_coverage" = "xyes"])
|
||||
|
||||
AC_MSG_CHECKING([whether Intel CET is enabled])
|
||||
AC_LANG_PUSH([C])
|
||||
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,
|
||||
[[#ifndef __CET__
|
||||
# error CET is not enabled
|
||||
#endif]])],
|
||||
[pcre2_cc_cv_intel_cet_enabled=yes],
|
||||
[pcre2_cc_cv_intel_cet_enabled=no])
|
||||
AC_MSG_RESULT([$pcre2_cc_cv_intel_cet_enabled])
|
||||
if test "$pcre2_cc_cv_intel_cet_enabled" = yes; then
|
||||
CET_CFLAGS="-mshstk"
|
||||
AC_SUBST([CET_CFLAGS])
|
||||
fi
|
||||
AC_LANG_POP([C])
|
||||
|
||||
# LIB_POSTFIX is used by CMakeLists.txt for Windows debug builds.
|
||||
# Pass empty LIB_POSTFIX to *.pc files and pcre2-config here.
|
||||
AC_SUBST(LIB_POSTFIX)
|
||||
|
||||
# Produce these files, in addition to config.h.
|
||||
|
||||
AC_CONFIG_FILES(
|
||||
Makefile
|
||||
libpcre2-8.pc
|
||||
libpcre2-16.pc
|
||||
libpcre2-32.pc
|
||||
libpcre2-posix.pc
|
||||
pcre2-config
|
||||
src/pcre2.h
|
||||
)
|
||||
|
||||
# Make the generated script files executable.
|
||||
AC_CONFIG_COMMANDS([script-chmod], [chmod a+x pcre2-config])
|
||||
|
||||
# Make sure that pcre2_chartables.c is removed in case the method for
|
||||
# creating it was changed by reconfiguration.
|
||||
AC_CONFIG_COMMANDS([delete-old-chartables], [rm -f pcre2_chartables.c])
|
||||
|
||||
AC_OUTPUT
|
||||
|
||||
# --disable-stack-for-recursion is obsolete and has no effect.
|
||||
|
||||
if test "$enable_stack_for_recursion" = "no"; then
|
||||
cat <<EOF
|
||||
|
||||
WARNING: --disable-stack-for-recursion is obsolete and has no effect.
|
||||
EOF
|
||||
fi
|
||||
|
||||
# Print out a nice little message after configure is run displaying the
|
||||
# chosen options.
|
||||
|
||||
ebcdic_nl_code=n/a
|
||||
if test "$enable_ebcdic_nl25" = "yes"; then
|
||||
ebcdic_nl_code=0x25
|
||||
elif test "$enable_ebcdic" = "yes"; then
|
||||
ebcdic_nl_code=0x15
|
||||
fi
|
||||
|
||||
cat <<EOF
|
||||
|
||||
$PACKAGE-$VERSION configuration summary:
|
||||
|
||||
Install prefix ..................... : ${prefix}
|
||||
C preprocessor ..................... : ${CPP}
|
||||
C compiler ......................... : ${CC}
|
||||
Linker ............................. : ${LD}
|
||||
C preprocessor flags ............... : ${CPPFLAGS}
|
||||
C compiler flags ................... : ${CFLAGS} ${VISIBILITY_CFLAGS}
|
||||
Linker flags ....................... : ${LDFLAGS}
|
||||
Extra libraries .................... : ${LIBS}
|
||||
|
||||
Build 8-bit pcre2 library .......... : ${enable_pcre2_8}
|
||||
Build 16-bit pcre2 library ......... : ${enable_pcre2_16}
|
||||
Build 32-bit pcre2 library ......... : ${enable_pcre2_32}
|
||||
Include debugging code ............. : ${enable_debug}
|
||||
Enable JIT compiling support ....... : ${enable_jit}
|
||||
Use SELinux allocator in JIT ....... : ${enable_jit_sealloc}
|
||||
Enable Unicode support ............. : ${enable_unicode}
|
||||
Newline char/sequence .............. : ${enable_newline}
|
||||
\R matches only ANYCRLF ............ : ${enable_bsr_anycrlf}
|
||||
\C is disabled ..................... : ${enable_never_backslash_C}
|
||||
EBCDIC coding ...................... : ${enable_ebcdic}
|
||||
EBCDIC code for NL ................. : ${ebcdic_nl_code}
|
||||
Rebuild char tables ................ : ${enable_rebuild_chartables}
|
||||
Internal link size ................. : ${with_link_size}
|
||||
Maximum variable lookbehind ........ : ${with_max_varlookbehind}
|
||||
Nested parentheses limit ........... : ${with_parens_nest_limit}
|
||||
Heap limit ......................... : ${with_heap_limit} kibibytes
|
||||
Match limit ........................ : ${with_match_limit}
|
||||
Match depth limit .................. : ${with_match_limit_depth}
|
||||
Build shared libs .................. : ${enable_shared}
|
||||
Build static libs .................. : ${enable_static}
|
||||
Use JIT in pcre2grep ............... : ${enable_pcre2grep_jit}
|
||||
Enable callouts in pcre2grep ....... : ${enable_pcre2grep_callout}
|
||||
Enable fork in pcre2grep callouts .. : ${enable_pcre2grep_callout_fork}
|
||||
Initial buffer size for pcre2grep .. : ${with_pcre2grep_bufsize}
|
||||
Maximum buffer size for pcre2grep .. : ${with_pcre2grep_max_bufsize}
|
||||
Link pcre2grep with libz ........... : ${enable_pcre2grep_libz}
|
||||
Link pcre2grep with libbz2 ......... : ${enable_pcre2grep_libbz2}
|
||||
Link pcre2test with libedit ........ : ${enable_pcre2test_libedit}
|
||||
Link pcre2test with libreadline .... : ${enable_pcre2test_libreadline}
|
||||
Valgrind support ................... : ${enable_valgrind}
|
||||
Code coverage ...................... : ${enable_coverage}
|
||||
Fuzzer support ..................... : ${enable_fuzz_support}
|
||||
Differential fuzzer support ........ : ${enable_diff_fuzz_support}
|
||||
Use %zu and %td .................... : ${enable_percent_zt}
|
||||
|
||||
EOF
|
||||
|
||||
dnl end configure.ac
|
||||
@@ -1,13 +0,0 @@
|
||||
# Package Information for pkg-config
|
||||
|
||||
prefix=@prefix@
|
||||
exec_prefix=@exec_prefix@
|
||||
libdir=@libdir@
|
||||
includedir=@includedir@
|
||||
|
||||
Name: libpcre2-16
|
||||
Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 16 bit character support
|
||||
Version: @PACKAGE_VERSION@
|
||||
Libs: -L${libdir} -lpcre2-16@LIB_POSTFIX@
|
||||
Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@
|
||||
Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@
|
||||
@@ -1,13 +0,0 @@
|
||||
# Package Information for pkg-config
|
||||
|
||||
prefix=@prefix@
|
||||
exec_prefix=@exec_prefix@
|
||||
libdir=@libdir@
|
||||
includedir=@includedir@
|
||||
|
||||
Name: libpcre2-32
|
||||
Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 32 bit character support
|
||||
Version: @PACKAGE_VERSION@
|
||||
Libs: -L${libdir} -lpcre2-32@LIB_POSTFIX@
|
||||
Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@
|
||||
Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@
|
||||
@@ -1,13 +0,0 @@
|
||||
# Package Information for pkg-config
|
||||
|
||||
prefix=@prefix@
|
||||
exec_prefix=@exec_prefix@
|
||||
libdir=@libdir@
|
||||
includedir=@includedir@
|
||||
|
||||
Name: libpcre2-8
|
||||
Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 8 bit character support
|
||||
Version: @PACKAGE_VERSION@
|
||||
Libs: -L${libdir} -lpcre2-8@LIB_POSTFIX@
|
||||
Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@
|
||||
Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@
|
||||
@@ -1,13 +0,0 @@
|
||||
# Package Information for pkg-config
|
||||
|
||||
prefix=@prefix@
|
||||
exec_prefix=@exec_prefix@
|
||||
libdir=@libdir@
|
||||
includedir=@includedir@
|
||||
|
||||
Name: libpcre2-posix
|
||||
Description: Posix compatible interface to libpcre2-8
|
||||
Version: @PACKAGE_VERSION@
|
||||
Libs: -L${libdir} -lpcre2-posix@LIB_POSTFIX@
|
||||
Cflags: -I${includedir} @PCRE2POSIX_CFLAG@
|
||||
Requires.private: libpcre2-8
|
||||
@@ -1,121 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
prefix=@prefix@
|
||||
exec_prefix=@exec_prefix@
|
||||
exec_prefix_set=no
|
||||
|
||||
cflags="[--cflags]"
|
||||
libs=
|
||||
|
||||
if test @enable_pcre2_16@ = yes ; then
|
||||
libs="[--libs16] $libs"
|
||||
fi
|
||||
|
||||
if test @enable_pcre2_32@ = yes ; then
|
||||
libs="[--libs32] $libs"
|
||||
fi
|
||||
|
||||
if test @enable_pcre2_8@ = yes ; then
|
||||
libs="[--libs8] [--libs-posix] $libs"
|
||||
cflags="$cflags [--cflags-posix]"
|
||||
fi
|
||||
|
||||
usage="Usage: pcre2-config [--prefix] [--exec-prefix] [--version] $libs $cflags"
|
||||
|
||||
if test $# -eq 0; then
|
||||
echo "${usage}" 1>&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
libR=
|
||||
case `uname -s` in
|
||||
*SunOS*)
|
||||
libR=" -R@libdir@"
|
||||
;;
|
||||
*BSD*)
|
||||
libR=" -Wl,-R@libdir@"
|
||||
;;
|
||||
esac
|
||||
|
||||
libS=
|
||||
if test @libdir@ != /usr/lib ; then
|
||||
libS=-L@libdir@
|
||||
fi
|
||||
|
||||
while test $# -gt 0; do
|
||||
case "$1" in
|
||||
-*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
|
||||
*) optarg= ;;
|
||||
esac
|
||||
|
||||
case $1 in
|
||||
--prefix=*)
|
||||
prefix=$optarg
|
||||
if test $exec_prefix_set = no ; then
|
||||
exec_prefix=$optarg
|
||||
fi
|
||||
;;
|
||||
--prefix)
|
||||
echo $prefix
|
||||
;;
|
||||
--exec-prefix=*)
|
||||
exec_prefix=$optarg
|
||||
exec_prefix_set=yes
|
||||
;;
|
||||
--exec-prefix)
|
||||
echo $exec_prefix
|
||||
;;
|
||||
--version)
|
||||
echo @PACKAGE_VERSION@
|
||||
;;
|
||||
--cflags)
|
||||
if test @includedir@ != /usr/include ; then
|
||||
includes=-I@includedir@
|
||||
fi
|
||||
echo $includes @PCRE2_STATIC_CFLAG@
|
||||
;;
|
||||
--cflags-posix)
|
||||
if test @enable_pcre2_8@ = yes ; then
|
||||
if test @includedir@ != /usr/include ; then
|
||||
includes=-I@includedir@
|
||||
fi
|
||||
echo $includes @PCRE2POSIX_CFLAG@
|
||||
else
|
||||
echo "${usage}" 1>&2
|
||||
fi
|
||||
;;
|
||||
--libs-posix)
|
||||
if test @enable_pcre2_8@ = yes ; then
|
||||
echo $libS$libR -lpcre2-posix@LIB_POSTFIX@ -lpcre2-8@LIB_POSTFIX@
|
||||
else
|
||||
echo "${usage}" 1>&2
|
||||
fi
|
||||
;;
|
||||
--libs8)
|
||||
if test @enable_pcre2_8@ = yes ; then
|
||||
echo $libS$libR -lpcre2-8@LIB_POSTFIX@
|
||||
else
|
||||
echo "${usage}" 1>&2
|
||||
fi
|
||||
;;
|
||||
--libs16)
|
||||
if test @enable_pcre2_16@ = yes ; then
|
||||
echo $libS$libR -lpcre2-16@LIB_POSTFIX@
|
||||
else
|
||||
echo "${usage}" 1>&2
|
||||
fi
|
||||
;;
|
||||
--libs32)
|
||||
if test @enable_pcre2_32@ = yes ; then
|
||||
echo $libS$libR -lpcre2-32@LIB_POSTFIX@
|
||||
else
|
||||
echo "${usage}" 1>&2
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
echo "${usage}" 1>&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
@@ -1,483 +0,0 @@
|
||||
/* src/config.h. Generated from config.h.in by configure. */
|
||||
/* src/config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
/* PCRE2 is written in Standard C, but there are a few non-standard things it
|
||||
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||
systems.
|
||||
|
||||
In environments that support the GNU autotools, config.h.in is converted into
|
||||
config.h by the "configure" script. In environments that use CMake,
|
||||
config-cmake.in is converted into config.h. If you are going to build PCRE2 "by
|
||||
hand" without using "configure" or CMake, you should copy the distributed
|
||||
config.h.generic to config.h, and edit the macro definitions to be the way you
|
||||
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
|
||||
so that config.h is included at the start of every source.
|
||||
|
||||
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
|
||||
but if you do, default values will be taken from config.h for non-boolean
|
||||
macros that are not defined on the command line.
|
||||
|
||||
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be
|
||||
defined (conventionally to 1) for TRUE, and not defined at all for FALSE. All
|
||||
such macros are listed as a commented #undef in config.h.generic. Macros such
|
||||
as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
|
||||
|
||||
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
|
||||
sure both macros are undefined; an emulation function will then be used. */
|
||||
|
||||
/* By default, the \R escape sequence matches any Unicode line ending
|
||||
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
||||
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
|
||||
The build-time default can be overridden by the user of PCRE2 at runtime.
|
||||
*/
|
||||
/* #undef BSR_ANYCRLF */
|
||||
|
||||
/* Define to any value to disable the use of the z and t modifiers in
|
||||
formatting settings such as %zu or %td (this is rarely needed). */
|
||||
/* #undef DISABLE_PERCENT_ZT */
|
||||
|
||||
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||
character codes, define this macro to any value. When EBCDIC is set, PCRE2
|
||||
assumes that all input strings are in EBCDIC. If you do not define this
|
||||
macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
|
||||
is not possible to build a version of PCRE2 that supports both EBCDIC and
|
||||
UTF-8/16/32. */
|
||||
/* #undef EBCDIC */
|
||||
|
||||
/* In an EBCDIC environment, define this macro to any value to arrange for the
|
||||
NL character to be 0x25 instead of the default 0x15. NL plays the role that
|
||||
LF does in an ASCII/Unicode environment. */
|
||||
/* #undef EBCDIC_NL25 */
|
||||
|
||||
/* Define to 1 if you have the <assert.h> header file. */
|
||||
/* #undef HAVE_ASSERT_H */
|
||||
|
||||
/* Define this if your compiler supports __attribute__((uninitialized)) */
|
||||
/* #undef HAVE_ATTRIBUTE_UNINITIALIZED */
|
||||
|
||||
/* Define to 1 if you have the `bcopy' function. */
|
||||
/* #undef HAVE_BCOPY */
|
||||
|
||||
/* Define this if your compiler provides __assume() */
|
||||
/* #undef HAVE_BUILTIN_ASSUME */
|
||||
|
||||
/* Define this if your compiler provides __builtin_mul_overflow() */
|
||||
/* #undef HAVE_BUILTIN_MUL_OVERFLOW */
|
||||
|
||||
/* Define this if your compiler provides __builtin_unreachable() */
|
||||
/* #undef HAVE_BUILTIN_UNREACHABLE */
|
||||
|
||||
/* Define to 1 if you have the <bzlib.h> header file. */
|
||||
/* #undef HAVE_BZLIB_H */
|
||||
|
||||
/* Define to 1 if you have the <dirent.h> header file. */
|
||||
/* #undef HAVE_DIRENT_H */
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
/* #undef HAVE_DLFCN_H */
|
||||
|
||||
/* Define to 1 if you have the <editline/readline.h> header file. */
|
||||
/* #undef HAVE_EDITLINE_READLINE_H */
|
||||
|
||||
/* Define to 1 if you have the <edit/readline/readline.h> header file. */
|
||||
/* #undef HAVE_EDIT_READLINE_READLINE_H */
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
/* #undef HAVE_INTTYPES_H */
|
||||
|
||||
/* Define to 1 if you have the <limits.h> header file. */
|
||||
/* #undef HAVE_LIMITS_H */
|
||||
|
||||
/* Define to 1 if you have the `memfd_create' function. */
|
||||
/* #undef HAVE_MEMFD_CREATE */
|
||||
|
||||
/* Define to 1 if you have the `memmove' function. */
|
||||
/* #undef HAVE_MEMMOVE */
|
||||
|
||||
/* Define to 1 if you have the <minix/config.h> header file. */
|
||||
/* #undef HAVE_MINIX_CONFIG_H */
|
||||
|
||||
/* Define to 1 if you have the `mkostemp' function. */
|
||||
/* #undef HAVE_MKOSTEMP */
|
||||
|
||||
/* Define if you have POSIX threads libraries and header files. */
|
||||
/* #undef HAVE_PTHREAD */
|
||||
|
||||
/* Have PTHREAD_PRIO_INHERIT. */
|
||||
/* #undef HAVE_PTHREAD_PRIO_INHERIT */
|
||||
|
||||
/* Define to 1 if you have the <readline.h> header file. */
|
||||
/* #undef HAVE_READLINE_H */
|
||||
|
||||
/* Define to 1 if you have the <readline/history.h> header file. */
|
||||
/* #undef HAVE_READLINE_HISTORY_H */
|
||||
|
||||
/* Define to 1 if you have the <readline/readline.h> header file. */
|
||||
/* #undef HAVE_READLINE_READLINE_H */
|
||||
|
||||
/* Define to 1 if you have the `realpath' function. */
|
||||
/* #undef HAVE_REALPATH */
|
||||
|
||||
/* Define to 1 if you have the `secure_getenv' function. */
|
||||
/* #undef HAVE_SECURE_GETENV */
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
/* #undef HAVE_STDINT_H */
|
||||
|
||||
/* Define to 1 if you have the <stdio.h> header file. */
|
||||
/* #undef HAVE_STDIO_H */
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
/* #undef HAVE_STDLIB_H */
|
||||
|
||||
/* Define to 1 if you have the `strerror' function. */
|
||||
/* #undef HAVE_STRERROR */
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
/* #undef HAVE_STRINGS_H */
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
/* #undef HAVE_STRING_H */
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
/* #undef HAVE_SYS_STAT_H */
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
/* #undef HAVE_SYS_TYPES_H */
|
||||
|
||||
/* Define to 1 if you have the <sys/wait.h> header file. */
|
||||
/* #undef HAVE_SYS_WAIT_H */
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
/* #undef HAVE_UNISTD_H */
|
||||
|
||||
/* Define to 1 if the compiler supports GCC compatible visibility
|
||||
declarations. */
|
||||
/* #undef HAVE_VISIBILITY */
|
||||
|
||||
/* Define to 1 if you have the <wchar.h> header file. */
|
||||
/* #undef HAVE_WCHAR_H */
|
||||
|
||||
/* Define to 1 if you have the <windows.h> header file. */
|
||||
/* #undef HAVE_WINDOWS_H */
|
||||
|
||||
/* Define to 1 if you have the <zlib.h> header file. */
|
||||
/* #undef HAVE_ZLIB_H */
|
||||
|
||||
/* This limits the amount of memory that may be used while matching a pattern.
|
||||
It applies to both pcre2_match() and pcre2_dfa_match(). It does not apply
|
||||
to JIT matching. The value is in kibibytes (units of 1024 bytes). */
|
||||
#ifndef HEAP_LIMIT
|
||||
#define HEAP_LIMIT 20000000
|
||||
#endif
|
||||
|
||||
/* The value of LINK_SIZE determines the number of bytes used to store links
|
||||
as offsets within the compiled regex. The default is 2, which allows for
|
||||
compiled patterns up to 65535 code units long. This covers the vast
|
||||
majority of cases. However, PCRE2 can also be compiled to use 3 or 4 bytes
|
||||
instead. This allows for longer patterns in extreme cases. */
|
||||
#ifndef LINK_SIZE
|
||||
#define LINK_SIZE 2
|
||||
#endif
|
||||
|
||||
/* Define to the sub-directory where libtool stores uninstalled libraries. */
|
||||
/* This is ignored unless you are using libtool. */
|
||||
#ifndef LT_OBJDIR
|
||||
#define LT_OBJDIR ".libs/"
|
||||
#endif
|
||||
|
||||
/* The value of MATCH_LIMIT determines the default number of times the
|
||||
pcre2_match() function can record a backtrack position during a single
|
||||
matching attempt. The value is also used to limit a loop counter in
|
||||
pcre2_dfa_match(). There is a runtime interface for setting a different
|
||||
limit. The limit exists in order to catch runaway regular expressions that
|
||||
take forever to determine that they do not match. The default is set very
|
||||
large so that it does not accidentally catch legitimate cases. */
|
||||
#ifndef MATCH_LIMIT
|
||||
#define MATCH_LIMIT 10000000
|
||||
#endif
|
||||
|
||||
/* The above limit applies to all backtracks, whether or not they are nested.
|
||||
In some environments it is desirable to limit the nesting of backtracking
|
||||
(that is, the depth of tree that is searched) more strictly, in order to
|
||||
restrict the maximum amount of heap memory that is used. The value of
|
||||
MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it
|
||||
must be less than the value of MATCH_LIMIT. The default is to use the same
|
||||
value as MATCH_LIMIT. There is a runtime method for setting a different
|
||||
limit. In the case of pcre2_dfa_match(), this limit controls the depth of
|
||||
the internal nested function calls that are used for pattern recursions,
|
||||
lookarounds, and atomic groups. */
|
||||
#ifndef MATCH_LIMIT_DEPTH
|
||||
#define MATCH_LIMIT_DEPTH MATCH_LIMIT
|
||||
#endif
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#ifndef MAX_NAME_COUNT
|
||||
#define MAX_NAME_COUNT 10000
|
||||
#endif
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#ifndef MAX_NAME_SIZE
|
||||
#define MAX_NAME_SIZE 128
|
||||
#endif
|
||||
|
||||
/* The value of MAX_VARLOOKBEHIND specifies the default maximum length, in
|
||||
characters, for a variable-length lookbehind assertion. */
|
||||
#ifndef MAX_VARLOOKBEHIND
|
||||
#define MAX_VARLOOKBEHIND 255
|
||||
#endif
|
||||
|
||||
/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */
|
||||
/* #undef NEVER_BACKSLASH_C */
|
||||
|
||||
/* The value of NEWLINE_DEFAULT determines the default newline character
|
||||
sequence. PCRE2 client programs can override this by selecting other values
|
||||
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), 5
|
||||
(ANYCRLF), and 6 (NUL). */
|
||||
#ifndef NEWLINE_DEFAULT
|
||||
#define NEWLINE_DEFAULT 2
|
||||
#endif
|
||||
|
||||
/* Name of package */
|
||||
#define PACKAGE "pcre2"
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#define PACKAGE_BUGREPORT ""
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#define PACKAGE_NAME "PCRE2"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE2 10.45"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre2"
|
||||
|
||||
/* Define to the home page for this package. */
|
||||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "10.45"
|
||||
|
||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
stack that is used while compiling a pattern. */
|
||||
#ifndef PARENS_NEST_LIMIT
|
||||
#define PARENS_NEST_LIMIT 250
|
||||
#endif
|
||||
|
||||
/* The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by
|
||||
pcre2grep to hold parts of the file it is searching. The buffer will be
|
||||
expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing
|
||||
very long lines. The actual amount of memory used by pcre2grep is three
|
||||
times this number, because it allows for the buffering of "before" and
|
||||
"after" lines. */
|
||||
#ifndef PCRE2GREP_BUFSIZE
|
||||
#define PCRE2GREP_BUFSIZE 20480
|
||||
#endif
|
||||
|
||||
/* The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer
|
||||
used by pcre2grep to hold parts of the file it is searching. The actual
|
||||
amount of memory used by pcre2grep is three times this number, because it
|
||||
allows for the buffering of "before" and "after" lines. */
|
||||
#ifndef PCRE2GREP_MAX_BUFSIZE
|
||||
#define PCRE2GREP_MAX_BUFSIZE 1048576
|
||||
#endif
|
||||
|
||||
/* Define to any value to include debugging code. */
|
||||
/* #undef PCRE2_DEBUG */
|
||||
|
||||
/* to make a symbol visible */
|
||||
#define PCRE2_EXPORT
|
||||
|
||||
/* If you are compiling for a system other than a Unix-like system or
|
||||
Win32, and it needs some magic to be inserted before the definition
|
||||
of a function that is exported by the library, define this macro to
|
||||
contain the relevant magic. If you do not define this macro, a suitable
|
||||
__declspec value is used for Windows systems; in other environments
|
||||
a compiler relevant "extern" is used with any "visibility" related
|
||||
attributes from PCRE2_EXPORT included.
|
||||
This macro appears at the start of every exported function that is part
|
||||
of the external API. It does not appear on functions that are "external"
|
||||
in the C sense, but which are internal to the library. */
|
||||
/* #undef PCRE2_EXP_DEFN */
|
||||
|
||||
/* Define to any value if linking statically (TODO: make nice with Libtool) */
|
||||
/* #undef PCRE2_STATIC */
|
||||
|
||||
/* Define to necessary symbol if this constant uses a non-standard name on
|
||||
your system. */
|
||||
/* #undef PTHREAD_CREATE_JOINABLE */
|
||||
|
||||
/* Define to any non-zero number to enable support for SELinux compatible
|
||||
executable memory allocator in JIT. Note that this will have no effect
|
||||
unless SUPPORT_JIT is also defined. */
|
||||
/* #undef SLJIT_PROT_EXECUTABLE_ALLOCATOR */
|
||||
|
||||
/* Define to 1 if all of the C90 standard headers exist (not just the ones
|
||||
required in a freestanding environment). This macro is provided for
|
||||
backward compatibility; new code need not use it. */
|
||||
/* #undef STDC_HEADERS */
|
||||
|
||||
/* Define to any value to enable differential fuzzing support. */
|
||||
/* #undef SUPPORT_DIFF_FUZZ */
|
||||
|
||||
/* Define to any value to enable support for Just-In-Time compiling. */
|
||||
/* #undef SUPPORT_JIT */
|
||||
|
||||
/* Define to any value to allow pcre2grep to be linked with libbz2, so that it
|
||||
is able to handle .bz2 files. */
|
||||
/* #undef SUPPORT_LIBBZ2 */
|
||||
|
||||
/* Define to any value to allow pcre2test to be linked with libedit. */
|
||||
/* #undef SUPPORT_LIBEDIT */
|
||||
|
||||
/* Define to any value to allow pcre2test to be linked with libreadline. */
|
||||
/* #undef SUPPORT_LIBREADLINE */
|
||||
|
||||
/* Define to any value to allow pcre2grep to be linked with libz, so that it
|
||||
is able to handle .gz files. */
|
||||
/* #undef SUPPORT_LIBZ */
|
||||
|
||||
/* Define to any value to enable callout script support in pcre2grep. */
|
||||
/* #undef SUPPORT_PCRE2GREP_CALLOUT */
|
||||
|
||||
/* Define to any value to enable fork support in pcre2grep callout scripts.
|
||||
This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also defined.
|
||||
*/
|
||||
/* #undef SUPPORT_PCRE2GREP_CALLOUT_FORK */
|
||||
|
||||
/* Define to any value to enable JIT support in pcre2grep. Note that this will
|
||||
have no effect unless SUPPORT_JIT is also defined. */
|
||||
/* #undef SUPPORT_PCRE2GREP_JIT */
|
||||
|
||||
/* Define to any value to enable the 16 bit PCRE2 library. */
|
||||
/* #undef SUPPORT_PCRE2_16 */
|
||||
|
||||
/* Define to any value to enable the 32 bit PCRE2 library. */
|
||||
/* #undef SUPPORT_PCRE2_32 */
|
||||
|
||||
/* Define to any value to enable the 8 bit PCRE2 library. */
|
||||
/* #undef SUPPORT_PCRE2_8 */
|
||||
|
||||
/* Define to any value to enable support for Unicode and UTF encoding. This
|
||||
will work even in an EBCDIC environment, but it is incompatible with the
|
||||
EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or*
|
||||
ASCII/Unicode, but not both at once. */
|
||||
/* #undef SUPPORT_UNICODE */
|
||||
|
||||
/* Define to any value for valgrind support to find invalid memory reads. */
|
||||
/* #undef SUPPORT_VALGRIND */
|
||||
|
||||
/* Enable extensions on AIX 3, Interix. */
|
||||
#ifndef _ALL_SOURCE
|
||||
# define _ALL_SOURCE 1
|
||||
#endif
|
||||
/* Enable general extensions on macOS. */
|
||||
#ifndef _DARWIN_C_SOURCE
|
||||
# define _DARWIN_C_SOURCE 1
|
||||
#endif
|
||||
/* Enable general extensions on Solaris. */
|
||||
#ifndef __EXTENSIONS__
|
||||
# define __EXTENSIONS__ 1
|
||||
#endif
|
||||
/* Enable GNU extensions on systems that have them. */
|
||||
#ifndef _GNU_SOURCE
|
||||
# define _GNU_SOURCE 1
|
||||
#endif
|
||||
/* Enable X/Open compliant socket functions that do not require linking
|
||||
with -lxnet on HP-UX 11.11. */
|
||||
#ifndef _HPUX_ALT_XOPEN_SOCKET_API
|
||||
# define _HPUX_ALT_XOPEN_SOCKET_API 1
|
||||
#endif
|
||||
/* Identify the host operating system as Minix.
|
||||
This macro does not affect the system headers' behavior.
|
||||
A future release of Autoconf may stop defining this macro. */
|
||||
#ifndef _MINIX
|
||||
/* # undef _MINIX */
|
||||
#endif
|
||||
/* Enable general extensions on NetBSD.
|
||||
Enable NetBSD compatibility extensions on Minix. */
|
||||
#ifndef _NETBSD_SOURCE
|
||||
# define _NETBSD_SOURCE 1
|
||||
#endif
|
||||
/* Enable OpenBSD compatibility extensions on NetBSD.
|
||||
Oddly enough, this does nothing on OpenBSD. */
|
||||
#ifndef _OPENBSD_SOURCE
|
||||
# define _OPENBSD_SOURCE 1
|
||||
#endif
|
||||
/* Define to 1 if needed for POSIX-compatible behavior. */
|
||||
#ifndef _POSIX_SOURCE
|
||||
/* # undef _POSIX_SOURCE */
|
||||
#endif
|
||||
/* Define to 2 if needed for POSIX-compatible behavior. */
|
||||
#ifndef _POSIX_1_SOURCE
|
||||
/* # undef _POSIX_1_SOURCE */
|
||||
#endif
|
||||
/* Enable POSIX-compatible threading on Solaris. */
|
||||
#ifndef _POSIX_PTHREAD_SEMANTICS
|
||||
# define _POSIX_PTHREAD_SEMANTICS 1
|
||||
#endif
|
||||
/* Enable extensions specified by ISO/IEC TS 18661-5:2014. */
|
||||
#ifndef __STDC_WANT_IEC_60559_ATTRIBS_EXT__
|
||||
# define __STDC_WANT_IEC_60559_ATTRIBS_EXT__ 1
|
||||
#endif
|
||||
/* Enable extensions specified by ISO/IEC TS 18661-1:2014. */
|
||||
#ifndef __STDC_WANT_IEC_60559_BFP_EXT__
|
||||
# define __STDC_WANT_IEC_60559_BFP_EXT__ 1
|
||||
#endif
|
||||
/* Enable extensions specified by ISO/IEC TS 18661-2:2015. */
|
||||
#ifndef __STDC_WANT_IEC_60559_DFP_EXT__
|
||||
# define __STDC_WANT_IEC_60559_DFP_EXT__ 1
|
||||
#endif
|
||||
/* Enable extensions specified by ISO/IEC TS 18661-4:2015. */
|
||||
#ifndef __STDC_WANT_IEC_60559_FUNCS_EXT__
|
||||
# define __STDC_WANT_IEC_60559_FUNCS_EXT__ 1
|
||||
#endif
|
||||
/* Enable extensions specified by ISO/IEC TS 18661-3:2015. */
|
||||
#ifndef __STDC_WANT_IEC_60559_TYPES_EXT__
|
||||
# define __STDC_WANT_IEC_60559_TYPES_EXT__ 1
|
||||
#endif
|
||||
/* Enable extensions specified by ISO/IEC TR 24731-2:2010. */
|
||||
#ifndef __STDC_WANT_LIB_EXT2__
|
||||
# define __STDC_WANT_LIB_EXT2__ 1
|
||||
#endif
|
||||
/* Enable extensions specified by ISO/IEC 24747:2009. */
|
||||
#ifndef __STDC_WANT_MATH_SPEC_FUNCS__
|
||||
# define __STDC_WANT_MATH_SPEC_FUNCS__ 1
|
||||
#endif
|
||||
/* Enable extensions on HP NonStop. */
|
||||
#ifndef _TANDEM_SOURCE
|
||||
# define _TANDEM_SOURCE 1
|
||||
#endif
|
||||
/* Enable X/Open extensions. Define to 500 only if necessary
|
||||
to make mbstate_t available. */
|
||||
#ifndef _XOPEN_SOURCE
|
||||
/* # undef _XOPEN_SOURCE */
|
||||
#endif
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "10.45"
|
||||
|
||||
/* Number of bits in a file offset, on hosts where this is settable. */
|
||||
/* #undef _FILE_OFFSET_BITS */
|
||||
|
||||
/* Define for large files, on AIX-style hosts. */
|
||||
/* #undef _LARGE_FILES */
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
/* #undef const */
|
||||
|
||||
/* Define to the type of a signed integer type of width exactly 64 bits if
|
||||
such a type exists and the standard includes do not define it. */
|
||||
/* #undef int64_t */
|
||||
|
||||
/* Define to `unsigned int' if <sys/types.h> does not define. */
|
||||
/* #undef size_t */
|
||||
@@ -1,460 +0,0 @@
|
||||
/* src/config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
|
||||
/* PCRE2 is written in Standard C, but there are a few non-standard things it
|
||||
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||
systems.
|
||||
|
||||
In environments that support the GNU autotools, config.h.in is converted into
|
||||
config.h by the "configure" script. In environments that use CMake,
|
||||
config-cmake.in is converted into config.h. If you are going to build PCRE2 "by
|
||||
hand" without using "configure" or CMake, you should copy the distributed
|
||||
config.h.generic to config.h, and edit the macro definitions to be the way you
|
||||
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
|
||||
so that config.h is included at the start of every source.
|
||||
|
||||
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
|
||||
but if you do, default values will be taken from config.h for non-boolean
|
||||
macros that are not defined on the command line.
|
||||
|
||||
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be
|
||||
defined (conventionally to 1) for TRUE, or not defined at all for FALSE. All
|
||||
such macros are listed as a commented #undef in config.h.generic. Macros such
|
||||
as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
|
||||
|
||||
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
|
||||
sure both macros are undefined; an emulation function will then be used. */
|
||||
|
||||
/* By default, the \R escape sequence matches any Unicode line ending
|
||||
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
||||
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
|
||||
The build-time default can be overridden by the user of PCRE2 at runtime.
|
||||
*/
|
||||
#undef BSR_ANYCRLF
|
||||
|
||||
/* Define to any value to disable the use of the z and t modifiers in
|
||||
formatting settings such as %zu or %td (this is rarely needed). */
|
||||
#undef DISABLE_PERCENT_ZT
|
||||
|
||||
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||
character codes, define this macro to any value. When EBCDIC is set, PCRE2
|
||||
assumes that all input strings are in EBCDIC. If you do not define this
|
||||
macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
|
||||
is not possible to build a version of PCRE2 that supports both EBCDIC and
|
||||
UTF-8/16/32. */
|
||||
#undef EBCDIC
|
||||
|
||||
/* In an EBCDIC environment, define this macro to any value to arrange for the
|
||||
NL character to be 0x25 instead of the default 0x15. NL plays the role that
|
||||
LF does in an ASCII/Unicode environment. */
|
||||
#undef EBCDIC_NL25
|
||||
|
||||
/* Define to 1 if you have the <assert.h> header file. */
|
||||
#undef HAVE_ASSERT_H
|
||||
|
||||
/* Define this if your compiler supports __attribute__((uninitialized)) */
|
||||
#undef HAVE_ATTRIBUTE_UNINITIALIZED
|
||||
|
||||
/* Define to 1 if you have the `bcopy' function. */
|
||||
#undef HAVE_BCOPY
|
||||
|
||||
/* Define this if your compiler provides __assume() */
|
||||
#undef HAVE_BUILTIN_ASSUME
|
||||
|
||||
/* Define this if your compiler provides __builtin_mul_overflow() */
|
||||
#undef HAVE_BUILTIN_MUL_OVERFLOW
|
||||
|
||||
/* Define this if your compiler provides __builtin_unreachable() */
|
||||
#undef HAVE_BUILTIN_UNREACHABLE
|
||||
|
||||
/* Define to 1 if you have the <bzlib.h> header file. */
|
||||
#undef HAVE_BZLIB_H
|
||||
|
||||
/* Define to 1 if you have the <dirent.h> header file. */
|
||||
#undef HAVE_DIRENT_H
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
#undef HAVE_DLFCN_H
|
||||
|
||||
/* Define to 1 if you have the <editline/readline.h> header file. */
|
||||
#undef HAVE_EDITLINE_READLINE_H
|
||||
|
||||
/* Define to 1 if you have the <edit/readline/readline.h> header file. */
|
||||
#undef HAVE_EDIT_READLINE_READLINE_H
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#undef HAVE_INTTYPES_H
|
||||
|
||||
/* Define to 1 if you have the <limits.h> header file. */
|
||||
#undef HAVE_LIMITS_H
|
||||
|
||||
/* Define to 1 if you have the `memfd_create' function. */
|
||||
#undef HAVE_MEMFD_CREATE
|
||||
|
||||
/* Define to 1 if you have the `memmove' function. */
|
||||
#undef HAVE_MEMMOVE
|
||||
|
||||
/* Define to 1 if you have the <minix/config.h> header file. */
|
||||
#undef HAVE_MINIX_CONFIG_H
|
||||
|
||||
/* Define to 1 if you have the `mkostemp' function. */
|
||||
#undef HAVE_MKOSTEMP
|
||||
|
||||
/* Define if you have POSIX threads libraries and header files. */
|
||||
#undef HAVE_PTHREAD
|
||||
|
||||
/* Have PTHREAD_PRIO_INHERIT. */
|
||||
#undef HAVE_PTHREAD_PRIO_INHERIT
|
||||
|
||||
/* Define to 1 if you have the <readline.h> header file. */
|
||||
#undef HAVE_READLINE_H
|
||||
|
||||
/* Define to 1 if you have the <readline/history.h> header file. */
|
||||
#undef HAVE_READLINE_HISTORY_H
|
||||
|
||||
/* Define to 1 if you have the <readline/readline.h> header file. */
|
||||
#undef HAVE_READLINE_READLINE_H
|
||||
|
||||
/* Define to 1 if you have the `realpath' function. */
|
||||
#undef HAVE_REALPATH
|
||||
|
||||
/* Define to 1 if you have the `secure_getenv' function. */
|
||||
#undef HAVE_SECURE_GETENV
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#undef HAVE_STDINT_H
|
||||
|
||||
/* Define to 1 if you have the <stdio.h> header file. */
|
||||
#undef HAVE_STDIO_H
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
#undef HAVE_STDLIB_H
|
||||
|
||||
/* Define to 1 if you have the `strerror' function. */
|
||||
#undef HAVE_STRERROR
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
#undef HAVE_STRINGS_H
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
#undef HAVE_STRING_H
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
#undef HAVE_SYS_STAT_H
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
#undef HAVE_SYS_TYPES_H
|
||||
|
||||
/* Define to 1 if you have the <sys/wait.h> header file. */
|
||||
#undef HAVE_SYS_WAIT_H
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#undef HAVE_UNISTD_H
|
||||
|
||||
/* Define to 1 if the compiler supports GCC compatible visibility
|
||||
declarations. */
|
||||
#undef HAVE_VISIBILITY
|
||||
|
||||
/* Define to 1 if you have the <wchar.h> header file. */
|
||||
#undef HAVE_WCHAR_H
|
||||
|
||||
/* Define to 1 if you have the <windows.h> header file. */
|
||||
#undef HAVE_WINDOWS_H
|
||||
|
||||
/* Define to 1 if you have the <zlib.h> header file. */
|
||||
#undef HAVE_ZLIB_H
|
||||
|
||||
/* This limits the amount of memory that may be used while matching a pattern.
|
||||
It applies to both pcre2_match() and pcre2_dfa_match(). It does not apply
|
||||
to JIT matching. The value is in kibibytes (units of 1024 bytes). */
|
||||
#undef HEAP_LIMIT
|
||||
|
||||
/* The value of LINK_SIZE determines the number of bytes used to store links
|
||||
as offsets within the compiled regex. The default is 2, which allows for
|
||||
compiled patterns up to 65535 code units long. This covers the vast
|
||||
majority of cases. However, PCRE2 can also be compiled to use 3 or 4 bytes
|
||||
instead. This allows for longer patterns in extreme cases. */
|
||||
#undef LINK_SIZE
|
||||
|
||||
/* Define to the sub-directory where libtool stores uninstalled libraries. */
|
||||
#undef LT_OBJDIR
|
||||
|
||||
/* The value of MATCH_LIMIT determines the default number of times the
|
||||
pcre2_match() function can record a backtrack position during a single
|
||||
matching attempt. The value is also used to limit a loop counter in
|
||||
pcre2_dfa_match(). There is a runtime interface for setting a different
|
||||
limit. The limit exists in order to catch runaway regular expressions that
|
||||
take forever to determine that they do not match. The default is set very
|
||||
large so that it does not accidentally catch legitimate cases. */
|
||||
#undef MATCH_LIMIT
|
||||
|
||||
/* The above limit applies to all backtracks, whether or not they are nested.
|
||||
In some environments it is desirable to limit the nesting of backtracking
|
||||
(that is, the depth of tree that is searched) more strictly, in order to
|
||||
restrict the maximum amount of heap memory that is used. The value of
|
||||
MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it
|
||||
must be less than the value of MATCH_LIMIT. The default is to use the same
|
||||
value as MATCH_LIMIT. There is a runtime method for setting a different
|
||||
limit. In the case of pcre2_dfa_match(), this limit controls the depth of
|
||||
the internal nested function calls that are used for pattern recursions,
|
||||
lookarounds, and atomic groups. */
|
||||
#undef MATCH_LIMIT_DEPTH
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#undef MAX_NAME_COUNT
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#undef MAX_NAME_SIZE
|
||||
|
||||
/* The value of MAX_VARLOOKBEHIND specifies the default maximum length, in
|
||||
characters, for a variable-length lookbehind assertion. */
|
||||
#undef MAX_VARLOOKBEHIND
|
||||
|
||||
/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */
|
||||
#undef NEVER_BACKSLASH_C
|
||||
|
||||
/* The value of NEWLINE_DEFAULT determines the default newline character
|
||||
sequence. PCRE2 client programs can override this by selecting other values
|
||||
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), 5
|
||||
(ANYCRLF), and 6 (NUL). */
|
||||
#undef NEWLINE_DEFAULT
|
||||
|
||||
/* Name of package */
|
||||
#undef PACKAGE
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#undef PACKAGE_BUGREPORT
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#undef PACKAGE_NAME
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#undef PACKAGE_STRING
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#undef PACKAGE_TARNAME
|
||||
|
||||
/* Define to the home page for this package. */
|
||||
#undef PACKAGE_URL
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#undef PACKAGE_VERSION
|
||||
|
||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
stack that is used while compiling a pattern. */
|
||||
#undef PARENS_NEST_LIMIT
|
||||
|
||||
/* The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by
|
||||
pcre2grep to hold parts of the file it is searching. The buffer will be
|
||||
expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing
|
||||
very long lines. The actual amount of memory used by pcre2grep is three
|
||||
times this number, because it allows for the buffering of "before" and
|
||||
"after" lines. */
|
||||
#undef PCRE2GREP_BUFSIZE
|
||||
|
||||
/* The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer
|
||||
used by pcre2grep to hold parts of the file it is searching. The actual
|
||||
amount of memory used by pcre2grep is three times this number, because it
|
||||
allows for the buffering of "before" and "after" lines. */
|
||||
#undef PCRE2GREP_MAX_BUFSIZE
|
||||
|
||||
/* Define to any value to include debugging code. */
|
||||
#undef PCRE2_DEBUG
|
||||
|
||||
/* to make a symbol visible */
|
||||
#undef PCRE2_EXPORT
|
||||
|
||||
|
||||
/* If you are compiling for a system other than a Unix-like system or
|
||||
Win32, and it needs some magic to be inserted before the definition
|
||||
of a function that is exported by the library, define this macro to
|
||||
contain the relevant magic. If you do not define this macro, a suitable
|
||||
__declspec value is used for Windows systems; in other environments
|
||||
a compiler relevant "extern" is used with any "visibility" related
|
||||
attributes from PCRE2_EXPORT included.
|
||||
This macro appears at the start of every exported function that is part
|
||||
of the external API. It does not appear on functions that are "external"
|
||||
in the C sense, but which are internal to the library. */
|
||||
#undef PCRE2_EXP_DEFN
|
||||
|
||||
/* Define to any value if linking statically (TODO: make nice with Libtool) */
|
||||
#undef PCRE2_STATIC
|
||||
|
||||
/* Define to necessary symbol if this constant uses a non-standard name on
|
||||
your system. */
|
||||
#undef PTHREAD_CREATE_JOINABLE
|
||||
|
||||
/* Define to any non-zero number to enable support for SELinux compatible
|
||||
executable memory allocator in JIT. Note that this will have no effect
|
||||
unless SUPPORT_JIT is also defined. */
|
||||
#undef SLJIT_PROT_EXECUTABLE_ALLOCATOR
|
||||
|
||||
/* Define to 1 if all of the C90 standard headers exist (not just the ones
|
||||
required in a freestanding environment). This macro is provided for
|
||||
backward compatibility; new code need not use it. */
|
||||
#undef STDC_HEADERS
|
||||
|
||||
/* Define to any value to enable differential fuzzing support. */
|
||||
#undef SUPPORT_DIFF_FUZZ
|
||||
|
||||
/* Define to any value to enable support for Just-In-Time compiling. */
|
||||
#undef SUPPORT_JIT
|
||||
|
||||
/* Define to any value to allow pcre2grep to be linked with libbz2, so that it
|
||||
is able to handle .bz2 files. */
|
||||
#undef SUPPORT_LIBBZ2
|
||||
|
||||
/* Define to any value to allow pcre2test to be linked with libedit. */
|
||||
#undef SUPPORT_LIBEDIT
|
||||
|
||||
/* Define to any value to allow pcre2test to be linked with libreadline. */
|
||||
#undef SUPPORT_LIBREADLINE
|
||||
|
||||
/* Define to any value to allow pcre2grep to be linked with libz, so that it
|
||||
is able to handle .gz files. */
|
||||
#undef SUPPORT_LIBZ
|
||||
|
||||
/* Define to any value to enable callout script support in pcre2grep. */
|
||||
#undef SUPPORT_PCRE2GREP_CALLOUT
|
||||
|
||||
/* Define to any value to enable fork support in pcre2grep callout scripts.
|
||||
This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also defined.
|
||||
*/
|
||||
#undef SUPPORT_PCRE2GREP_CALLOUT_FORK
|
||||
|
||||
/* Define to any value to enable JIT support in pcre2grep. Note that this will
|
||||
have no effect unless SUPPORT_JIT is also defined. */
|
||||
#undef SUPPORT_PCRE2GREP_JIT
|
||||
|
||||
/* Define to any value to enable the 16 bit PCRE2 library. */
|
||||
#undef SUPPORT_PCRE2_16
|
||||
|
||||
/* Define to any value to enable the 32 bit PCRE2 library. */
|
||||
#undef SUPPORT_PCRE2_32
|
||||
|
||||
/* Define to any value to enable the 8 bit PCRE2 library. */
|
||||
#undef SUPPORT_PCRE2_8
|
||||
|
||||
/* Define to any value to enable support for Unicode and UTF encoding. This
|
||||
will work even in an EBCDIC environment, but it is incompatible with the
|
||||
EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or*
|
||||
ASCII/Unicode, but not both at once. */
|
||||
#undef SUPPORT_UNICODE
|
||||
|
||||
/* Define to any value for valgrind support to find invalid memory reads. */
|
||||
#undef SUPPORT_VALGRIND
|
||||
|
||||
/* Enable extensions on AIX 3, Interix. */
|
||||
#ifndef _ALL_SOURCE
|
||||
# undef _ALL_SOURCE
|
||||
#endif
|
||||
/* Enable general extensions on macOS. */
|
||||
#ifndef _DARWIN_C_SOURCE
|
||||
# undef _DARWIN_C_SOURCE
|
||||
#endif
|
||||
/* Enable general extensions on Solaris. */
|
||||
#ifndef __EXTENSIONS__
|
||||
# undef __EXTENSIONS__
|
||||
#endif
|
||||
/* Enable GNU extensions on systems that have them. */
|
||||
#ifndef _GNU_SOURCE
|
||||
# undef _GNU_SOURCE
|
||||
#endif
|
||||
/* Enable X/Open compliant socket functions that do not require linking
|
||||
with -lxnet on HP-UX 11.11. */
|
||||
#ifndef _HPUX_ALT_XOPEN_SOCKET_API
|
||||
# undef _HPUX_ALT_XOPEN_SOCKET_API
|
||||
#endif
|
||||
/* Identify the host operating system as Minix.
|
||||
This macro does not affect the system headers' behavior.
|
||||
A future release of Autoconf may stop defining this macro. */
|
||||
#ifndef _MINIX
|
||||
# undef _MINIX
|
||||
#endif
|
||||
/* Enable general extensions on NetBSD.
|
||||
Enable NetBSD compatibility extensions on Minix. */
|
||||
#ifndef _NETBSD_SOURCE
|
||||
# undef _NETBSD_SOURCE
|
||||
#endif
|
||||
/* Enable OpenBSD compatibility extensions on NetBSD.
|
||||
Oddly enough, this does nothing on OpenBSD. */
|
||||
#ifndef _OPENBSD_SOURCE
|
||||
# undef _OPENBSD_SOURCE
|
||||
#endif
|
||||
/* Define to 1 if needed for POSIX-compatible behavior. */
|
||||
#ifndef _POSIX_SOURCE
|
||||
# undef _POSIX_SOURCE
|
||||
#endif
|
||||
/* Define to 2 if needed for POSIX-compatible behavior. */
|
||||
#ifndef _POSIX_1_SOURCE
|
||||
# undef _POSIX_1_SOURCE
|
||||
#endif
|
||||
/* Enable POSIX-compatible threading on Solaris. */
|
||||
#ifndef _POSIX_PTHREAD_SEMANTICS
|
||||
# undef _POSIX_PTHREAD_SEMANTICS
|
||||
#endif
|
||||
/* Enable extensions specified by ISO/IEC TS 18661-5:2014. */
|
||||
#ifndef __STDC_WANT_IEC_60559_ATTRIBS_EXT__
|
||||
# undef __STDC_WANT_IEC_60559_ATTRIBS_EXT__
|
||||
#endif
|
||||
/* Enable extensions specified by ISO/IEC TS 18661-1:2014. */
|
||||
#ifndef __STDC_WANT_IEC_60559_BFP_EXT__
|
||||
# undef __STDC_WANT_IEC_60559_BFP_EXT__
|
||||
#endif
|
||||
/* Enable extensions specified by ISO/IEC TS 18661-2:2015. */
|
||||
#ifndef __STDC_WANT_IEC_60559_DFP_EXT__
|
||||
# undef __STDC_WANT_IEC_60559_DFP_EXT__
|
||||
#endif
|
||||
/* Enable extensions specified by ISO/IEC TS 18661-4:2015. */
|
||||
#ifndef __STDC_WANT_IEC_60559_FUNCS_EXT__
|
||||
# undef __STDC_WANT_IEC_60559_FUNCS_EXT__
|
||||
#endif
|
||||
/* Enable extensions specified by ISO/IEC TS 18661-3:2015. */
|
||||
#ifndef __STDC_WANT_IEC_60559_TYPES_EXT__
|
||||
# undef __STDC_WANT_IEC_60559_TYPES_EXT__
|
||||
#endif
|
||||
/* Enable extensions specified by ISO/IEC TR 24731-2:2010. */
|
||||
#ifndef __STDC_WANT_LIB_EXT2__
|
||||
# undef __STDC_WANT_LIB_EXT2__
|
||||
#endif
|
||||
/* Enable extensions specified by ISO/IEC 24747:2009. */
|
||||
#ifndef __STDC_WANT_MATH_SPEC_FUNCS__
|
||||
# undef __STDC_WANT_MATH_SPEC_FUNCS__
|
||||
#endif
|
||||
/* Enable extensions on HP NonStop. */
|
||||
#ifndef _TANDEM_SOURCE
|
||||
# undef _TANDEM_SOURCE
|
||||
#endif
|
||||
/* Enable X/Open extensions. Define to 500 only if necessary
|
||||
to make mbstate_t available. */
|
||||
#ifndef _XOPEN_SOURCE
|
||||
# undef _XOPEN_SOURCE
|
||||
#endif
|
||||
|
||||
|
||||
/* Version number of package */
|
||||
#undef VERSION
|
||||
|
||||
/* Number of bits in a file offset, on hosts where this is settable. */
|
||||
#undef _FILE_OFFSET_BITS
|
||||
|
||||
/* Define for large files, on AIX-style hosts. */
|
||||
#undef _LARGE_FILES
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
#undef const
|
||||
|
||||
/* Define to the type of a signed integer type of width exactly 64 bits if
|
||||
such a type exists and the standard includes do not define it. */
|
||||
#undef int64_t
|
||||
|
||||
/* Define to `unsigned int' if <sys/types.h> does not define. */
|
||||
#undef size_t
|
||||
@@ -1,1069 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This is the public header file for the PCRE library, second API, to be
|
||||
#included by applications that call PCRE2 functions.
|
||||
|
||||
Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef PCRE2_H_IDEMPOTENT_GUARD
|
||||
#define PCRE2_H_IDEMPOTENT_GUARD
|
||||
|
||||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE2_MAJOR 10
|
||||
#define PCRE2_MINOR 45
|
||||
#define PCRE2_PRERELEASE
|
||||
#define PCRE2_DATE 2025-02-05
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE2, the appropriate
|
||||
export setting is defined in pcre2_internal.h, which includes this file. So we
|
||||
don't change existing definitions of PCRE2_EXP_DECL. */
|
||||
|
||||
#if defined(_WIN32) && !defined(PCRE2_STATIC)
|
||||
# ifndef PCRE2_EXP_DECL
|
||||
# define PCRE2_EXP_DECL extern __declspec(dllimport)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* By default, we use the standard "extern" declarations. */
|
||||
|
||||
#ifndef PCRE2_EXP_DECL
|
||||
# ifdef __cplusplus
|
||||
# define PCRE2_EXP_DECL extern "C"
|
||||
# else
|
||||
# define PCRE2_EXP_DECL extern
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* When compiling with the MSVC compiler, it is sometimes necessary to include
|
||||
a "calling convention" before exported function names. (This is secondhand
|
||||
information; I know nothing about MSVC myself). For example, something like
|
||||
|
||||
void __cdecl function(....)
|
||||
|
||||
might be needed. In order to make this easy, all the exported functions have
|
||||
PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not
|
||||
set, we ensure here that it has no effect. */
|
||||
|
||||
#ifndef PCRE2_CALL_CONVENTION
|
||||
#define PCRE2_CALL_CONVENTION
|
||||
#endif
|
||||
|
||||
/* Have to include limits.h, stdlib.h, and inttypes.h to ensure that size_t and
|
||||
uint8_t, UCHAR_MAX, etc are defined. Some systems that do have inttypes.h do
|
||||
not have stdint.h, which is why we use inttypes.h, which according to the C
|
||||
standard is a superset of stdint.h. If inttypes.h is not available the build
|
||||
will break and the relevant values must be provided by some other means. */
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
/* Allow for C++ users compiling this directly. */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* The following option bits can be passed to pcre2_compile(), pcre2_match(),
|
||||
or pcre2_dfa_match(). PCRE2_NO_UTF_CHECK affects only the function to which it
|
||||
is passed. Put these bits at the most significant end of the options word so
|
||||
others can be added next to them */
|
||||
|
||||
#define PCRE2_ANCHORED 0x80000000u
|
||||
#define PCRE2_NO_UTF_CHECK 0x40000000u
|
||||
#define PCRE2_ENDANCHORED 0x20000000u
|
||||
|
||||
/* The following option bits can be passed only to pcre2_compile(). However,
|
||||
they may affect compilation, JIT compilation, and/or interpretive execution.
|
||||
The following tags indicate which:
|
||||
|
||||
C alters what is compiled by pcre2_compile()
|
||||
J alters what is compiled by pcre2_jit_compile()
|
||||
M is inspected during pcre2_match() execution
|
||||
D is inspected during pcre2_dfa_match() execution
|
||||
*/
|
||||
|
||||
#define PCRE2_ALLOW_EMPTY_CLASS 0x00000001u /* C */
|
||||
#define PCRE2_ALT_BSUX 0x00000002u /* C */
|
||||
#define PCRE2_AUTO_CALLOUT 0x00000004u /* C */
|
||||
#define PCRE2_CASELESS 0x00000008u /* C */
|
||||
#define PCRE2_DOLLAR_ENDONLY 0x00000010u /* J M D */
|
||||
#define PCRE2_DOTALL 0x00000020u /* C */
|
||||
#define PCRE2_DUPNAMES 0x00000040u /* C */
|
||||
#define PCRE2_EXTENDED 0x00000080u /* C */
|
||||
#define PCRE2_FIRSTLINE 0x00000100u /* J M D */
|
||||
#define PCRE2_MATCH_UNSET_BACKREF 0x00000200u /* C J M */
|
||||
#define PCRE2_MULTILINE 0x00000400u /* C */
|
||||
#define PCRE2_NEVER_UCP 0x00000800u /* C */
|
||||
#define PCRE2_NEVER_UTF 0x00001000u /* C */
|
||||
#define PCRE2_NO_AUTO_CAPTURE 0x00002000u /* C */
|
||||
#define PCRE2_NO_AUTO_POSSESS 0x00004000u /* C */
|
||||
#define PCRE2_NO_DOTSTAR_ANCHOR 0x00008000u /* C */
|
||||
#define PCRE2_NO_START_OPTIMIZE 0x00010000u /* J M D */
|
||||
#define PCRE2_UCP 0x00020000u /* C J M D */
|
||||
#define PCRE2_UNGREEDY 0x00040000u /* C */
|
||||
#define PCRE2_UTF 0x00080000u /* C J M D */
|
||||
#define PCRE2_NEVER_BACKSLASH_C 0x00100000u /* C */
|
||||
#define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */
|
||||
#define PCRE2_ALT_VERBNAMES 0x00400000u /* C */
|
||||
#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */
|
||||
#define PCRE2_EXTENDED_MORE 0x01000000u /* C */
|
||||
#define PCRE2_LITERAL 0x02000000u /* C */
|
||||
#define PCRE2_MATCH_INVALID_UTF 0x04000000u /* J M D */
|
||||
#define PCRE2_ALT_EXTENDED_CLASS 0x08000000u /* C */
|
||||
|
||||
/* An additional compile options word is available in the compile context. */
|
||||
|
||||
#define PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES 0x00000001u /* C */
|
||||
#define PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL 0x00000002u /* C */
|
||||
#define PCRE2_EXTRA_MATCH_WORD 0x00000004u /* C */
|
||||
#define PCRE2_EXTRA_MATCH_LINE 0x00000008u /* C */
|
||||
#define PCRE2_EXTRA_ESCAPED_CR_IS_LF 0x00000010u /* C */
|
||||
#define PCRE2_EXTRA_ALT_BSUX 0x00000020u /* C */
|
||||
#define PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK 0x00000040u /* C */
|
||||
#define PCRE2_EXTRA_CASELESS_RESTRICT 0x00000080u /* C */
|
||||
#define PCRE2_EXTRA_ASCII_BSD 0x00000100u /* C */
|
||||
#define PCRE2_EXTRA_ASCII_BSS 0x00000200u /* C */
|
||||
#define PCRE2_EXTRA_ASCII_BSW 0x00000400u /* C */
|
||||
#define PCRE2_EXTRA_ASCII_POSIX 0x00000800u /* C */
|
||||
#define PCRE2_EXTRA_ASCII_DIGIT 0x00001000u /* C */
|
||||
#define PCRE2_EXTRA_PYTHON_OCTAL 0x00002000u /* C */
|
||||
#define PCRE2_EXTRA_NO_BS0 0x00004000u /* C */
|
||||
#define PCRE2_EXTRA_NEVER_CALLOUT 0x00008000u /* C */
|
||||
#define PCRE2_EXTRA_TURKISH_CASING 0x00010000u /* C */
|
||||
|
||||
/* These are for pcre2_jit_compile(). */
|
||||
|
||||
#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */
|
||||
#define PCRE2_JIT_PARTIAL_SOFT 0x00000002u
|
||||
#define PCRE2_JIT_PARTIAL_HARD 0x00000004u
|
||||
#define PCRE2_JIT_INVALID_UTF 0x00000100u
|
||||
#define PCRE2_JIT_TEST_ALLOC 0x00000200u
|
||||
|
||||
/* These are for pcre2_match(), pcre2_dfa_match(), pcre2_jit_match(), and
|
||||
pcre2_substitute(). Some are allowed only for one of the functions, and in
|
||||
these cases it is noted below. Note that PCRE2_ANCHORED, PCRE2_ENDANCHORED and
|
||||
PCRE2_NO_UTF_CHECK can also be passed to these functions (though
|
||||
pcre2_jit_match() ignores the latter since it bypasses all sanity checks). */
|
||||
|
||||
#define PCRE2_NOTBOL 0x00000001u
|
||||
#define PCRE2_NOTEOL 0x00000002u
|
||||
#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */
|
||||
#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. */
|
||||
#define PCRE2_PARTIAL_SOFT 0x00000010u
|
||||
#define PCRE2_PARTIAL_HARD 0x00000020u
|
||||
#define PCRE2_DFA_RESTART 0x00000040u /* pcre2_dfa_match() only */
|
||||
#define PCRE2_DFA_SHORTEST 0x00000080u /* pcre2_dfa_match() only */
|
||||
#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u /* pcre2_substitute() only */
|
||||
#define PCRE2_NO_JIT 0x00002000u /* not for pcre2_dfa_match() */
|
||||
#define PCRE2_COPY_MATCHED_SUBJECT 0x00004000u
|
||||
#define PCRE2_SUBSTITUTE_LITERAL 0x00008000u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_MATCHED 0x00010000u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_REPLACEMENT_ONLY 0x00020000u /* pcre2_substitute() only */
|
||||
#define PCRE2_DISABLE_RECURSELOOP_CHECK 0x00040000u /* not for pcre2_dfa_match() or pcre2_jit_match() */
|
||||
|
||||
/* Options for pcre2_pattern_convert(). */
|
||||
|
||||
#define PCRE2_CONVERT_UTF 0x00000001u
|
||||
#define PCRE2_CONVERT_NO_UTF_CHECK 0x00000002u
|
||||
#define PCRE2_CONVERT_POSIX_BASIC 0x00000004u
|
||||
#define PCRE2_CONVERT_POSIX_EXTENDED 0x00000008u
|
||||
#define PCRE2_CONVERT_GLOB 0x00000010u
|
||||
#define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000030u /* 0x20 plus the PCRE2_CONVERT_GLOB bit (0x10) */
|
||||
#define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000050u /* 0x40 plus the PCRE2_CONVERT_GLOB bit (0x10) */
|
||||
|
||||
/* Newline and \R settings, for use in compile contexts. The newline values
|
||||
must be kept in step with values set in config.h and both sets must all be
|
||||
greater than zero. */
|
||||
|
||||
#define PCRE2_NEWLINE_CR 1
|
||||
#define PCRE2_NEWLINE_LF 2
|
||||
#define PCRE2_NEWLINE_CRLF 3
|
||||
#define PCRE2_NEWLINE_ANY 4
|
||||
#define PCRE2_NEWLINE_ANYCRLF 5
|
||||
#define PCRE2_NEWLINE_NUL 6
|
||||
|
||||
#define PCRE2_BSR_UNICODE 1
|
||||
#define PCRE2_BSR_ANYCRLF 2
|
||||
|
||||
/* Error codes for pcre2_compile(). Some of these are also used by
|
||||
pcre2_pattern_convert(). */
|
||||
|
||||
#define PCRE2_ERROR_END_BACKSLASH 101
|
||||
#define PCRE2_ERROR_END_BACKSLASH_C 102
|
||||
#define PCRE2_ERROR_UNKNOWN_ESCAPE 103
|
||||
#define PCRE2_ERROR_QUANTIFIER_OUT_OF_ORDER 104
|
||||
#define PCRE2_ERROR_QUANTIFIER_TOO_BIG 105
|
||||
#define PCRE2_ERROR_MISSING_SQUARE_BRACKET 106
|
||||
#define PCRE2_ERROR_ESCAPE_INVALID_IN_CLASS 107
|
||||
#define PCRE2_ERROR_CLASS_RANGE_ORDER 108
|
||||
#define PCRE2_ERROR_QUANTIFIER_INVALID 109
|
||||
#define PCRE2_ERROR_INTERNAL_UNEXPECTED_REPEAT 110
|
||||
#define PCRE2_ERROR_INVALID_AFTER_PARENS_QUERY 111
|
||||
#define PCRE2_ERROR_POSIX_CLASS_NOT_IN_CLASS 112
|
||||
#define PCRE2_ERROR_POSIX_NO_SUPPORT_COLLATING 113
|
||||
#define PCRE2_ERROR_MISSING_CLOSING_PARENTHESIS 114
|
||||
#define PCRE2_ERROR_BAD_SUBPATTERN_REFERENCE 115
|
||||
#define PCRE2_ERROR_NULL_PATTERN 116
|
||||
#define PCRE2_ERROR_BAD_OPTIONS 117
|
||||
#define PCRE2_ERROR_MISSING_COMMENT_CLOSING 118
|
||||
#define PCRE2_ERROR_PARENTHESES_NEST_TOO_DEEP 119
|
||||
#define PCRE2_ERROR_PATTERN_TOO_LARGE 120
|
||||
#define PCRE2_ERROR_HEAP_FAILED 121
|
||||
#define PCRE2_ERROR_UNMATCHED_CLOSING_PARENTHESIS 122
|
||||
#define PCRE2_ERROR_INTERNAL_CODE_OVERFLOW 123
|
||||
#define PCRE2_ERROR_MISSING_CONDITION_CLOSING 124
|
||||
#define PCRE2_ERROR_LOOKBEHIND_NOT_FIXED_LENGTH 125
|
||||
#define PCRE2_ERROR_ZERO_RELATIVE_REFERENCE 126
|
||||
#define PCRE2_ERROR_TOO_MANY_CONDITION_BRANCHES 127
|
||||
#define PCRE2_ERROR_CONDITION_ASSERTION_EXPECTED 128
|
||||
#define PCRE2_ERROR_BAD_RELATIVE_REFERENCE 129
|
||||
#define PCRE2_ERROR_UNKNOWN_POSIX_CLASS 130
|
||||
#define PCRE2_ERROR_INTERNAL_STUDY_ERROR 131
|
||||
#define PCRE2_ERROR_UNICODE_NOT_SUPPORTED 132
|
||||
#define PCRE2_ERROR_PARENTHESES_STACK_CHECK 133
|
||||
#define PCRE2_ERROR_CODE_POINT_TOO_BIG 134
|
||||
#define PCRE2_ERROR_LOOKBEHIND_TOO_COMPLICATED 135
|
||||
#define PCRE2_ERROR_LOOKBEHIND_INVALID_BACKSLASH_C 136
|
||||
#define PCRE2_ERROR_UNSUPPORTED_ESCAPE_SEQUENCE 137
|
||||
#define PCRE2_ERROR_CALLOUT_NUMBER_TOO_BIG 138
|
||||
#define PCRE2_ERROR_MISSING_CALLOUT_CLOSING 139
|
||||
#define PCRE2_ERROR_ESCAPE_INVALID_IN_VERB 140
|
||||
#define PCRE2_ERROR_UNRECOGNIZED_AFTER_QUERY_P 141
|
||||
#define PCRE2_ERROR_MISSING_NAME_TERMINATOR 142
|
||||
#define PCRE2_ERROR_DUPLICATE_SUBPATTERN_NAME 143
|
||||
#define PCRE2_ERROR_INVALID_SUBPATTERN_NAME 144
|
||||
#define PCRE2_ERROR_UNICODE_PROPERTIES_UNAVAILABLE 145
|
||||
#define PCRE2_ERROR_MALFORMED_UNICODE_PROPERTY 146
|
||||
#define PCRE2_ERROR_UNKNOWN_UNICODE_PROPERTY 147
|
||||
#define PCRE2_ERROR_SUBPATTERN_NAME_TOO_LONG 148
|
||||
#define PCRE2_ERROR_TOO_MANY_NAMED_SUBPATTERNS 149
|
||||
#define PCRE2_ERROR_CLASS_INVALID_RANGE 150
|
||||
#define PCRE2_ERROR_OCTAL_BYTE_TOO_BIG 151
|
||||
#define PCRE2_ERROR_INTERNAL_OVERRAN_WORKSPACE 152
|
||||
#define PCRE2_ERROR_INTERNAL_MISSING_SUBPATTERN 153
|
||||
#define PCRE2_ERROR_DEFINE_TOO_MANY_BRANCHES 154
|
||||
#define PCRE2_ERROR_BACKSLASH_O_MISSING_BRACE 155
|
||||
#define PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE 156
|
||||
#define PCRE2_ERROR_BACKSLASH_G_SYNTAX 157
|
||||
#define PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING 158
|
||||
/* Error 159 is obsolete and should now never occur */
|
||||
#define PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED 159
|
||||
#define PCRE2_ERROR_VERB_UNKNOWN 160
|
||||
#define PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG 161
|
||||
#define PCRE2_ERROR_SUBPATTERN_NAME_EXPECTED 162
|
||||
#define PCRE2_ERROR_INTERNAL_PARSED_OVERFLOW 163
|
||||
#define PCRE2_ERROR_INVALID_OCTAL 164
|
||||
#define PCRE2_ERROR_SUBPATTERN_NAMES_MISMATCH 165
|
||||
#define PCRE2_ERROR_MARK_MISSING_ARGUMENT 166
|
||||
#define PCRE2_ERROR_INVALID_HEXADECIMAL 167
|
||||
#define PCRE2_ERROR_BACKSLASH_C_SYNTAX 168
|
||||
#define PCRE2_ERROR_BACKSLASH_K_SYNTAX 169
|
||||
#define PCRE2_ERROR_INTERNAL_BAD_CODE_LOOKBEHINDS 170
|
||||
#define PCRE2_ERROR_BACKSLASH_N_IN_CLASS 171
|
||||
#define PCRE2_ERROR_CALLOUT_STRING_TOO_LONG 172
|
||||
#define PCRE2_ERROR_UNICODE_DISALLOWED_CODE_POINT 173
|
||||
#define PCRE2_ERROR_UTF_IS_DISABLED 174
|
||||
#define PCRE2_ERROR_UCP_IS_DISABLED 175
|
||||
#define PCRE2_ERROR_VERB_NAME_TOO_LONG 176
|
||||
#define PCRE2_ERROR_BACKSLASH_U_CODE_POINT_TOO_BIG 177
|
||||
#define PCRE2_ERROR_MISSING_OCTAL_OR_HEX_DIGITS 178
|
||||
#define PCRE2_ERROR_VERSION_CONDITION_SYNTAX 179
|
||||
#define PCRE2_ERROR_INTERNAL_BAD_CODE_AUTO_POSSESS 180
|
||||
#define PCRE2_ERROR_CALLOUT_NO_STRING_DELIMITER 181
|
||||
#define PCRE2_ERROR_CALLOUT_BAD_STRING_DELIMITER 182
|
||||
#define PCRE2_ERROR_BACKSLASH_C_CALLER_DISABLED 183
|
||||
#define PCRE2_ERROR_QUERY_BARJX_NEST_TOO_DEEP 184
|
||||
#define PCRE2_ERROR_BACKSLASH_C_LIBRARY_DISABLED 185
|
||||
#define PCRE2_ERROR_PATTERN_TOO_COMPLICATED 186
|
||||
#define PCRE2_ERROR_LOOKBEHIND_TOO_LONG 187
|
||||
#define PCRE2_ERROR_PATTERN_STRING_TOO_LONG 188
|
||||
#define PCRE2_ERROR_INTERNAL_BAD_CODE 189
|
||||
#define PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP 190
|
||||
#define PCRE2_ERROR_NO_SURROGATES_IN_UTF16 191
|
||||
#define PCRE2_ERROR_BAD_LITERAL_OPTIONS 192
|
||||
#define PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE 193
|
||||
#define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS 194
|
||||
#define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN 195
|
||||
#define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE 196
|
||||
#define PCRE2_ERROR_TOO_MANY_CAPTURES 197
|
||||
#define PCRE2_ERROR_MISSING_OCTAL_DIGIT 198
|
||||
#define PCRE2_ERROR_BACKSLASH_K_IN_LOOKAROUND 199
|
||||
#define PCRE2_ERROR_MAX_VAR_LOOKBEHIND_EXCEEDED 200
|
||||
#define PCRE2_ERROR_PATTERN_COMPILED_SIZE_TOO_BIG 201
|
||||
#define PCRE2_ERROR_OVERSIZE_PYTHON_OCTAL 202
|
||||
#define PCRE2_ERROR_CALLOUT_CALLER_DISABLED 203
|
||||
#define PCRE2_ERROR_EXTRA_CASING_REQUIRES_UNICODE 204
|
||||
#define PCRE2_ERROR_TURKISH_CASING_REQUIRES_UTF 205
|
||||
#define PCRE2_ERROR_EXTRA_CASING_INCOMPATIBLE 206
|
||||
#define PCRE2_ERROR_ECLASS_NEST_TOO_DEEP 207
|
||||
#define PCRE2_ERROR_ECLASS_INVALID_OPERATOR 208
|
||||
#define PCRE2_ERROR_ECLASS_UNEXPECTED_OPERATOR 209
|
||||
#define PCRE2_ERROR_ECLASS_EXPECTED_OPERAND 210
|
||||
#define PCRE2_ERROR_ECLASS_MIXED_OPERATORS 211
|
||||
#define PCRE2_ERROR_ECLASS_HINT_SQUARE_BRACKET 212
|
||||
#define PCRE2_ERROR_PERL_ECLASS_UNEXPECTED_EXPR 213
|
||||
#define PCRE2_ERROR_PERL_ECLASS_EMPTY_EXPR 214
|
||||
#define PCRE2_ERROR_PERL_ECLASS_MISSING_CLOSE 215
|
||||
#define PCRE2_ERROR_PERL_ECLASS_UNEXPECTED_CHAR 216
|
||||
|
||||
/* "Expected" matching error codes: no match and partial match. */
|
||||
|
||||
#define PCRE2_ERROR_NOMATCH (-1)
|
||||
#define PCRE2_ERROR_PARTIAL (-2)
|
||||
|
||||
/* Error codes for UTF-8 validity checks */
|
||||
|
||||
#define PCRE2_ERROR_UTF8_ERR1 (-3)
|
||||
#define PCRE2_ERROR_UTF8_ERR2 (-4)
|
||||
#define PCRE2_ERROR_UTF8_ERR3 (-5)
|
||||
#define PCRE2_ERROR_UTF8_ERR4 (-6)
|
||||
#define PCRE2_ERROR_UTF8_ERR5 (-7)
|
||||
#define PCRE2_ERROR_UTF8_ERR6 (-8)
|
||||
#define PCRE2_ERROR_UTF8_ERR7 (-9)
|
||||
#define PCRE2_ERROR_UTF8_ERR8 (-10)
|
||||
#define PCRE2_ERROR_UTF8_ERR9 (-11)
|
||||
#define PCRE2_ERROR_UTF8_ERR10 (-12)
|
||||
#define PCRE2_ERROR_UTF8_ERR11 (-13)
|
||||
#define PCRE2_ERROR_UTF8_ERR12 (-14)
|
||||
#define PCRE2_ERROR_UTF8_ERR13 (-15)
|
||||
#define PCRE2_ERROR_UTF8_ERR14 (-16)
|
||||
#define PCRE2_ERROR_UTF8_ERR15 (-17)
|
||||
#define PCRE2_ERROR_UTF8_ERR16 (-18)
|
||||
#define PCRE2_ERROR_UTF8_ERR17 (-19)
|
||||
#define PCRE2_ERROR_UTF8_ERR18 (-20)
|
||||
#define PCRE2_ERROR_UTF8_ERR19 (-21)
|
||||
#define PCRE2_ERROR_UTF8_ERR20 (-22)
|
||||
#define PCRE2_ERROR_UTF8_ERR21 (-23)
|
||||
|
||||
/* Error codes for UTF-16 validity checks */
|
||||
|
||||
#define PCRE2_ERROR_UTF16_ERR1 (-24)
|
||||
#define PCRE2_ERROR_UTF16_ERR2 (-25)
|
||||
#define PCRE2_ERROR_UTF16_ERR3 (-26)
|
||||
|
||||
/* Error codes for UTF-32 validity checks */
|
||||
|
||||
#define PCRE2_ERROR_UTF32_ERR1 (-27)
|
||||
#define PCRE2_ERROR_UTF32_ERR2 (-28)
|
||||
|
||||
/* Miscellaneous error codes for pcre2[_dfa]_match(), substring extraction
|
||||
functions, context functions, and serializing functions. They are in numerical
|
||||
order. Originally they were in alphabetical order too, but now that PCRE2 is
|
||||
released, the numbers must not be changed. */
|
||||
|
||||
#define PCRE2_ERROR_BADDATA (-29)
|
||||
#define PCRE2_ERROR_MIXEDTABLES (-30) /* Name was changed */
|
||||
#define PCRE2_ERROR_BADMAGIC (-31)
|
||||
#define PCRE2_ERROR_BADMODE (-32)
|
||||
#define PCRE2_ERROR_BADOFFSET (-33)
|
||||
#define PCRE2_ERROR_BADOPTION (-34)
|
||||
#define PCRE2_ERROR_BADREPLACEMENT (-35)
|
||||
#define PCRE2_ERROR_BADUTFOFFSET (-36)
|
||||
#define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */
|
||||
#define PCRE2_ERROR_DFA_BADRESTART (-38)
|
||||
#define PCRE2_ERROR_DFA_RECURSE (-39)
|
||||
#define PCRE2_ERROR_DFA_UCOND (-40)
|
||||
#define PCRE2_ERROR_DFA_UFUNC (-41)
|
||||
#define PCRE2_ERROR_DFA_UITEM (-42)
|
||||
#define PCRE2_ERROR_DFA_WSSIZE (-43)
|
||||
#define PCRE2_ERROR_INTERNAL (-44)
|
||||
#define PCRE2_ERROR_JIT_BADOPTION (-45)
|
||||
#define PCRE2_ERROR_JIT_STACKLIMIT (-46)
|
||||
#define PCRE2_ERROR_MATCHLIMIT (-47)
|
||||
#define PCRE2_ERROR_NOMEMORY (-48)
|
||||
#define PCRE2_ERROR_NOSUBSTRING (-49)
|
||||
#define PCRE2_ERROR_NOUNIQUESUBSTRING (-50)
|
||||
#define PCRE2_ERROR_NULL (-51)
|
||||
#define PCRE2_ERROR_RECURSELOOP (-52)
|
||||
#define PCRE2_ERROR_DEPTHLIMIT (-53)
|
||||
#define PCRE2_ERROR_RECURSIONLIMIT (-53) /* Obsolete synonym */
|
||||
#define PCRE2_ERROR_UNAVAILABLE (-54)
|
||||
#define PCRE2_ERROR_UNSET (-55)
|
||||
#define PCRE2_ERROR_BADOFFSETLIMIT (-56)
|
||||
#define PCRE2_ERROR_BADREPESCAPE (-57)
|
||||
#define PCRE2_ERROR_REPMISSINGBRACE (-58)
|
||||
#define PCRE2_ERROR_BADSUBSTITUTION (-59)
|
||||
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
|
||||
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
|
||||
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
|
||||
#define PCRE2_ERROR_HEAPLIMIT (-63)
|
||||
#define PCRE2_ERROR_CONVERT_SYNTAX (-64)
|
||||
#define PCRE2_ERROR_INTERNAL_DUPMATCH (-65)
|
||||
#define PCRE2_ERROR_DFA_UINVALID_UTF (-66)
|
||||
#define PCRE2_ERROR_INVALIDOFFSET (-67)
|
||||
#define PCRE2_ERROR_JIT_UNSUPPORTED (-68)
|
||||
#define PCRE2_ERROR_REPLACECASE (-69)
|
||||
#define PCRE2_ERROR_TOOLARGEREPLACE (-70)
|
||||
|
||||
|
||||
/* Request types for pcre2_pattern_info() */
|
||||
|
||||
#define PCRE2_INFO_ALLOPTIONS 0
|
||||
#define PCRE2_INFO_ARGOPTIONS 1
|
||||
#define PCRE2_INFO_BACKREFMAX 2
|
||||
#define PCRE2_INFO_BSR 3
|
||||
#define PCRE2_INFO_CAPTURECOUNT 4
|
||||
#define PCRE2_INFO_FIRSTCODEUNIT 5
|
||||
#define PCRE2_INFO_FIRSTCODETYPE 6
|
||||
#define PCRE2_INFO_FIRSTBITMAP 7
|
||||
#define PCRE2_INFO_HASCRORLF 8
|
||||
#define PCRE2_INFO_JCHANGED 9
|
||||
#define PCRE2_INFO_JITSIZE 10
|
||||
#define PCRE2_INFO_LASTCODEUNIT 11
|
||||
#define PCRE2_INFO_LASTCODETYPE 12
|
||||
#define PCRE2_INFO_MATCHEMPTY 13
|
||||
#define PCRE2_INFO_MATCHLIMIT 14
|
||||
#define PCRE2_INFO_MAXLOOKBEHIND 15
|
||||
#define PCRE2_INFO_MINLENGTH 16
|
||||
#define PCRE2_INFO_NAMECOUNT 17
|
||||
#define PCRE2_INFO_NAMEENTRYSIZE 18
|
||||
#define PCRE2_INFO_NAMETABLE 19
|
||||
#define PCRE2_INFO_NEWLINE 20
|
||||
#define PCRE2_INFO_DEPTHLIMIT 21
|
||||
#define PCRE2_INFO_RECURSIONLIMIT 21 /* Obsolete synonym */
|
||||
#define PCRE2_INFO_SIZE 22
|
||||
#define PCRE2_INFO_HASBACKSLASHC 23
|
||||
#define PCRE2_INFO_FRAMESIZE 24
|
||||
#define PCRE2_INFO_HEAPLIMIT 25
|
||||
#define PCRE2_INFO_EXTRAOPTIONS 26
|
||||
|
||||
/* Request types for pcre2_config(). */
|
||||
|
||||
#define PCRE2_CONFIG_BSR 0
|
||||
#define PCRE2_CONFIG_JIT 1
|
||||
#define PCRE2_CONFIG_JITTARGET 2
|
||||
#define PCRE2_CONFIG_LINKSIZE 3
|
||||
#define PCRE2_CONFIG_MATCHLIMIT 4
|
||||
#define PCRE2_CONFIG_NEWLINE 5
|
||||
#define PCRE2_CONFIG_PARENSLIMIT 6
|
||||
#define PCRE2_CONFIG_DEPTHLIMIT 7
|
||||
#define PCRE2_CONFIG_RECURSIONLIMIT 7 /* Obsolete synonym */
|
||||
#define PCRE2_CONFIG_STACKRECURSE 8 /* Obsolete */
|
||||
#define PCRE2_CONFIG_UNICODE 9
|
||||
#define PCRE2_CONFIG_UNICODE_VERSION 10
|
||||
#define PCRE2_CONFIG_VERSION 11
|
||||
#define PCRE2_CONFIG_HEAPLIMIT 12
|
||||
#define PCRE2_CONFIG_NEVER_BACKSLASH_C 13
|
||||
#define PCRE2_CONFIG_COMPILED_WIDTHS 14
|
||||
#define PCRE2_CONFIG_TABLES_LENGTH 15
|
||||
|
||||
/* Optimization directives for pcre2_set_optimize().
|
||||
For binary compatibility, only add to this list; do not renumber. */
|
||||
|
||||
/* The first two directives are numbered 0 and 1. */
#define PCRE2_OPTIMIZATION_NONE 0
|
||||
#define PCRE2_OPTIMIZATION_FULL 1
|
||||
|
||||
/* The remaining directives start at 64 and come in pairs: each name is
immediately followed by its _OFF counterpart, numbered one higher. */
#define PCRE2_AUTO_POSSESS 64
|
||||
#define PCRE2_AUTO_POSSESS_OFF 65
|
||||
#define PCRE2_DOTSTAR_ANCHOR 66
|
||||
#define PCRE2_DOTSTAR_ANCHOR_OFF 67
|
||||
#define PCRE2_START_OPTIMIZE 68
|
||||
#define PCRE2_START_OPTIMIZE_OFF 69
|
||||
|
||||
/* Types used in pcre2_set_substitute_case_callout().
|
||||
|
||||
PCRE2_SUBSTITUTE_CASE_LOWER and PCRE2_SUBSTITUTE_CASE_UPPER are passed to the
|
||||
callout to indicate that the entire callout input should be
|
||||
case-transformed. PCRE2_SUBSTITUTE_CASE_TITLE_FIRST is passed to indicate that
|
||||
only the first character or glyph should be transformed to Unicode titlecase,
|
||||
and the rest to lowercase. */
|
||||
|
||||
#define PCRE2_SUBSTITUTE_CASE_LOWER 1
|
||||
#define PCRE2_SUBSTITUTE_CASE_UPPER 2
|
||||
#define PCRE2_SUBSTITUTE_CASE_TITLE_FIRST 3
|
||||
|
||||
/* Types for code units in patterns and subject strings. */
|
||||
|
||||
typedef uint8_t PCRE2_UCHAR8; /* 8-bit code unit */
|
||||
typedef uint16_t PCRE2_UCHAR16; /* 16-bit code unit */
|
||||
typedef uint32_t PCRE2_UCHAR32; /* 32-bit code unit */
|
||||
|
||||
typedef const PCRE2_UCHAR8 *PCRE2_SPTR8; /* pointer to const 8-bit code units */
|
||||
typedef const PCRE2_UCHAR16 *PCRE2_SPTR16; /* pointer to const 16-bit code units */
|
||||
typedef const PCRE2_UCHAR32 *PCRE2_SPTR32; /* pointer to const 32-bit code units */
|
||||
|
||||
/* The PCRE2_SIZE type is used for all string lengths and offsets in PCRE2,
|
||||
including pattern offsets for errors and subject offsets after a match. We
|
||||
define special values to indicate zero-terminated strings and unset offsets in
|
||||
the offset vector (ovector). */
|
||||
|
||||
#define PCRE2_SIZE size_t
|
||||
#define PCRE2_SIZE_MAX SIZE_MAX
|
||||
#define PCRE2_ZERO_TERMINATED (~(PCRE2_SIZE)0) /* all bits set in a PCRE2_SIZE */
|
||||
#define PCRE2_UNSET (~(PCRE2_SIZE)0) /* expands to the same value as PCRE2_ZERO_TERMINATED */
|
||||
|
||||
/* Generic types for opaque structures and JIT callback functions. These
|
||||
declarations are defined in a macro that is expanded for each width later. */
|
||||
|
||||
#define PCRE2_TYPES_LIST \
|
||||
struct pcre2_real_general_context; \
|
||||
typedef struct pcre2_real_general_context pcre2_general_context; \
|
||||
\
|
||||
struct pcre2_real_compile_context; \
|
||||
typedef struct pcre2_real_compile_context pcre2_compile_context; \
|
||||
\
|
||||
struct pcre2_real_match_context; \
|
||||
typedef struct pcre2_real_match_context pcre2_match_context; \
|
||||
\
|
||||
struct pcre2_real_convert_context; \
|
||||
typedef struct pcre2_real_convert_context pcre2_convert_context; \
|
||||
\
|
||||
struct pcre2_real_code; \
|
||||
typedef struct pcre2_real_code pcre2_code; \
|
||||
\
|
||||
struct pcre2_real_match_data; \
|
||||
typedef struct pcre2_real_match_data pcre2_match_data; \
|
||||
\
|
||||
struct pcre2_real_jit_stack; \
|
||||
typedef struct pcre2_real_jit_stack pcre2_jit_stack; \
|
||||
\
|
||||
typedef pcre2_jit_stack *(*pcre2_jit_callback)(void *);
|
||||
|
||||
|
||||
/* The structures for passing out data via callout functions. We use structures
|
||||
so that new fields can be added on the end in future versions, without changing
|
||||
the API of the function, thereby allowing old clients to work without
|
||||
modification. Define the generic versions in a macro; the width-specific
|
||||
versions are generated from this macro below. */
|
||||
|
||||
/* Flags for the callout_flags field. These are cleared after a callout. */
|
||||
|
||||
#define PCRE2_CALLOUT_STARTMATCH 0x00000001u /* Set for each bumpalong */
|
||||
#define PCRE2_CALLOUT_BACKTRACK 0x00000002u /* Set after a backtrack */
|
||||
|
||||
#define PCRE2_STRUCTURE_LIST \
|
||||
typedef struct pcre2_callout_block { \
|
||||
uint32_t version; /* Identifies version of block */ \
|
||||
/* ------------------------ Version 0 ------------------------------- */ \
|
||||
uint32_t callout_number; /* Number compiled into pattern */ \
|
||||
uint32_t capture_top; /* Max current capture */ \
|
||||
uint32_t capture_last; /* Most recently closed capture */ \
|
||||
PCRE2_SIZE *offset_vector; /* The offset vector */ \
|
||||
PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \
|
||||
PCRE2_SPTR subject; /* The subject being matched */ \
|
||||
PCRE2_SIZE subject_length; /* The length of the subject */ \
|
||||
PCRE2_SIZE start_match; /* Offset to start of this match attempt */ \
|
||||
PCRE2_SIZE current_position; /* Where we currently are in the subject */ \
|
||||
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
|
||||
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
|
||||
/* ------------------- Added for Version 1 -------------------------- */ \
|
||||
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
|
||||
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
|
||||
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
|
||||
/* ------------------- Added for Version 2 -------------------------- */ \
|
||||
uint32_t callout_flags; /* See above for list */ \
|
||||
/* ------------------------------------------------------------------ */ \
|
||||
} pcre2_callout_block; \
|
||||
\
|
||||
typedef struct pcre2_callout_enumerate_block { \
|
||||
uint32_t version; /* Identifies version of block */ \
|
||||
/* ------------------------ Version 0 ------------------------------- */ \
|
||||
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
|
||||
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
|
||||
uint32_t callout_number; /* Number compiled into pattern */ \
|
||||
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
|
||||
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
|
||||
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
|
||||
/* ------------------------------------------------------------------ */ \
|
||||
} pcre2_callout_enumerate_block; \
|
||||
\
|
||||
typedef struct pcre2_substitute_callout_block { \
|
||||
uint32_t version; /* Identifies version of block */ \
|
||||
/* ------------------------ Version 0 ------------------------------- */ \
|
||||
PCRE2_SPTR input; /* Pointer to input subject string */ \
|
||||
PCRE2_SPTR output; /* Pointer to output buffer */ \
|
||||
PCRE2_SIZE output_offsets[2]; /* Changed portion of the output */ \
|
||||
PCRE2_SIZE *ovector; /* Pointer to current ovector */ \
|
||||
uint32_t oveccount; /* Count of pairs set in ovector */ \
|
||||
uint32_t subscount; /* Substitution number */ \
|
||||
/* ------------------------------------------------------------------ */ \
|
||||
} pcre2_substitute_callout_block;
|
||||
|
||||
|
||||
/* List the generic forms of all other functions in macros, which will be
|
||||
expanded for each width below. Start with functions that give general
|
||||
information. */
|
||||
|
||||
/* Declares pcre2_config(uint32_t, void *), which returns int.
NOTE(review): the uint32_t is presumably one of the PCRE2_CONFIG_xxx request
types and the void * the place where the answer is stored -- confirm against
the pcre2_config() documentation. */
#define PCRE2_GENERAL_INFO_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_config(uint32_t, void *);
|
||||
|
||||
|
||||
/* Functions for manipulating contexts. */
|
||||
|
||||
/* Declarations for general contexts: pcre2_general_context_copy(),
pcre2_general_context_create() and pcre2_general_context_free(). The create
function takes two function pointers, of types void *(*)(size_t, void *) and
void (*)(void *, void *), followed by a void * argument.
NOTE(review): these look like an allocator/deallocator pair plus caller data
that is handed back to them -- confirm against the API documentation. */
#define PCRE2_GENERAL_CONTEXT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_general_context *PCRE2_CALL_CONVENTION \
|
||||
pcre2_general_context_copy(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL pcre2_general_context *PCRE2_CALL_CONVENTION \
|
||||
pcre2_general_context_create(void *(*)(size_t, void *), \
|
||||
void (*)(void *, void *), void *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_general_context_free(pcre2_general_context *);
|
||||
|
||||
/* Declarations for compile contexts: pcre2_compile_context_copy(), _create()
(from a general context) and _free(), followed by the pcre2_set_xxx() functions
whose first argument is a pcre2_compile_context *: bsr, character_tables,
compile_extra_options, max_pattern_length, max_pattern_compiled_length,
max_varlookbehind, newline, parens_nest_limit, compile_recursion_guard and
optimize. Every one of the setters is declared as returning int. */
#define PCRE2_COMPILE_CONTEXT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_compile_context *PCRE2_CALL_CONVENTION \
|
||||
pcre2_compile_context_copy(pcre2_compile_context *); \
|
||||
PCRE2_EXP_DECL pcre2_compile_context *PCRE2_CALL_CONVENTION \
|
||||
pcre2_compile_context_create(pcre2_general_context *);\
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_compile_context_free(pcre2_compile_context *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_character_tables(pcre2_compile_context *, const uint8_t *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_max_pattern_compiled_length(pcre2_compile_context *, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_max_varlookbehind(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_newline(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_parens_nest_limit(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_compile_recursion_guard(pcre2_compile_context *, \
|
||||
int (*)(uint32_t, void *), void *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_optimize(pcre2_compile_context *, uint32_t);
|
||||
|
||||
/* Declarations for match contexts: pcre2_match_context_copy(), _create() (from
a general context) and _free(), followed by the pcre2_set_xxx() functions whose
first argument is a pcre2_match_context *: callout, substitute_callout,
substitute_case_callout, depth_limit, heap_limit, match_limit, offset_limit,
recursion_limit and recursion_memory_management. Every one of the setters is
declared as returning int; the three callout setters each take a function
pointer followed by a void * argument.
NOTE(review): pcre2_set_recursion_limit() looks like an older spelling of
pcre2_set_depth_limit(), in the same way that the xxx_RECURSIONLIMIT constants
are marked as obsolete synonyms of xxx_DEPTHLIMIT -- confirm. */
#define PCRE2_MATCH_CONTEXT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \
|
||||
pcre2_match_context_copy(pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \
|
||||
pcre2_match_context_create(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_match_context_free(pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_callout(pcre2_match_context *, \
|
||||
int (*)(pcre2_callout_block *, void *), void *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_substitute_callout(pcre2_match_context *, \
|
||||
int (*)(pcre2_substitute_callout_block *, void *), void *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_substitute_case_callout(pcre2_match_context *, \
|
||||
PCRE2_SIZE (*)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE, int, \
|
||||
void *), \
|
||||
void *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_heap_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_match_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_offset_limit(pcre2_match_context *, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_recursion_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_recursion_memory_management(pcre2_match_context *, \
|
||||
void *(*)(size_t, void *), void (*)(void *, void *), void *);
|
||||
|
||||
/* Declarations for convert contexts: pcre2_convert_context_copy(), _create()
(from a general context) and _free(), plus pcre2_set_glob_escape() and
pcre2_set_glob_separator(), each of which takes a pcre2_convert_context * and
a uint32_t and returns int. */
#define PCRE2_CONVERT_CONTEXT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_convert_context *PCRE2_CALL_CONVENTION \
|
||||
pcre2_convert_context_copy(pcre2_convert_context *); \
|
||||
PCRE2_EXP_DECL pcre2_convert_context *PCRE2_CALL_CONVENTION \
|
||||
pcre2_convert_context_create(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_convert_context_free(pcre2_convert_context *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_glob_escape(pcre2_convert_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_glob_separator(pcre2_convert_context *, uint32_t);
|
||||
|
||||
|
||||
/* Functions concerned with compiling a pattern to PCRE internal code. */
|
||||
|
||||
/* Declares pcre2_compile(), which returns a pcre2_code * and takes a pattern
pointer (PCRE2_SPTR), a PCRE2_SIZE, a uint32_t, an int *, a PCRE2_SIZE * and a
pcre2_compile_context *, together with pcre2_code_free(), pcre2_code_copy()
and pcre2_code_copy_with_tables(), which operate on a pcre2_code.
NOTE(review): the int * and PCRE2_SIZE * are presumably where the error code
and error offset are returned -- confirm against the pcre2_compile()
documentation. */
#define PCRE2_COMPILE_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \
|
||||
pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_SIZE *, \
|
||||
pcre2_compile_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_code_free(pcre2_code *); \
|
||||
PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \
|
||||
pcre2_code_copy(const pcre2_code *); \
|
||||
PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \
|
||||
pcre2_code_copy_with_tables(const pcre2_code *);
|
||||
|
||||
|
||||
/* Functions that give information about a compiled pattern. */
|
||||
|
||||
/* Declares pcre2_pattern_info(const pcre2_code *, uint32_t, void *) and
pcre2_callout_enumerate(), which takes a const pcre2_code *, a callback of type
int (*)(pcre2_callout_enumerate_block *, void *) and a void * argument. Both
return int. */
#define PCRE2_PATTERN_INFO_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_pattern_info(const pcre2_code *, uint32_t, void *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_callout_enumerate(const pcre2_code *, \
|
||||
int (*)(pcre2_callout_enumerate_block *, void *), void *);
|
||||
|
||||
|
||||
/* Functions for running a match and inspecting the result. */
|
||||
|
||||
/* Declares the match-data functions pcre2_match_data_create(),
pcre2_match_data_create_from_pattern() and pcre2_match_data_free(); the
matching functions pcre2_dfa_match() and pcre2_match(); and the accessors
pcre2_get_mark(), pcre2_get_match_data_size(),
pcre2_get_match_data_heapframes_size(), pcre2_get_ovector_count(),
pcre2_get_ovector_pointer() and pcre2_get_startchar(), each of which takes a
pcre2_match_data *. pcre2_dfa_match() has the same leading arguments as
pcre2_match() plus a trailing int * and PCRE2_SIZE.
NOTE(review): those two extra arguments are presumably the DFA workspace and
its size -- confirm against the pcre2_dfa_match() documentation. */
#define PCRE2_MATCH_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_match_data *PCRE2_CALL_CONVENTION \
|
||||
pcre2_match_data_create(uint32_t, pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL pcre2_match_data *PCRE2_CALL_CONVENTION \
|
||||
pcre2_match_data_create_from_pattern(const pcre2_code *, \
|
||||
pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
|
||||
uint32_t, pcre2_match_data *, pcre2_match_context *, int *, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
|
||||
uint32_t, pcre2_match_data *, pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_match_data_free(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SPTR PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_mark(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_match_data_size(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_match_data_heapframes_size(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL uint32_t PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_ovector_count(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE *PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_ovector_pointer(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_startchar(pcre2_match_data *);
|
||||
|
||||
|
||||
/* Convenience functions for handling matched substrings. */
|
||||
|
||||
/* Declares the pcre2_substring_xxx() functions. The copy, get and length
functions come in _byname (selected by a PCRE2_SPTR) and _bynumber (selected
by a uint32_t) variants and take a pcre2_match_data *. The remaining functions
are pcre2_substring_free(), pcre2_substring_nametable_scan(),
pcre2_substring_number_from_name(), pcre2_substring_list_free() and
pcre2_substring_list_get(). All return int except the two free functions,
which return void. */
#define PCRE2_SUBSTRING_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_copy_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR *, \
|
||||
PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_copy_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR *, \
|
||||
PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_free(PCRE2_UCHAR *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_get_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR **, \
|
||||
PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_get_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR **, \
|
||||
PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_length_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_length_bynumber(pcre2_match_data *, uint32_t, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_nametable_scan(const pcre2_code *, PCRE2_SPTR, PCRE2_SPTR *, \
|
||||
PCRE2_SPTR *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_number_from_name(const pcre2_code *, PCRE2_SPTR); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_list_free(PCRE2_UCHAR **); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_list_get(pcre2_match_data *, PCRE2_UCHAR ***, PCRE2_SIZE **);
|
||||
|
||||
|
||||
/* Functions for serializing / deserializing compiled patterns. */
|
||||
|
||||
#define PCRE2_SERIALIZE_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
|
||||
pcre2_serialize_encode(const pcre2_code **, int32_t, uint8_t **, \
|
||||
PCRE2_SIZE *, pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
|
||||
pcre2_serialize_decode(pcre2_code **, int32_t, const uint8_t *, \
|
||||
pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
|
||||
pcre2_serialize_get_number_of_codes(const uint8_t *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_serialize_free(uint8_t *);
|
||||
|
||||
|
||||
/* Convenience function for match + substitute. */
|
||||
|
||||
#define PCRE2_SUBSTITUTE_FUNCTION \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substitute(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
|
||||
uint32_t, pcre2_match_data *, pcre2_match_context *, PCRE2_SPTR, \
|
||||
PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE *);
|
||||
|
||||
|
||||
/* Functions for converting pattern source strings. */
|
||||
|
||||
#define PCRE2_CONVERT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_pattern_convert(PCRE2_SPTR, PCRE2_SIZE, uint32_t, PCRE2_UCHAR **, \
|
||||
PCRE2_SIZE *, pcre2_convert_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_converted_pattern_free(PCRE2_UCHAR *);
|
||||
|
||||
|
||||
/* Functions for JIT processing */
|
||||
|
||||
#define PCRE2_JIT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_jit_compile(pcre2_code *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_jit_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
|
||||
uint32_t, pcre2_match_data *, pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_jit_free_unused_memory(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL pcre2_jit_stack *PCRE2_CALL_CONVENTION \
|
||||
pcre2_jit_stack_create(size_t, size_t, pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_jit_stack_assign(pcre2_match_context *, pcre2_jit_callback, void *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_jit_stack_free(pcre2_jit_stack *);
|
||||
|
||||
|
||||
/* Other miscellaneous functions. */
|
||||
|
||||
#define PCRE2_OTHER_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL const uint8_t *PCRE2_CALL_CONVENTION \
|
||||
pcre2_maketables(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_maketables_free(pcre2_general_context *, const uint8_t *);
|
||||
|
||||
/* Define macros that generate width-specific names from generic versions. The
|
||||
three-level macro scheme is necessary to get the macros expanded when we want
|
||||
them to be. First we get the width from PCRE2_LOCAL_WIDTH, which is used for
|
||||
generating three versions of everything below. After that, PCRE2_SUFFIX will be
|
||||
re-defined to use PCRE2_CODE_UNIT_WIDTH, for use when macros such as
|
||||
pcre2_compile are called by application code. */
|
||||
|
||||
#define PCRE2_JOIN(a,b) a ## b
|
||||
#define PCRE2_GLUE(a,b) PCRE2_JOIN(a,b)
|
||||
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a,PCRE2_LOCAL_WIDTH)
|
||||
|
||||
|
||||
/* Data types */
|
||||
|
||||
#define PCRE2_UCHAR PCRE2_SUFFIX(PCRE2_UCHAR)
|
||||
#define PCRE2_SPTR PCRE2_SUFFIX(PCRE2_SPTR)
|
||||
|
||||
#define pcre2_code PCRE2_SUFFIX(pcre2_code_)
|
||||
#define pcre2_jit_callback PCRE2_SUFFIX(pcre2_jit_callback_)
|
||||
#define pcre2_jit_stack PCRE2_SUFFIX(pcre2_jit_stack_)
|
||||
|
||||
#define pcre2_real_code PCRE2_SUFFIX(pcre2_real_code_)
|
||||
#define pcre2_real_general_context PCRE2_SUFFIX(pcre2_real_general_context_)
|
||||
#define pcre2_real_compile_context PCRE2_SUFFIX(pcre2_real_compile_context_)
|
||||
#define pcre2_real_convert_context PCRE2_SUFFIX(pcre2_real_convert_context_)
|
||||
#define pcre2_real_match_context PCRE2_SUFFIX(pcre2_real_match_context_)
|
||||
#define pcre2_real_jit_stack PCRE2_SUFFIX(pcre2_real_jit_stack_)
|
||||
#define pcre2_real_match_data PCRE2_SUFFIX(pcre2_real_match_data_)
|
||||
|
||||
|
||||
/* Data blocks */
|
||||
|
||||
#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_)
|
||||
#define pcre2_callout_enumerate_block PCRE2_SUFFIX(pcre2_callout_enumerate_block_)
|
||||
#define pcre2_substitute_callout_block PCRE2_SUFFIX(pcre2_substitute_callout_block_)
|
||||
#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_)
|
||||
#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_)
|
||||
#define pcre2_convert_context PCRE2_SUFFIX(pcre2_convert_context_)
|
||||
#define pcre2_match_context PCRE2_SUFFIX(pcre2_match_context_)
|
||||
#define pcre2_match_data PCRE2_SUFFIX(pcre2_match_data_)
|
||||
|
||||
|
||||
/* Functions: the complete list in alphabetical order */
|
||||
|
||||
#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_)
|
||||
#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_)
|
||||
#define pcre2_code_copy_with_tables PCRE2_SUFFIX(pcre2_code_copy_with_tables_)
|
||||
#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_)
|
||||
#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_)
|
||||
#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_)
|
||||
#define pcre2_compile_context_create PCRE2_SUFFIX(pcre2_compile_context_create_)
|
||||
#define pcre2_compile_context_free PCRE2_SUFFIX(pcre2_compile_context_free_)
|
||||
#define pcre2_config PCRE2_SUFFIX(pcre2_config_)
|
||||
#define pcre2_convert_context_copy PCRE2_SUFFIX(pcre2_convert_context_copy_)
|
||||
#define pcre2_convert_context_create PCRE2_SUFFIX(pcre2_convert_context_create_)
|
||||
#define pcre2_convert_context_free PCRE2_SUFFIX(pcre2_convert_context_free_)
|
||||
#define pcre2_converted_pattern_free PCRE2_SUFFIX(pcre2_converted_pattern_free_)
|
||||
#define pcre2_dfa_match PCRE2_SUFFIX(pcre2_dfa_match_)
|
||||
#define pcre2_general_context_copy PCRE2_SUFFIX(pcre2_general_context_copy_)
|
||||
#define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_)
|
||||
#define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_)
|
||||
#define pcre2_get_error_message PCRE2_SUFFIX(pcre2_get_error_message_)
|
||||
#define pcre2_get_mark PCRE2_SUFFIX(pcre2_get_mark_)
|
||||
#define pcre2_get_match_data_heapframes_size PCRE2_SUFFIX(pcre2_get_match_data_heapframes_size_)
|
||||
#define pcre2_get_match_data_size PCRE2_SUFFIX(pcre2_get_match_data_size_)
|
||||
#define pcre2_get_ovector_pointer PCRE2_SUFFIX(pcre2_get_ovector_pointer_)
|
||||
#define pcre2_get_ovector_count PCRE2_SUFFIX(pcre2_get_ovector_count_)
|
||||
#define pcre2_get_startchar PCRE2_SUFFIX(pcre2_get_startchar_)
|
||||
#define pcre2_jit_compile PCRE2_SUFFIX(pcre2_jit_compile_)
|
||||
#define pcre2_jit_match PCRE2_SUFFIX(pcre2_jit_match_)
|
||||
#define pcre2_jit_free_unused_memory PCRE2_SUFFIX(pcre2_jit_free_unused_memory_)
|
||||
#define pcre2_jit_stack_assign PCRE2_SUFFIX(pcre2_jit_stack_assign_)
|
||||
#define pcre2_jit_stack_create PCRE2_SUFFIX(pcre2_jit_stack_create_)
|
||||
#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_)
|
||||
#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_)
|
||||
#define pcre2_maketables_free PCRE2_SUFFIX(pcre2_maketables_free_)
|
||||
#define pcre2_match PCRE2_SUFFIX(pcre2_match_)
|
||||
#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_)
|
||||
#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_)
|
||||
#define pcre2_match_context_free PCRE2_SUFFIX(pcre2_match_context_free_)
|
||||
#define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_)
|
||||
#define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_)
|
||||
#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_)
|
||||
#define pcre2_pattern_convert PCRE2_SUFFIX(pcre2_pattern_convert_)
|
||||
#define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_)
|
||||
#define pcre2_serialize_decode PCRE2_SUFFIX(pcre2_serialize_decode_)
|
||||
#define pcre2_serialize_encode PCRE2_SUFFIX(pcre2_serialize_encode_)
|
||||
#define pcre2_serialize_free PCRE2_SUFFIX(pcre2_serialize_free_)
|
||||
#define pcre2_serialize_get_number_of_codes PCRE2_SUFFIX(pcre2_serialize_get_number_of_codes_)
|
||||
#define pcre2_set_bsr PCRE2_SUFFIX(pcre2_set_bsr_)
|
||||
#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_)
|
||||
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
||||
#define pcre2_set_compile_extra_options PCRE2_SUFFIX(pcre2_set_compile_extra_options_)
|
||||
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
||||
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
|
||||
#define pcre2_set_glob_escape PCRE2_SUFFIX(pcre2_set_glob_escape_)
|
||||
#define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_)
|
||||
#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
|
||||
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
||||
#define pcre2_set_max_varlookbehind PCRE2_SUFFIX(pcre2_set_max_varlookbehind_)
|
||||
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
|
||||
#define pcre2_set_max_pattern_compiled_length PCRE2_SUFFIX(pcre2_set_max_pattern_compiled_length_)
|
||||
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
||||
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
|
||||
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
|
||||
#define pcre2_set_optimize PCRE2_SUFFIX(pcre2_set_optimize_)
|
||||
#define pcre2_set_substitute_callout PCRE2_SUFFIX(pcre2_set_substitute_callout_)
|
||||
#define pcre2_set_substitute_case_callout PCRE2_SUFFIX(pcre2_set_substitute_case_callout_)
|
||||
#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_)
|
||||
#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_)
|
||||
#define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_)
|
||||
#define pcre2_substring_free PCRE2_SUFFIX(pcre2_substring_free_)
|
||||
#define pcre2_substring_get_byname PCRE2_SUFFIX(pcre2_substring_get_byname_)
|
||||
#define pcre2_substring_get_bynumber PCRE2_SUFFIX(pcre2_substring_get_bynumber_)
|
||||
#define pcre2_substring_length_byname PCRE2_SUFFIX(pcre2_substring_length_byname_)
|
||||
#define pcre2_substring_length_bynumber PCRE2_SUFFIX(pcre2_substring_length_bynumber_)
|
||||
#define pcre2_substring_list_get PCRE2_SUFFIX(pcre2_substring_list_get_)
|
||||
#define pcre2_substring_list_free PCRE2_SUFFIX(pcre2_substring_list_free_)
|
||||
#define pcre2_substring_nametable_scan PCRE2_SUFFIX(pcre2_substring_nametable_scan_)
|
||||
#define pcre2_substring_number_from_name PCRE2_SUFFIX(pcre2_substring_number_from_name_)
|
||||
|
||||
/* Keep this old function name for backwards compatibility */
|
||||
#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)
|
||||
|
||||
/* Keep this obsolete function for backwards compatibility: it is now a noop. */
|
||||
#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)
|
||||
|
||||
/* Now generate all three sets of width-specific structures and function
|
||||
prototypes. */
|
||||
|
||||
#define PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS \
|
||||
PCRE2_TYPES_LIST \
|
||||
PCRE2_STRUCTURE_LIST \
|
||||
PCRE2_GENERAL_INFO_FUNCTIONS \
|
||||
PCRE2_GENERAL_CONTEXT_FUNCTIONS \
|
||||
PCRE2_COMPILE_CONTEXT_FUNCTIONS \
|
||||
PCRE2_CONVERT_CONTEXT_FUNCTIONS \
|
||||
PCRE2_CONVERT_FUNCTIONS \
|
||||
PCRE2_MATCH_CONTEXT_FUNCTIONS \
|
||||
PCRE2_COMPILE_FUNCTIONS \
|
||||
PCRE2_PATTERN_INFO_FUNCTIONS \
|
||||
PCRE2_MATCH_FUNCTIONS \
|
||||
PCRE2_SUBSTRING_FUNCTIONS \
|
||||
PCRE2_SERIALIZE_FUNCTIONS \
|
||||
PCRE2_SUBSTITUTE_FUNCTION \
|
||||
PCRE2_JIT_FUNCTIONS \
|
||||
PCRE2_OTHER_FUNCTIONS
|
||||
|
||||
#define PCRE2_LOCAL_WIDTH 8
|
||||
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
#undef PCRE2_LOCAL_WIDTH
|
||||
|
||||
#define PCRE2_LOCAL_WIDTH 16
|
||||
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
#undef PCRE2_LOCAL_WIDTH
|
||||
|
||||
#define PCRE2_LOCAL_WIDTH 32
|
||||
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
#undef PCRE2_LOCAL_WIDTH
|
||||
|
||||
/* Undefine the list macros; they are no longer needed. PCRE2_CONVERT_FUNCTIONS
|
||||
is undefined with the rest: it is one of the lists expanded above through
|
||||
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS and has no further use afterwards. */
|
||||
|
||||
#undef PCRE2_TYPES_LIST
|
||||
#undef PCRE2_STRUCTURE_LIST
|
||||
#undef PCRE2_GENERAL_INFO_FUNCTIONS
|
||||
#undef PCRE2_GENERAL_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_COMPILE_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_CONVERT_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_CONVERT_FUNCTIONS
|
||||
#undef PCRE2_MATCH_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_COMPILE_FUNCTIONS
|
||||
#undef PCRE2_PATTERN_INFO_FUNCTIONS
|
||||
#undef PCRE2_MATCH_FUNCTIONS
|
||||
#undef PCRE2_SUBSTRING_FUNCTIONS
|
||||
#undef PCRE2_SERIALIZE_FUNCTIONS
|
||||
#undef PCRE2_SUBSTITUTE_FUNCTION
|
||||
#undef PCRE2_JIT_FUNCTIONS
|
||||
#undef PCRE2_OTHER_FUNCTIONS
|
||||
#undef PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
|
||||
/* PCRE2_CODE_UNIT_WIDTH must be defined. If it is 8, 16, or 32, redefine
|
||||
PCRE2_SUFFIX to use it. If it is 0, undefine the other macros and make
|
||||
PCRE2_SUFFIX a no-op. Otherwise, generate an error. */
|
||||
|
||||
#undef PCRE2_SUFFIX
|
||||
#ifndef PCRE2_CODE_UNIT_WIDTH
|
||||
#error PCRE2_CODE_UNIT_WIDTH must be defined before including pcre2.h.
|
||||
#error Use 8, 16, or 32; or 0 for a multi-width application.
|
||||
#else /* PCRE2_CODE_UNIT_WIDTH is defined */
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8 || \
|
||||
PCRE2_CODE_UNIT_WIDTH == 16 || \
|
||||
PCRE2_CODE_UNIT_WIDTH == 32
|
||||
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a, PCRE2_CODE_UNIT_WIDTH)
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 0
|
||||
#undef PCRE2_JOIN
|
||||
#undef PCRE2_GLUE
|
||||
#define PCRE2_SUFFIX(a) a
|
||||
#else
|
||||
#error PCRE2_CODE_UNIT_WIDTH must be 0, 8, 16, or 32.
|
||||
#endif
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH is defined */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* PCRE2_H_IDEMPOTENT_GUARD */
|
||||
|
||||
/* End of pcre2.h */
|
||||
@@ -1,1069 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This is the public header file for the PCRE library, second API, to be
|
||||
#included by applications that call PCRE2 functions.
|
||||
|
||||
Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef PCRE2_H_IDEMPOTENT_GUARD
|
||||
#define PCRE2_H_IDEMPOTENT_GUARD
|
||||
|
||||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE2_MAJOR @PCRE2_MAJOR@
|
||||
#define PCRE2_MINOR @PCRE2_MINOR@
|
||||
#define PCRE2_PRERELEASE @PCRE2_PRERELEASE@
|
||||
#define PCRE2_DATE @PCRE2_DATE@
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE2, the appropriate
|
||||
export setting is defined in pcre2_internal.h, which includes this file. So we
|
||||
don't change existing definitions of PCRE2_EXP_DECL. */
|
||||
|
||||
#if defined(_WIN32) && !defined(PCRE2_STATIC)
|
||||
# ifndef PCRE2_EXP_DECL
|
||||
# define PCRE2_EXP_DECL extern __declspec(dllimport)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* By default, we use the standard "extern" declarations. */
|
||||
|
||||
#ifndef PCRE2_EXP_DECL
|
||||
# ifdef __cplusplus
|
||||
# define PCRE2_EXP_DECL extern "C"
|
||||
# else
|
||||
# define PCRE2_EXP_DECL extern
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* When compiling with the MSVC compiler, it is sometimes necessary to include
|
||||
a "calling convention" before exported function names. (This is secondhand
|
||||
information; I know nothing about MSVC myself). For example, something like
|
||||
|
||||
void __cdecl function(....)
|
||||
|
||||
might be needed. In order to make this easy, all the exported functions have
|
||||
PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not
|
||||
set, we ensure here that it has no effect. */
|
||||
|
||||
#ifndef PCRE2_CALL_CONVENTION
|
||||
#define PCRE2_CALL_CONVENTION
|
||||
#endif
|
||||
|
||||
/* Have to include limits.h, stdlib.h, and inttypes.h to ensure that size_t and
|
||||
uint8_t, UCHAR_MAX, etc are defined. Some systems that do have inttypes.h do
|
||||
not have stdint.h, which is why we use inttypes.h, which according to the C
|
||||
standard is a superset of stdint.h. If inttypes.h is not available the build
|
||||
will break and the relevant values must be provided by some other means. */
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
/* Allow for C++ users compiling this directly. */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* The following option bits can be passed to pcre2_compile(), pcre2_match(),
|
||||
or pcre2_dfa_match(). PCRE2_NO_UTF_CHECK affects only the function to which it
|
||||
is passed. Put these bits at the most significant end of the options word so
|
||||
others can be added next to them */
|
||||
|
||||
#define PCRE2_ANCHORED 0x80000000u
|
||||
#define PCRE2_NO_UTF_CHECK 0x40000000u
|
||||
#define PCRE2_ENDANCHORED 0x20000000u
|
||||
|
||||
/* The following option bits can be passed only to pcre2_compile(). However,
|
||||
they may affect compilation, JIT compilation, and/or interpretive execution.
|
||||
The following tags indicate which:
|
||||
|
||||
C alters what is compiled by pcre2_compile()
|
||||
J alters what is compiled by pcre2_jit_compile()
|
||||
M is inspected during pcre2_match() execution
|
||||
D is inspected during pcre2_dfa_match() execution
|
||||
*/
|
||||
|
||||
#define PCRE2_ALLOW_EMPTY_CLASS 0x00000001u /* C */
|
||||
#define PCRE2_ALT_BSUX 0x00000002u /* C */
|
||||
#define PCRE2_AUTO_CALLOUT 0x00000004u /* C */
|
||||
#define PCRE2_CASELESS 0x00000008u /* C */
|
||||
#define PCRE2_DOLLAR_ENDONLY 0x00000010u /* J M D */
|
||||
#define PCRE2_DOTALL 0x00000020u /* C */
|
||||
#define PCRE2_DUPNAMES 0x00000040u /* C */
|
||||
#define PCRE2_EXTENDED 0x00000080u /* C */
|
||||
#define PCRE2_FIRSTLINE 0x00000100u /* J M D */
|
||||
#define PCRE2_MATCH_UNSET_BACKREF 0x00000200u /* C J M */
|
||||
#define PCRE2_MULTILINE 0x00000400u /* C */
|
||||
#define PCRE2_NEVER_UCP 0x00000800u /* C */
|
||||
#define PCRE2_NEVER_UTF 0x00001000u /* C */
|
||||
#define PCRE2_NO_AUTO_CAPTURE 0x00002000u /* C */
|
||||
#define PCRE2_NO_AUTO_POSSESS 0x00004000u /* C */
|
||||
#define PCRE2_NO_DOTSTAR_ANCHOR 0x00008000u /* C */
|
||||
#define PCRE2_NO_START_OPTIMIZE 0x00010000u /* J M D */
|
||||
#define PCRE2_UCP 0x00020000u /* C J M D */
|
||||
#define PCRE2_UNGREEDY 0x00040000u /* C */
|
||||
#define PCRE2_UTF 0x00080000u /* C J M D */
|
||||
#define PCRE2_NEVER_BACKSLASH_C 0x00100000u /* C */
|
||||
#define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */
|
||||
#define PCRE2_ALT_VERBNAMES 0x00400000u /* C */
|
||||
#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */
|
||||
#define PCRE2_EXTENDED_MORE 0x01000000u /* C */
|
||||
#define PCRE2_LITERAL 0x02000000u /* C */
|
||||
#define PCRE2_MATCH_INVALID_UTF 0x04000000u /* J M D */
|
||||
#define PCRE2_ALT_EXTENDED_CLASS 0x08000000u /* C */
|
||||
|
||||
/* An additional compile options word is available in the compile context. */
|
||||
|
||||
#define PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES 0x00000001u /* C */
|
||||
#define PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL 0x00000002u /* C */
|
||||
#define PCRE2_EXTRA_MATCH_WORD 0x00000004u /* C */
|
||||
#define PCRE2_EXTRA_MATCH_LINE 0x00000008u /* C */
|
||||
#define PCRE2_EXTRA_ESCAPED_CR_IS_LF 0x00000010u /* C */
|
||||
#define PCRE2_EXTRA_ALT_BSUX 0x00000020u /* C */
|
||||
#define PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK 0x00000040u /* C */
|
||||
#define PCRE2_EXTRA_CASELESS_RESTRICT 0x00000080u /* C */
|
||||
#define PCRE2_EXTRA_ASCII_BSD 0x00000100u /* C */
|
||||
#define PCRE2_EXTRA_ASCII_BSS 0x00000200u /* C */
|
||||
#define PCRE2_EXTRA_ASCII_BSW 0x00000400u /* C */
|
||||
#define PCRE2_EXTRA_ASCII_POSIX 0x00000800u /* C */
|
||||
#define PCRE2_EXTRA_ASCII_DIGIT 0x00001000u /* C */
|
||||
#define PCRE2_EXTRA_PYTHON_OCTAL 0x00002000u /* C */
|
||||
#define PCRE2_EXTRA_NO_BS0 0x00004000u /* C */
|
||||
#define PCRE2_EXTRA_NEVER_CALLOUT 0x00008000u /* C */
|
||||
#define PCRE2_EXTRA_TURKISH_CASING 0x00010000u /* C */
|
||||
|
||||
/* These are for pcre2_jit_compile(). */
|
||||
|
||||
#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */
|
||||
#define PCRE2_JIT_PARTIAL_SOFT 0x00000002u
|
||||
#define PCRE2_JIT_PARTIAL_HARD 0x00000004u
|
||||
#define PCRE2_JIT_INVALID_UTF 0x00000100u
|
||||
#define PCRE2_JIT_TEST_ALLOC 0x00000200u
|
||||
|
||||
/* These are for pcre2_match(), pcre2_dfa_match(), pcre2_jit_match(), and
|
||||
pcre2_substitute(). Some are allowed only for one of the functions, and in
|
||||
these cases it is noted below. Note that PCRE2_ANCHORED, PCRE2_ENDANCHORED and
|
||||
PCRE2_NO_UTF_CHECK can also be passed to these functions (though
|
||||
pcre2_jit_match() ignores the latter since it bypasses all sanity checks). */
|
||||
|
||||
#define PCRE2_NOTBOL 0x00000001u
|
||||
#define PCRE2_NOTEOL 0x00000002u
|
||||
#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */
|
||||
#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. */
|
||||
#define PCRE2_PARTIAL_SOFT 0x00000010u
|
||||
#define PCRE2_PARTIAL_HARD 0x00000020u
|
||||
#define PCRE2_DFA_RESTART 0x00000040u /* pcre2_dfa_match() only */
|
||||
#define PCRE2_DFA_SHORTEST 0x00000080u /* pcre2_dfa_match() only */
|
||||
#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u /* pcre2_substitute() only */
|
||||
#define PCRE2_NO_JIT 0x00002000u /* not for pcre2_dfa_match() */
|
||||
#define PCRE2_COPY_MATCHED_SUBJECT 0x00004000u
|
||||
#define PCRE2_SUBSTITUTE_LITERAL 0x00008000u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_MATCHED 0x00010000u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_REPLACEMENT_ONLY 0x00020000u /* pcre2_substitute() only */
|
||||
#define PCRE2_DISABLE_RECURSELOOP_CHECK 0x00040000u /* not for pcre2_dfa_match() or pcre2_jit_match() */
|
||||
|
||||
/* Options for pcre2_pattern_convert(). */
|
||||
|
||||
#define PCRE2_CONVERT_UTF 0x00000001u
|
||||
#define PCRE2_CONVERT_NO_UTF_CHECK 0x00000002u
|
||||
#define PCRE2_CONVERT_POSIX_BASIC 0x00000004u
|
||||
#define PCRE2_CONVERT_POSIX_EXTENDED 0x00000008u
|
||||
#define PCRE2_CONVERT_GLOB 0x00000010u
|
||||
#define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000030u
|
||||
#define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000050u
|
||||
|
||||
/* Newline and \R settings, for use in compile contexts. The newline values
|
||||
must be kept in step with values set in config.h, and every value in both sets
|
||||
must be greater than zero. */
|
||||
|
||||
#define PCRE2_NEWLINE_CR 1
|
||||
#define PCRE2_NEWLINE_LF 2
|
||||
#define PCRE2_NEWLINE_CRLF 3
|
||||
#define PCRE2_NEWLINE_ANY 4
|
||||
#define PCRE2_NEWLINE_ANYCRLF 5
|
||||
#define PCRE2_NEWLINE_NUL 6
|
||||
|
||||
#define PCRE2_BSR_UNICODE 1
|
||||
#define PCRE2_BSR_ANYCRLF 2
|
||||
|
||||
/* Error codes for pcre2_compile(). Some of these are also used by
|
||||
pcre2_pattern_convert(). */
|
||||
|
||||
#define PCRE2_ERROR_END_BACKSLASH 101
|
||||
#define PCRE2_ERROR_END_BACKSLASH_C 102
|
||||
#define PCRE2_ERROR_UNKNOWN_ESCAPE 103
|
||||
#define PCRE2_ERROR_QUANTIFIER_OUT_OF_ORDER 104
|
||||
#define PCRE2_ERROR_QUANTIFIER_TOO_BIG 105
|
||||
#define PCRE2_ERROR_MISSING_SQUARE_BRACKET 106
|
||||
#define PCRE2_ERROR_ESCAPE_INVALID_IN_CLASS 107
|
||||
#define PCRE2_ERROR_CLASS_RANGE_ORDER 108
|
||||
#define PCRE2_ERROR_QUANTIFIER_INVALID 109
|
||||
#define PCRE2_ERROR_INTERNAL_UNEXPECTED_REPEAT 110
|
||||
#define PCRE2_ERROR_INVALID_AFTER_PARENS_QUERY 111
|
||||
#define PCRE2_ERROR_POSIX_CLASS_NOT_IN_CLASS 112
|
||||
#define PCRE2_ERROR_POSIX_NO_SUPPORT_COLLATING 113
|
||||
#define PCRE2_ERROR_MISSING_CLOSING_PARENTHESIS 114
|
||||
#define PCRE2_ERROR_BAD_SUBPATTERN_REFERENCE 115
|
||||
#define PCRE2_ERROR_NULL_PATTERN 116
|
||||
#define PCRE2_ERROR_BAD_OPTIONS 117
|
||||
#define PCRE2_ERROR_MISSING_COMMENT_CLOSING 118
|
||||
#define PCRE2_ERROR_PARENTHESES_NEST_TOO_DEEP 119
|
||||
#define PCRE2_ERROR_PATTERN_TOO_LARGE 120
|
||||
#define PCRE2_ERROR_HEAP_FAILED 121
|
||||
#define PCRE2_ERROR_UNMATCHED_CLOSING_PARENTHESIS 122
|
||||
#define PCRE2_ERROR_INTERNAL_CODE_OVERFLOW 123
|
||||
#define PCRE2_ERROR_MISSING_CONDITION_CLOSING 124
|
||||
#define PCRE2_ERROR_LOOKBEHIND_NOT_FIXED_LENGTH 125
|
||||
#define PCRE2_ERROR_ZERO_RELATIVE_REFERENCE 126
|
||||
#define PCRE2_ERROR_TOO_MANY_CONDITION_BRANCHES 127
|
||||
#define PCRE2_ERROR_CONDITION_ASSERTION_EXPECTED 128
|
||||
#define PCRE2_ERROR_BAD_RELATIVE_REFERENCE 129
|
||||
#define PCRE2_ERROR_UNKNOWN_POSIX_CLASS 130
|
||||
#define PCRE2_ERROR_INTERNAL_STUDY_ERROR 131
|
||||
#define PCRE2_ERROR_UNICODE_NOT_SUPPORTED 132
|
||||
#define PCRE2_ERROR_PARENTHESES_STACK_CHECK 133
|
||||
#define PCRE2_ERROR_CODE_POINT_TOO_BIG 134
|
||||
#define PCRE2_ERROR_LOOKBEHIND_TOO_COMPLICATED 135
|
||||
#define PCRE2_ERROR_LOOKBEHIND_INVALID_BACKSLASH_C 136
|
||||
#define PCRE2_ERROR_UNSUPPORTED_ESCAPE_SEQUENCE 137
|
||||
#define PCRE2_ERROR_CALLOUT_NUMBER_TOO_BIG 138
|
||||
#define PCRE2_ERROR_MISSING_CALLOUT_CLOSING 139
|
||||
#define PCRE2_ERROR_ESCAPE_INVALID_IN_VERB 140
|
||||
#define PCRE2_ERROR_UNRECOGNIZED_AFTER_QUERY_P 141
|
||||
#define PCRE2_ERROR_MISSING_NAME_TERMINATOR 142
|
||||
#define PCRE2_ERROR_DUPLICATE_SUBPATTERN_NAME 143
|
||||
#define PCRE2_ERROR_INVALID_SUBPATTERN_NAME 144
|
||||
#define PCRE2_ERROR_UNICODE_PROPERTIES_UNAVAILABLE 145
|
||||
#define PCRE2_ERROR_MALFORMED_UNICODE_PROPERTY 146
|
||||
#define PCRE2_ERROR_UNKNOWN_UNICODE_PROPERTY 147
|
||||
#define PCRE2_ERROR_SUBPATTERN_NAME_TOO_LONG 148
|
||||
#define PCRE2_ERROR_TOO_MANY_NAMED_SUBPATTERNS 149
|
||||
#define PCRE2_ERROR_CLASS_INVALID_RANGE 150
|
||||
#define PCRE2_ERROR_OCTAL_BYTE_TOO_BIG 151
|
||||
#define PCRE2_ERROR_INTERNAL_OVERRAN_WORKSPACE 152
|
||||
#define PCRE2_ERROR_INTERNAL_MISSING_SUBPATTERN 153
|
||||
#define PCRE2_ERROR_DEFINE_TOO_MANY_BRANCHES 154
|
||||
#define PCRE2_ERROR_BACKSLASH_O_MISSING_BRACE 155
|
||||
#define PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE 156
|
||||
#define PCRE2_ERROR_BACKSLASH_G_SYNTAX 157
|
||||
#define PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING 158
|
||||
/* Error 159 is obsolete and should now never occur */
|
||||
#define PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED 159
|
||||
#define PCRE2_ERROR_VERB_UNKNOWN 160
|
||||
#define PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG 161
|
||||
#define PCRE2_ERROR_SUBPATTERN_NAME_EXPECTED 162
|
||||
#define PCRE2_ERROR_INTERNAL_PARSED_OVERFLOW 163
|
||||
#define PCRE2_ERROR_INVALID_OCTAL 164
|
||||
#define PCRE2_ERROR_SUBPATTERN_NAMES_MISMATCH 165
|
||||
#define PCRE2_ERROR_MARK_MISSING_ARGUMENT 166
|
||||
#define PCRE2_ERROR_INVALID_HEXADECIMAL 167
|
||||
#define PCRE2_ERROR_BACKSLASH_C_SYNTAX 168
|
||||
#define PCRE2_ERROR_BACKSLASH_K_SYNTAX 169
|
||||
#define PCRE2_ERROR_INTERNAL_BAD_CODE_LOOKBEHINDS 170
|
||||
#define PCRE2_ERROR_BACKSLASH_N_IN_CLASS 171
|
||||
#define PCRE2_ERROR_CALLOUT_STRING_TOO_LONG 172
|
||||
#define PCRE2_ERROR_UNICODE_DISALLOWED_CODE_POINT 173
|
||||
#define PCRE2_ERROR_UTF_IS_DISABLED 174
|
||||
#define PCRE2_ERROR_UCP_IS_DISABLED 175
|
||||
#define PCRE2_ERROR_VERB_NAME_TOO_LONG 176
|
||||
#define PCRE2_ERROR_BACKSLASH_U_CODE_POINT_TOO_BIG 177
|
||||
#define PCRE2_ERROR_MISSING_OCTAL_OR_HEX_DIGITS 178
|
||||
#define PCRE2_ERROR_VERSION_CONDITION_SYNTAX 179
|
||||
/* Positive error codes 180 to 216. This run continues a numbered list that
starts earlier in the header. NOTE(review): the list heading is not part of
this excerpt; judging by the names these are pattern-compilation errors -
confirm against the start of the list. */

#define PCRE2_ERROR_INTERNAL_BAD_CODE_AUTO_POSSESS 180
#define PCRE2_ERROR_CALLOUT_NO_STRING_DELIMITER 181
#define PCRE2_ERROR_CALLOUT_BAD_STRING_DELIMITER 182
#define PCRE2_ERROR_BACKSLASH_C_CALLER_DISABLED 183
#define PCRE2_ERROR_QUERY_BARJX_NEST_TOO_DEEP 184
#define PCRE2_ERROR_BACKSLASH_C_LIBRARY_DISABLED 185
#define PCRE2_ERROR_PATTERN_TOO_COMPLICATED 186
#define PCRE2_ERROR_LOOKBEHIND_TOO_LONG 187
#define PCRE2_ERROR_PATTERN_STRING_TOO_LONG 188
#define PCRE2_ERROR_INTERNAL_BAD_CODE 189
#define PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP 190
#define PCRE2_ERROR_NO_SURROGATES_IN_UTF16 191
#define PCRE2_ERROR_BAD_LITERAL_OPTIONS 192
#define PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE 193
#define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS 194
#define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN 195
#define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE 196
#define PCRE2_ERROR_TOO_MANY_CAPTURES 197
#define PCRE2_ERROR_MISSING_OCTAL_DIGIT 198
#define PCRE2_ERROR_BACKSLASH_K_IN_LOOKAROUND 199
#define PCRE2_ERROR_MAX_VAR_LOOKBEHIND_EXCEEDED 200
#define PCRE2_ERROR_PATTERN_COMPILED_SIZE_TOO_BIG 201
#define PCRE2_ERROR_OVERSIZE_PYTHON_OCTAL 202
#define PCRE2_ERROR_CALLOUT_CALLER_DISABLED 203
#define PCRE2_ERROR_EXTRA_CASING_REQUIRES_UNICODE 204
#define PCRE2_ERROR_TURKISH_CASING_REQUIRES_UTF 205
#define PCRE2_ERROR_EXTRA_CASING_INCOMPATIBLE 206
#define PCRE2_ERROR_ECLASS_NEST_TOO_DEEP 207
#define PCRE2_ERROR_ECLASS_INVALID_OPERATOR 208
#define PCRE2_ERROR_ECLASS_UNEXPECTED_OPERATOR 209
#define PCRE2_ERROR_ECLASS_EXPECTED_OPERAND 210
#define PCRE2_ERROR_ECLASS_MIXED_OPERATORS 211
#define PCRE2_ERROR_ECLASS_HINT_SQUARE_BRACKET 212
#define PCRE2_ERROR_PERL_ECLASS_UNEXPECTED_EXPR 213
#define PCRE2_ERROR_PERL_ECLASS_EMPTY_EXPR 214
#define PCRE2_ERROR_PERL_ECLASS_MISSING_CLOSE 215
#define PCRE2_ERROR_PERL_ECLASS_UNEXPECTED_CHAR 216
|
||||
|
||||
/* "Expected" matching error codes: no match and partial match. */

#define PCRE2_ERROR_NOMATCH (-1)
#define PCRE2_ERROR_PARTIAL (-2)
|
||||
|
||||
/* Error codes for UTF-8 validity checks */

#define PCRE2_ERROR_UTF8_ERR1 (-3)
#define PCRE2_ERROR_UTF8_ERR2 (-4)
#define PCRE2_ERROR_UTF8_ERR3 (-5)
#define PCRE2_ERROR_UTF8_ERR4 (-6)
#define PCRE2_ERROR_UTF8_ERR5 (-7)
#define PCRE2_ERROR_UTF8_ERR6 (-8)
#define PCRE2_ERROR_UTF8_ERR7 (-9)
#define PCRE2_ERROR_UTF8_ERR8 (-10)
#define PCRE2_ERROR_UTF8_ERR9 (-11)
#define PCRE2_ERROR_UTF8_ERR10 (-12)
#define PCRE2_ERROR_UTF8_ERR11 (-13)
#define PCRE2_ERROR_UTF8_ERR12 (-14)
#define PCRE2_ERROR_UTF8_ERR13 (-15)
#define PCRE2_ERROR_UTF8_ERR14 (-16)
#define PCRE2_ERROR_UTF8_ERR15 (-17)
#define PCRE2_ERROR_UTF8_ERR16 (-18)
#define PCRE2_ERROR_UTF8_ERR17 (-19)
#define PCRE2_ERROR_UTF8_ERR18 (-20)
#define PCRE2_ERROR_UTF8_ERR19 (-21)
#define PCRE2_ERROR_UTF8_ERR20 (-22)
#define PCRE2_ERROR_UTF8_ERR21 (-23)

/* Error codes for UTF-16 validity checks */

#define PCRE2_ERROR_UTF16_ERR1 (-24)
#define PCRE2_ERROR_UTF16_ERR2 (-25)
#define PCRE2_ERROR_UTF16_ERR3 (-26)

/* Error codes for UTF-32 validity checks */

#define PCRE2_ERROR_UTF32_ERR1 (-27)
#define PCRE2_ERROR_UTF32_ERR2 (-28)
|
||||
|
||||
/* Miscellaneous error codes for pcre2[_dfa]_match(), substring extraction
functions, context functions, and serializing functions. They are in numerical
order. Originally they were in alphabetical order too, but now that PCRE2 is
released, the numbers must not be changed. */

#define PCRE2_ERROR_BADDATA (-29)
#define PCRE2_ERROR_MIXEDTABLES (-30) /* Name was changed */
#define PCRE2_ERROR_BADMAGIC (-31)
#define PCRE2_ERROR_BADMODE (-32)
#define PCRE2_ERROR_BADOFFSET (-33)
#define PCRE2_ERROR_BADOPTION (-34)
#define PCRE2_ERROR_BADREPLACEMENT (-35)
#define PCRE2_ERROR_BADUTFOFFSET (-36)
#define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */
#define PCRE2_ERROR_DFA_BADRESTART (-38)
#define PCRE2_ERROR_DFA_RECURSE (-39)
#define PCRE2_ERROR_DFA_UCOND (-40)
#define PCRE2_ERROR_DFA_UFUNC (-41)
#define PCRE2_ERROR_DFA_UITEM (-42)
#define PCRE2_ERROR_DFA_WSSIZE (-43)
#define PCRE2_ERROR_INTERNAL (-44)
#define PCRE2_ERROR_JIT_BADOPTION (-45)
#define PCRE2_ERROR_JIT_STACKLIMIT (-46)
#define PCRE2_ERROR_MATCHLIMIT (-47)
#define PCRE2_ERROR_NOMEMORY (-48)
#define PCRE2_ERROR_NOSUBSTRING (-49)
#define PCRE2_ERROR_NOUNIQUESUBSTRING (-50)
#define PCRE2_ERROR_NULL (-51)
#define PCRE2_ERROR_RECURSELOOP (-52)
#define PCRE2_ERROR_DEPTHLIMIT (-53)
#define PCRE2_ERROR_RECURSIONLIMIT (-53) /* Obsolete synonym */
#define PCRE2_ERROR_UNAVAILABLE (-54)
#define PCRE2_ERROR_UNSET (-55)
#define PCRE2_ERROR_BADOFFSETLIMIT (-56)
#define PCRE2_ERROR_BADREPESCAPE (-57)
#define PCRE2_ERROR_REPMISSINGBRACE (-58)
#define PCRE2_ERROR_BADSUBSTITUTION (-59)
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
#define PCRE2_ERROR_HEAPLIMIT (-63)
#define PCRE2_ERROR_CONVERT_SYNTAX (-64)
#define PCRE2_ERROR_INTERNAL_DUPMATCH (-65)
#define PCRE2_ERROR_DFA_UINVALID_UTF (-66)
#define PCRE2_ERROR_INVALIDOFFSET (-67)
#define PCRE2_ERROR_JIT_UNSUPPORTED (-68)
#define PCRE2_ERROR_REPLACECASE (-69)
#define PCRE2_ERROR_TOOLARGEREPLACE (-70)
|
||||
|
||||
|
||||
/* Request types for pcre2_pattern_info() */

#define PCRE2_INFO_ALLOPTIONS 0
#define PCRE2_INFO_ARGOPTIONS 1
#define PCRE2_INFO_BACKREFMAX 2
#define PCRE2_INFO_BSR 3
#define PCRE2_INFO_CAPTURECOUNT 4
#define PCRE2_INFO_FIRSTCODEUNIT 5
#define PCRE2_INFO_FIRSTCODETYPE 6
#define PCRE2_INFO_FIRSTBITMAP 7
#define PCRE2_INFO_HASCRORLF 8
#define PCRE2_INFO_JCHANGED 9
#define PCRE2_INFO_JITSIZE 10
#define PCRE2_INFO_LASTCODEUNIT 11
#define PCRE2_INFO_LASTCODETYPE 12
#define PCRE2_INFO_MATCHEMPTY 13
#define PCRE2_INFO_MATCHLIMIT 14
#define PCRE2_INFO_MAXLOOKBEHIND 15
#define PCRE2_INFO_MINLENGTH 16
#define PCRE2_INFO_NAMECOUNT 17
#define PCRE2_INFO_NAMEENTRYSIZE 18
#define PCRE2_INFO_NAMETABLE 19
#define PCRE2_INFO_NEWLINE 20
#define PCRE2_INFO_DEPTHLIMIT 21
#define PCRE2_INFO_RECURSIONLIMIT 21 /* Obsolete synonym */
#define PCRE2_INFO_SIZE 22
#define PCRE2_INFO_HASBACKSLASHC 23
#define PCRE2_INFO_FRAMESIZE 24
#define PCRE2_INFO_HEAPLIMIT 25
#define PCRE2_INFO_EXTRAOPTIONS 26
|
||||
|
||||
/* Request types for pcre2_config(). */

#define PCRE2_CONFIG_BSR 0
#define PCRE2_CONFIG_JIT 1
#define PCRE2_CONFIG_JITTARGET 2
#define PCRE2_CONFIG_LINKSIZE 3
#define PCRE2_CONFIG_MATCHLIMIT 4
#define PCRE2_CONFIG_NEWLINE 5
#define PCRE2_CONFIG_PARENSLIMIT 6
#define PCRE2_CONFIG_DEPTHLIMIT 7
#define PCRE2_CONFIG_RECURSIONLIMIT 7 /* Obsolete synonym */
#define PCRE2_CONFIG_STACKRECURSE 8 /* Obsolete */
#define PCRE2_CONFIG_UNICODE 9
#define PCRE2_CONFIG_UNICODE_VERSION 10
#define PCRE2_CONFIG_VERSION 11
#define PCRE2_CONFIG_HEAPLIMIT 12
#define PCRE2_CONFIG_NEVER_BACKSLASH_C 13
#define PCRE2_CONFIG_COMPILED_WIDTHS 14
#define PCRE2_CONFIG_TABLES_LENGTH 15
|
||||
|
||||
/* Optimization directives for pcre2_set_optimize().
For binary compatibility, only add to this list; do not renumber. */

#define PCRE2_OPTIMIZATION_NONE 0
#define PCRE2_OPTIMIZATION_FULL 1

/* Individual directives: each name is followed by its _OFF counterpart, whose
value is one greater. */

#define PCRE2_AUTO_POSSESS 64
#define PCRE2_AUTO_POSSESS_OFF 65
#define PCRE2_DOTSTAR_ANCHOR 66
#define PCRE2_DOTSTAR_ANCHOR_OFF 67
#define PCRE2_START_OPTIMIZE 68
#define PCRE2_START_OPTIMIZE_OFF 69
|
||||
|
||||
/* Types used in pcre2_set_substitute_case_callout().

PCRE2_SUBSTITUTE_CASE_LOWER and PCRE2_SUBSTITUTE_CASE_UPPER are passed to the
callout to indicate that the entire callout input should be case-transformed.
PCRE2_SUBSTITUTE_CASE_TITLE_FIRST is passed to indicate that only the first
character or glyph should be transformed to Unicode titlecase, and the rest to
lowercase. */

#define PCRE2_SUBSTITUTE_CASE_LOWER 1
#define PCRE2_SUBSTITUTE_CASE_UPPER 2
#define PCRE2_SUBSTITUTE_CASE_TITLE_FIRST 3
|
||||
|
||||
/* Types for code units in patterns and subject strings. */

typedef uint8_t PCRE2_UCHAR8;
typedef uint16_t PCRE2_UCHAR16;
typedef uint32_t PCRE2_UCHAR32;

/* Pointers to read-only code units of each width. */

typedef const PCRE2_UCHAR8 *PCRE2_SPTR8;
typedef const PCRE2_UCHAR16 *PCRE2_SPTR16;
typedef const PCRE2_UCHAR32 *PCRE2_SPTR32;
|
||||
|
||||
/* The PCRE2_SIZE type is used for all string lengths and offsets in PCRE2,
including pattern offsets for errors and subject offsets after a match. We
define special values to indicate zero-terminated strings and unset offsets in
the offset vector (ovector). Both special values are the all-ones value of
PCRE2_SIZE. */

#define PCRE2_SIZE size_t
#define PCRE2_SIZE_MAX SIZE_MAX
#define PCRE2_ZERO_TERMINATED (~(PCRE2_SIZE)0)
#define PCRE2_UNSET (~(PCRE2_SIZE)0)
|
||||
|
||||
/* Generic types for opaque structures and JIT callback functions. These
declarations are defined in a macro that is expanded for each width later.
Each structure is only forward-declared here, so the typedef'd names can be
used through pointers without the structure contents being visible. */

#define PCRE2_TYPES_LIST \
struct pcre2_real_general_context; \
typedef struct pcre2_real_general_context pcre2_general_context; \
\
struct pcre2_real_compile_context; \
typedef struct pcre2_real_compile_context pcre2_compile_context; \
\
struct pcre2_real_match_context; \
typedef struct pcre2_real_match_context pcre2_match_context; \
\
struct pcre2_real_convert_context; \
typedef struct pcre2_real_convert_context pcre2_convert_context; \
\
struct pcre2_real_code; \
typedef struct pcre2_real_code pcre2_code; \
\
struct pcre2_real_match_data; \
typedef struct pcre2_real_match_data pcre2_match_data; \
\
struct pcre2_real_jit_stack; \
typedef struct pcre2_real_jit_stack pcre2_jit_stack; \
\
typedef pcre2_jit_stack *(*pcre2_jit_callback)(void *);
|
||||
|
||||
|
||||
/* The structures for passing out data via callout functions. We use structures
so that new fields can be added on the end in future versions, without changing
the API of the function, thereby allowing old clients to work without
modification. Define the generic versions in a macro; the width-specific
versions are generated from this macro below. */

/* Flags for the callout_flags field. These are cleared after a callout. */

#define PCRE2_CALLOUT_STARTMATCH 0x00000001u /* Set for each bumpalong */
#define PCRE2_CALLOUT_BACKTRACK 0x00000002u /* Set after a backtrack */

#define PCRE2_STRUCTURE_LIST \
typedef struct pcre2_callout_block { \
  uint32_t version; /* Identifies version of block */ \
  /* ------------------------ Version 0 ------------------------------- */ \
  uint32_t callout_number; /* Number compiled into pattern */ \
  uint32_t capture_top; /* Max current capture */ \
  uint32_t capture_last; /* Most recently closed capture */ \
  PCRE2_SIZE *offset_vector; /* The offset vector */ \
  PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \
  PCRE2_SPTR subject; /* The subject being matched */ \
  PCRE2_SIZE subject_length; /* The length of the subject */ \
  PCRE2_SIZE start_match; /* Offset to start of this match attempt */ \
  PCRE2_SIZE current_position; /* Where we currently are in the subject */ \
  PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
  PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
  /* ------------------- Added for Version 1 -------------------------- */ \
  PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
  PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
  PCRE2_SPTR callout_string; /* String compiled into pattern */ \
  /* ------------------- Added for Version 2 -------------------------- */ \
  uint32_t callout_flags; /* See above for list */ \
  /* ------------------------------------------------------------------ */ \
} pcre2_callout_block; \
\
typedef struct pcre2_callout_enumerate_block { \
  uint32_t version; /* Identifies version of block */ \
  /* ------------------------ Version 0 ------------------------------- */ \
  PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
  PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
  uint32_t callout_number; /* Number compiled into pattern */ \
  PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
  PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
  PCRE2_SPTR callout_string; /* String compiled into pattern */ \
  /* ------------------------------------------------------------------ */ \
} pcre2_callout_enumerate_block; \
\
typedef struct pcre2_substitute_callout_block { \
  uint32_t version; /* Identifies version of block */ \
  /* ------------------------ Version 0 ------------------------------- */ \
  PCRE2_SPTR input; /* Pointer to input subject string */ \
  PCRE2_SPTR output; /* Pointer to output buffer */ \
  PCRE2_SIZE output_offsets[2]; /* Changed portion of the output */ \
  PCRE2_SIZE *ovector; /* Pointer to current ovector */ \
  uint32_t oveccount; /* Count of pairs set in ovector */ \
  uint32_t subscount; /* Substitution number */ \
  /* ------------------------------------------------------------------ */ \
} pcre2_substitute_callout_block;
|
||||
|
||||
|
||||
/* List the generic forms of all other functions in macros, which will be
expanded for each width below. Start with functions that give general
information. */

#define PCRE2_GENERAL_INFO_FUNCTIONS \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_config(uint32_t, void *);
|
||||
|
||||
|
||||
/* Functions for manipulating contexts. */

/* General context: copy, create and free. The two function pointers taken by
pcre2_general_context_create() have the shapes void *(*)(size_t, void *) and
void (*)(void *, void *). */

#define PCRE2_GENERAL_CONTEXT_FUNCTIONS \
PCRE2_EXP_DECL pcre2_general_context *PCRE2_CALL_CONVENTION \
  pcre2_general_context_copy(pcre2_general_context *); \
PCRE2_EXP_DECL pcre2_general_context *PCRE2_CALL_CONVENTION \
  pcre2_general_context_create(void *(*)(size_t, void *), \
    void (*)(void *, void *), void *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
  pcre2_general_context_free(pcre2_general_context *);
|
||||
|
||||
/* Compile context: copy, create and free, followed by the setters that take a
compile context as their first argument. */

#define PCRE2_COMPILE_CONTEXT_FUNCTIONS \
PCRE2_EXP_DECL pcre2_compile_context *PCRE2_CALL_CONVENTION \
  pcre2_compile_context_copy(pcre2_compile_context *); \
PCRE2_EXP_DECL pcre2_compile_context *PCRE2_CALL_CONVENTION \
  pcre2_compile_context_create(pcre2_general_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
  pcre2_compile_context_free(pcre2_compile_context *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_character_tables(pcre2_compile_context *, const uint8_t *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_max_pattern_compiled_length(pcre2_compile_context *, PCRE2_SIZE); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_max_varlookbehind(pcre2_compile_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_newline(pcre2_compile_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_parens_nest_limit(pcre2_compile_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_compile_recursion_guard(pcre2_compile_context *, \
    int (*)(uint32_t, void *), void *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_optimize(pcre2_compile_context *, uint32_t);
|
||||
|
||||
/* Match context: copy, create and free, followed by the setters that take a
match context as their first argument. */

#define PCRE2_MATCH_CONTEXT_FUNCTIONS \
PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \
  pcre2_match_context_copy(pcre2_match_context *); \
PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \
  pcre2_match_context_create(pcre2_general_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
  pcre2_match_context_free(pcre2_match_context *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_callout(pcre2_match_context *, \
    int (*)(pcre2_callout_block *, void *), void *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_substitute_callout(pcre2_match_context *, \
    int (*)(pcre2_substitute_callout_block *, void *), void *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_substitute_case_callout(pcre2_match_context *, \
    PCRE2_SIZE (*)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE, int, \
                   void *), \
    void *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_heap_limit(pcre2_match_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_match_limit(pcre2_match_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_offset_limit(pcre2_match_context *, PCRE2_SIZE); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_recursion_limit(pcre2_match_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_recursion_memory_management(pcre2_match_context *, \
    void *(*)(size_t, void *), void (*)(void *, void *), void *);
|
||||
|
||||
/* Convert context: copy, create and free, followed by the two glob setters. */

#define PCRE2_CONVERT_CONTEXT_FUNCTIONS \
PCRE2_EXP_DECL pcre2_convert_context *PCRE2_CALL_CONVENTION \
  pcre2_convert_context_copy(pcre2_convert_context *); \
PCRE2_EXP_DECL pcre2_convert_context *PCRE2_CALL_CONVENTION \
  pcre2_convert_context_create(pcre2_general_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
  pcre2_convert_context_free(pcre2_convert_context *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_glob_escape(pcre2_convert_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_glob_separator(pcre2_convert_context *, uint32_t);
|
||||
|
||||
|
||||
/* Functions concerned with compiling a pattern to PCRE internal code. */

#define PCRE2_COMPILE_FUNCTIONS \
PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \
  pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_SIZE *, \
    pcre2_compile_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
  pcre2_code_free(pcre2_code *); \
PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \
  pcre2_code_copy(const pcre2_code *); \
PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \
  pcre2_code_copy_with_tables(const pcre2_code *);
|
||||
|
||||
|
||||
/* Functions that give information about a compiled pattern. */

#define PCRE2_PATTERN_INFO_FUNCTIONS \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_pattern_info(const pcre2_code *, uint32_t, void *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_callout_enumerate(const pcre2_code *, \
    int (*)(pcre2_callout_enumerate_block *, void *), void *);
|
||||
|
||||
|
||||
/* Functions for running a match and inspecting the result. */

#define PCRE2_MATCH_FUNCTIONS \
PCRE2_EXP_DECL pcre2_match_data *PCRE2_CALL_CONVENTION \
  pcre2_match_data_create(uint32_t, pcre2_general_context *); \
PCRE2_EXP_DECL pcre2_match_data *PCRE2_CALL_CONVENTION \
  pcre2_match_data_create_from_pattern(const pcre2_code *, \
    pcre2_general_context *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
    uint32_t, pcre2_match_data *, pcre2_match_context *, int *, PCRE2_SIZE); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
    uint32_t, pcre2_match_data *, pcre2_match_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
  pcre2_match_data_free(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_SPTR PCRE2_CALL_CONVENTION \
  pcre2_get_mark(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
  pcre2_get_match_data_size(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
  pcre2_get_match_data_heapframes_size(pcre2_match_data *); \
PCRE2_EXP_DECL uint32_t PCRE2_CALL_CONVENTION \
  pcre2_get_ovector_count(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_SIZE *PCRE2_CALL_CONVENTION \
  pcre2_get_ovector_pointer(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
  pcre2_get_startchar(pcre2_match_data *);
|
||||
|
||||
|
||||
/* Convenience functions for handling matched substrings. */

#define PCRE2_SUBSTRING_FUNCTIONS \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_substring_copy_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR *, \
    PCRE2_SIZE *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_substring_copy_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR *, \
    PCRE2_SIZE *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
  pcre2_substring_free(PCRE2_UCHAR *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_substring_get_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR **, \
    PCRE2_SIZE *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_substring_get_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR **, \
    PCRE2_SIZE *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_substring_length_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_SIZE *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_substring_length_bynumber(pcre2_match_data *, uint32_t, PCRE2_SIZE *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_substring_nametable_scan(const pcre2_code *, PCRE2_SPTR, PCRE2_SPTR *, \
    PCRE2_SPTR *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_substring_number_from_name(const pcre2_code *, PCRE2_SPTR); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
  pcre2_substring_list_free(PCRE2_UCHAR **); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_substring_list_get(pcre2_match_data *, PCRE2_UCHAR ***, PCRE2_SIZE **);
|
||||
|
||||
|
||||
/* Functions for serializing / deserializing compiled patterns. */

#define PCRE2_SERIALIZE_FUNCTIONS \
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
  pcre2_serialize_encode(const pcre2_code **, int32_t, uint8_t **, \
    PCRE2_SIZE *, pcre2_general_context *); \
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
  pcre2_serialize_decode(pcre2_code **, int32_t, const uint8_t *, \
    pcre2_general_context *); \
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
  pcre2_serialize_get_number_of_codes(const uint8_t *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
  pcre2_serialize_free(uint8_t *);
|
||||
|
||||
|
||||
/* Convenience function for match + substitute. */

#define PCRE2_SUBSTITUTE_FUNCTION \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_substitute(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
    uint32_t, pcre2_match_data *, pcre2_match_context *, PCRE2_SPTR, \
    PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE *);
|
||||
|
||||
|
||||
/* Functions for converting pattern source strings. */

#define PCRE2_CONVERT_FUNCTIONS \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_pattern_convert(PCRE2_SPTR, PCRE2_SIZE, uint32_t, PCRE2_UCHAR **, \
    PCRE2_SIZE *, pcre2_convert_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
  pcre2_converted_pattern_free(PCRE2_UCHAR *);
|
||||
|
||||
|
||||
/* Functions for JIT processing */

#define PCRE2_JIT_FUNCTIONS \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_jit_compile(pcre2_code *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_jit_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
    uint32_t, pcre2_match_data *, pcre2_match_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
  pcre2_jit_free_unused_memory(pcre2_general_context *); \
PCRE2_EXP_DECL pcre2_jit_stack *PCRE2_CALL_CONVENTION \
  pcre2_jit_stack_create(size_t, size_t, pcre2_general_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
  pcre2_jit_stack_assign(pcre2_match_context *, pcre2_jit_callback, void *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
  pcre2_jit_stack_free(pcre2_jit_stack *);
|
||||
|
||||
|
||||
/* Other miscellaneous functions. */

#define PCRE2_OTHER_FUNCTIONS \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \
PCRE2_EXP_DECL const uint8_t *PCRE2_CALL_CONVENTION \
  pcre2_maketables(pcre2_general_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
  pcre2_maketables_free(pcre2_general_context *, const uint8_t *);
|
||||
|
||||
/* Define macros that generate width-specific names from generic versions. The
three-level macro scheme is necessary to get the macros expanded when we want
them to be. First we get the width from PCRE2_LOCAL_WIDTH, which is used for
generating three versions of everything below. After that, PCRE2_SUFFIX will be
re-defined to use PCRE2_CODE_UNIT_WIDTH, for use when macros such as
pcre2_compile are called by application code. */

/* PCRE2_JOIN pastes its arguments exactly as written; PCRE2_GLUE adds one
level of indirection so that macro arguments are expanded before pasting. */

#define PCRE2_JOIN(a,b) a ## b
#define PCRE2_GLUE(a,b) PCRE2_JOIN(a,b)
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a,PCRE2_LOCAL_WIDTH)
|
||||
|
||||
|
||||
/* Data types */

/* Every generic name below is mapped through PCRE2_SUFFIX() to its
width-specific form: the width is appended to the given stem. */

#define PCRE2_UCHAR PCRE2_SUFFIX(PCRE2_UCHAR)
#define PCRE2_SPTR PCRE2_SUFFIX(PCRE2_SPTR)

#define pcre2_code PCRE2_SUFFIX(pcre2_code_)
#define pcre2_jit_callback PCRE2_SUFFIX(pcre2_jit_callback_)
#define pcre2_jit_stack PCRE2_SUFFIX(pcre2_jit_stack_)

#define pcre2_real_code PCRE2_SUFFIX(pcre2_real_code_)
#define pcre2_real_general_context PCRE2_SUFFIX(pcre2_real_general_context_)
#define pcre2_real_compile_context PCRE2_SUFFIX(pcre2_real_compile_context_)
#define pcre2_real_convert_context PCRE2_SUFFIX(pcre2_real_convert_context_)
#define pcre2_real_match_context PCRE2_SUFFIX(pcre2_real_match_context_)
#define pcre2_real_jit_stack PCRE2_SUFFIX(pcre2_real_jit_stack_)
#define pcre2_real_match_data PCRE2_SUFFIX(pcre2_real_match_data_)


/* Data blocks */

#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_)
#define pcre2_callout_enumerate_block PCRE2_SUFFIX(pcre2_callout_enumerate_block_)
#define pcre2_substitute_callout_block PCRE2_SUFFIX(pcre2_substitute_callout_block_)
#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_)
#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_)
#define pcre2_convert_context PCRE2_SUFFIX(pcre2_convert_context_)
#define pcre2_match_context PCRE2_SUFFIX(pcre2_match_context_)
#define pcre2_match_data PCRE2_SUFFIX(pcre2_match_data_)
|
||||
|
||||
|
||||
/* Functions: the complete list, in approximately alphabetical order (a few
entries, such as pcre2_get_ovector_pointer and pcre2_set_offset_limit, are
out of strict order). Each generic function name is mapped through
PCRE2_SUFFIX() to its width-specific form. */

#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_)
#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_)
#define pcre2_code_copy_with_tables PCRE2_SUFFIX(pcre2_code_copy_with_tables_)
#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_)
#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_)
#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_)
#define pcre2_compile_context_create PCRE2_SUFFIX(pcre2_compile_context_create_)
#define pcre2_compile_context_free PCRE2_SUFFIX(pcre2_compile_context_free_)
#define pcre2_config PCRE2_SUFFIX(pcre2_config_)
#define pcre2_convert_context_copy PCRE2_SUFFIX(pcre2_convert_context_copy_)
#define pcre2_convert_context_create PCRE2_SUFFIX(pcre2_convert_context_create_)
#define pcre2_convert_context_free PCRE2_SUFFIX(pcre2_convert_context_free_)
#define pcre2_converted_pattern_free PCRE2_SUFFIX(pcre2_converted_pattern_free_)
#define pcre2_dfa_match PCRE2_SUFFIX(pcre2_dfa_match_)
#define pcre2_general_context_copy PCRE2_SUFFIX(pcre2_general_context_copy_)
#define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_)
#define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_)
#define pcre2_get_error_message PCRE2_SUFFIX(pcre2_get_error_message_)
#define pcre2_get_mark PCRE2_SUFFIX(pcre2_get_mark_)
#define pcre2_get_match_data_heapframes_size PCRE2_SUFFIX(pcre2_get_match_data_heapframes_size_)
#define pcre2_get_match_data_size PCRE2_SUFFIX(pcre2_get_match_data_size_)
#define pcre2_get_ovector_pointer PCRE2_SUFFIX(pcre2_get_ovector_pointer_)
#define pcre2_get_ovector_count PCRE2_SUFFIX(pcre2_get_ovector_count_)
#define pcre2_get_startchar PCRE2_SUFFIX(pcre2_get_startchar_)
#define pcre2_jit_compile PCRE2_SUFFIX(pcre2_jit_compile_)
#define pcre2_jit_match PCRE2_SUFFIX(pcre2_jit_match_)
#define pcre2_jit_free_unused_memory PCRE2_SUFFIX(pcre2_jit_free_unused_memory_)
#define pcre2_jit_stack_assign PCRE2_SUFFIX(pcre2_jit_stack_assign_)
#define pcre2_jit_stack_create PCRE2_SUFFIX(pcre2_jit_stack_create_)
#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_)
#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_)
#define pcre2_maketables_free PCRE2_SUFFIX(pcre2_maketables_free_)
#define pcre2_match PCRE2_SUFFIX(pcre2_match_)
#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_)
#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_)
#define pcre2_match_context_free PCRE2_SUFFIX(pcre2_match_context_free_)
#define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_)
#define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_)
#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_)
#define pcre2_pattern_convert PCRE2_SUFFIX(pcre2_pattern_convert_)
#define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_)
#define pcre2_serialize_decode PCRE2_SUFFIX(pcre2_serialize_decode_)
#define pcre2_serialize_encode PCRE2_SUFFIX(pcre2_serialize_encode_)
#define pcre2_serialize_free PCRE2_SUFFIX(pcre2_serialize_free_)
#define pcre2_serialize_get_number_of_codes PCRE2_SUFFIX(pcre2_serialize_get_number_of_codes_)
#define pcre2_set_bsr PCRE2_SUFFIX(pcre2_set_bsr_)
#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_)
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
#define pcre2_set_compile_extra_options PCRE2_SUFFIX(pcre2_set_compile_extra_options_)
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
#define pcre2_set_glob_escape PCRE2_SUFFIX(pcre2_set_glob_escape_)
#define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_)
#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
#define pcre2_set_max_varlookbehind PCRE2_SUFFIX(pcre2_set_max_varlookbehind_)
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
#define pcre2_set_max_pattern_compiled_length PCRE2_SUFFIX(pcre2_set_max_pattern_compiled_length_)
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
#define pcre2_set_optimize PCRE2_SUFFIX(pcre2_set_optimize_)
#define pcre2_set_substitute_callout PCRE2_SUFFIX(pcre2_set_substitute_callout_)
#define pcre2_set_substitute_case_callout PCRE2_SUFFIX(pcre2_set_substitute_case_callout_)
#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_)
#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_)
#define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_)
#define pcre2_substring_free PCRE2_SUFFIX(pcre2_substring_free_)
#define pcre2_substring_get_byname PCRE2_SUFFIX(pcre2_substring_get_byname_)
#define pcre2_substring_get_bynumber PCRE2_SUFFIX(pcre2_substring_get_bynumber_)
#define pcre2_substring_length_byname PCRE2_SUFFIX(pcre2_substring_length_byname_)
#define pcre2_substring_length_bynumber PCRE2_SUFFIX(pcre2_substring_length_bynumber_)
#define pcre2_substring_list_get PCRE2_SUFFIX(pcre2_substring_list_get_)
#define pcre2_substring_list_free PCRE2_SUFFIX(pcre2_substring_list_free_)
#define pcre2_substring_nametable_scan PCRE2_SUFFIX(pcre2_substring_nametable_scan_)
#define pcre2_substring_number_from_name PCRE2_SUFFIX(pcre2_substring_number_from_name_)

/* Keep this old function name for backwards compatibility */
#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)

/* Keep this obsolete function for backwards compatibility: it is now a noop. */
#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)
|
||||
|
||||
/* Now generate all three sets of width-specific structures and function
|
||||
prototypes. */
|
||||
|
||||
#define PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS \
|
||||
PCRE2_TYPES_LIST \
|
||||
PCRE2_STRUCTURE_LIST \
|
||||
PCRE2_GENERAL_INFO_FUNCTIONS \
|
||||
PCRE2_GENERAL_CONTEXT_FUNCTIONS \
|
||||
PCRE2_COMPILE_CONTEXT_FUNCTIONS \
|
||||
PCRE2_CONVERT_CONTEXT_FUNCTIONS \
|
||||
PCRE2_CONVERT_FUNCTIONS \
|
||||
PCRE2_MATCH_CONTEXT_FUNCTIONS \
|
||||
PCRE2_COMPILE_FUNCTIONS \
|
||||
PCRE2_PATTERN_INFO_FUNCTIONS \
|
||||
PCRE2_MATCH_FUNCTIONS \
|
||||
PCRE2_SUBSTRING_FUNCTIONS \
|
||||
PCRE2_SERIALIZE_FUNCTIONS \
|
||||
PCRE2_SUBSTITUTE_FUNCTION \
|
||||
PCRE2_JIT_FUNCTIONS \
|
||||
PCRE2_OTHER_FUNCTIONS
|
||||
|
||||
#define PCRE2_LOCAL_WIDTH 8
|
||||
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
#undef PCRE2_LOCAL_WIDTH
|
||||
|
||||
#define PCRE2_LOCAL_WIDTH 16
|
||||
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
#undef PCRE2_LOCAL_WIDTH
|
||||
|
||||
#define PCRE2_LOCAL_WIDTH 32
|
||||
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
#undef PCRE2_LOCAL_WIDTH
|
||||
|
||||
/* Undefine the list macros; they are no longer needed. */
|
||||
|
||||
#undef PCRE2_TYPES_LIST
|
||||
#undef PCRE2_STRUCTURE_LIST
|
||||
#undef PCRE2_GENERAL_INFO_FUNCTIONS
|
||||
#undef PCRE2_GENERAL_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_COMPILE_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_CONVERT_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_MATCH_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_COMPILE_FUNCTIONS
|
||||
#undef PCRE2_PATTERN_INFO_FUNCTIONS
|
||||
#undef PCRE2_MATCH_FUNCTIONS
|
||||
#undef PCRE2_SUBSTRING_FUNCTIONS
|
||||
#undef PCRE2_SERIALIZE_FUNCTIONS
|
||||
#undef PCRE2_SUBSTITUTE_FUNCTION
|
||||
#undef PCRE2_JIT_FUNCTIONS
|
||||
#undef PCRE2_OTHER_FUNCTIONS
|
||||
#undef PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
|
||||
/* PCRE2_CODE_UNIT_WIDTH must be defined. If it is 8, 16, or 32, redefine
|
||||
PCRE2_SUFFIX to use it. If it is 0, undefine the other macros and make
|
||||
PCRE2_SUFFIX a no-op. Otherwise, generate an error. */
|
||||
|
||||
#undef PCRE2_SUFFIX
|
||||
#ifndef PCRE2_CODE_UNIT_WIDTH
|
||||
#error PCRE2_CODE_UNIT_WIDTH must be defined before including pcre2.h.
|
||||
#error Use 8, 16, or 32; or 0 for a multi-width application.
|
||||
#else /* PCRE2_CODE_UNIT_WIDTH is defined */
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8 || \
|
||||
PCRE2_CODE_UNIT_WIDTH == 16 || \
|
||||
PCRE2_CODE_UNIT_WIDTH == 32
|
||||
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a, PCRE2_CODE_UNIT_WIDTH)
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 0
|
||||
#undef PCRE2_JOIN
|
||||
#undef PCRE2_GLUE
|
||||
#define PCRE2_SUFFIX(a) a
|
||||
#else
|
||||
#error PCRE2_CODE_UNIT_WIDTH must be 0, 8, 16, or 32.
|
||||
#endif
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH is defined */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* PCRE2_H_IDEMPOTENT_GUARD */
|
||||
|
||||
/* End of pcre2.h */
|
||||
@@ -1,1412 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This module contains functions that scan a compiled pattern and change
|
||||
repeats into possessive repeats where possible. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
/* This macro represents the max size of list[] and that is used to keep
|
||||
track of UCD info in several places; it should be kept in sync with the
|
||||
value used by GenerateUcd.py */
|
||||
#define MAX_LIST 8
|
||||
|
||||
/*************************************************
|
||||
* Tables for auto-possessification *
|
||||
*************************************************/
|
||||
|
||||
/* This table is used to check whether auto-possessification is possible
|
||||
between adjacent character-type opcodes. The left-hand (repeated) opcode is
|
||||
used to select the row, and the right-hand opcode is used to select the column.
|
||||
A value of 1 means that auto-possessification is OK. For example, the second
|
||||
value in the first row means that \D+\d can be turned into \D++\d.
|
||||
|
||||
The Unicode property types (\P and \p) have to be present to fill out the table
|
||||
because of what their opcode values are, but the table values should always be
|
||||
zero because property types are handled separately in the code. The last four
|
||||
columns apply to items that cannot be repeated, so there is no need to have
|
||||
rows for them. Note that OP_DIGIT etc. are generated only when PCRE2_UCP is
|
||||
*not* set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
|
||||
|
||||
/* Table dimensions. APTROWS counts the opcodes from FIRST_AUTOTAB_OP up to and
including LAST_AUTOTAB_LEFT_OP; APTCOLS counts the opcodes from
FIRST_AUTOTAB_OP up to and including LAST_AUTOTAB_RIGHT_OP. */
#define APTROWS (LAST_AUTOTAB_LEFT_OP - FIRST_AUTOTAB_OP + 1)
|
||||
#define APTCOLS (LAST_AUTOTAB_RIGHT_OP - FIRST_AUTOTAB_OP + 1)
|
||||
|
||||
/* The initializer lists 17 rows (\D through \X) of 21 columns each (\D through
$M). The row is the left-hand (repeated) item and the column is the right-hand
item; presumably both are indexed relative to FIRST_AUTOTAB_OP - confirm at
the place where the table is read. */
static const uint8_t autoposstab[APTROWS][APTCOLS] = {
|
||||
/* \D \d \S \s \W \w . .+ \C \P \p \R \H \h \V \v \X \Z \z $ $M */
|
||||
{ 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \D */
|
||||
{ 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \d */
|
||||
{ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \S */
|
||||
{ 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \s */
|
||||
{ 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \W */
|
||||
{ 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \w */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* . */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* .+ */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \C */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* \P */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* \p */
|
||||
{ 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 }, /* \R */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 }, /* \H */
|
||||
{ 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0 }, /* \h */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0 }, /* \V */
|
||||
{ 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0 }, /* \v */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 } /* \X */
|
||||
};
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
/* This table is used to check whether auto-possessification is possible
|
||||
between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP). The
|
||||
left-hand (repeated) opcode is used to select the row, and the right-hand
|
||||
opcode is used to select the column. The values are as follows:
|
||||
|
||||
0 Always return FALSE (never auto-possessify)
|
||||
1 Character groups are distinct (possessify if both are OP_PROP)
|
||||
2 Check character categories in the same group (general or particular)
|
||||
3 TRUE if the two opcodes are not the same (PROP vs NOTPROP)
|
||||
|
||||
4 Check left general category vs right particular category
|
||||
5 Check right general category vs left particular category
|
||||
|
||||
6 Left alphanum vs right general category
|
||||
7 Left space vs right general category
|
||||
8 Left word vs right general category
|
||||
|
||||
9 Right alphanum vs left general category
|
||||
10 Right space vs left general category
|
||||
11 Right word vs left general category
|
||||
|
||||
12 Left alphanum vs right particular category
|
||||
13 Left space vs right particular category
|
||||
14 Left word vs right particular category
|
||||
|
||||
15 Right alphanum vs left particular category
|
||||
16 Right space vs left particular category
|
||||
17 Right word vs left particular category
|
||||
*/
|
||||
|
||||
/* The initializer lists 13 rows and 13 columns, PT_LAMP through PT_BOOL in the
same order on both axes. The row is the left-hand (repeated) property type and
the column is the right-hand property type; each entry is one of the action
codes 0..17. */
static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = {
|
||||
/* LAMP GC PC SC SCX ALNUM SPACE PXSPACE WORD CLIST UCNC BIDICL BOOL */
|
||||
{ 3, 0, 0, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_LAMP */
|
||||
{ 0, 2, 4, 0, 0, 9, 10, 10, 11, 0, 0, 0, 0 }, /* PT_GC */
|
||||
{ 0, 5, 2, 0, 0, 15, 16, 16, 17, 0, 0, 0, 0 }, /* PT_PC */
|
||||
{ 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SC */
|
||||
{ 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SCX */
|
||||
{ 3, 6, 12, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_ALNUM */
|
||||
{ 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_SPACE */
|
||||
{ 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_PXSPACE */
|
||||
{ 0, 8, 14, 0, 0, 0, 1, 1, 3, 0, 0, 0, 0 }, /* PT_WORD */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0 }, /* PT_UCNC */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_BIDICL */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } /* PT_BOOL */
|
||||
/* PT_ANY does not need a record. */
|
||||
};
|
||||
|
||||
/* This table is used to check whether auto-possessification is possible
|
||||
between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP) when one
|
||||
specifies a general category and the other specifies a particular category. The
|
||||
row is selected by the general category and the column by the particular
|
||||
category. The value is 1 if the particular category is not part of the general
|
||||
category. */
|
||||
|
||||
/* Seven rows, one per general category (C, L, M, N, P, S, Z), and thirty
columns, one per particular category (Cc through Zs). An entry is 0 exactly
where the particular category belongs to the row's general category, and 1
everywhere else. */
static const uint8_t catposstab[7][30] = {
|
||||
/* Cc Cf Cn Co Cs Ll Lm Lo Lt Lu Mc Me Mn Nd Nl No Pc Pd Pe Pf Pi Po Ps Sc Sk Sm So Zl Zp Zs */
|
||||
{ 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* C */
|
||||
{ 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* L */
|
||||
{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* M */
|
||||
{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* N */
|
||||
{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 }, /* P */
|
||||
{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1 }, /* S */
|
||||
{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 } /* Z */
|
||||
};
|
||||
|
||||
/* This table is used when checking ALNUM, (PX)SPACE, SPACE, and WORD against
|
||||
a general or particular category. The properties in each row are those
|
||||
that apply to the character set in question. Duplication means that a little
|
||||
unnecessary work is done when checking, but this keeps things much simpler
|
||||
because they can all use the same code. For more details see the comment where
|
||||
this table is used.
|
||||
|
||||
Note: SPACE and PXSPACE used to be different because Perl excluded VT from
|
||||
"space", but from Perl 5.18 it's included, so both categories are treated the
|
||||
same here. */
|
||||
|
||||
/* Three rows of four ucp_ values each. The rows are, in order, ALNUM,
SPACE/PXSPACE and WORD, as the trailing comments say. How the four columns are
interpreted is decided by the code that reads this table, which is not visible
here. */
static const uint8_t posspropstab[3][4] = {
|
||||
{ ucp_L, ucp_N, ucp_N, ucp_Nl }, /* ALNUM, 3rd and 4th values redundant */
|
||||
{ ucp_Z, ucp_Z, ucp_C, ucp_Cc }, /* SPACE and PXSPACE, 2nd value redundant */
|
||||
{ ucp_L, ucp_N, ucp_P, ucp_Po } /* WORD */
|
||||
};
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
/*************************************************
|
||||
* Check a character and a property *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called by compare_opcodes() when a property item is
|
||||
adjacent to a fixed character.
|
||||
|
||||
Arguments:
|
||||
c the character
|
||||
ptype the property type
|
||||
pdata the data for the type
|
||||
negated TRUE if it's a negated property (\P or \p{^)
|
||||
|
||||
Returns: TRUE if auto-possessifying is OK
|
||||
*/
|
||||
|
||||
static BOOL
|
||||
check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata,
|
||||
BOOL negated)
|
||||
{
|
||||
BOOL ok, rc;
|
||||
const uint32_t *p;
|
||||
const ucd_record *prop = GET_UCD(c);
|
||||
|
||||
switch(ptype)
|
||||
{
|
||||
case PT_LAMP:
|
||||
return (prop->chartype == ucp_Lu ||
|
||||
prop->chartype == ucp_Ll ||
|
||||
prop->chartype == ucp_Lt) == negated;
|
||||
|
||||
case PT_GC:
|
||||
return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
|
||||
|
||||
case PT_PC:
|
||||
return (pdata == prop->chartype) == negated;
|
||||
|
||||
case PT_SC:
|
||||
return (pdata == prop->script) == negated;
|
||||
|
||||
case PT_SCX:
|
||||
ok = (pdata == prop->script
|
||||
|| MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), pdata) != 0);
|
||||
return ok == negated;
|
||||
|
||||
/* These are specials */
|
||||
|
||||
case PT_ALNUM:
|
||||
return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
|
||||
|
||||
/* Perl space used to exclude VT, but from Perl 5.18 it is included, which
|
||||
means that Perl space and POSIX space are now identical. PCRE was changed
|
||||
at release 8.34. */
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
switch(c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
VSPACE_CASES:
|
||||
rc = negated;
|
||||
break;
|
||||
|
||||
default:
|
||||
rc = (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated;
|
||||
}
|
||||
return rc;
|
||||
|
||||
case PT_WORD:
|
||||
return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
|
||||
c == CHAR_UNDERSCORE) == negated;
|
||||
|
||||
case PT_CLIST:
|
||||
p = PRIV(ucd_caseless_sets) + prop->caseset;
|
||||
for (;;)
|
||||
{
|
||||
if (c < *p) return !negated;
|
||||
if (c == *p++) return negated;
|
||||
}
|
||||
PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
|
||||
break;
|
||||
|
||||
/* Haven't yet thought these through. */
|
||||
|
||||
case PT_BIDICL:
|
||||
return FALSE;
|
||||
|
||||
case PT_BOOL:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Base opcode of repeated opcodes *
|
||||
*************************************************/
|
||||
|
||||
/* Returns the base opcode for repeated single character type opcodes. If the
|
||||
opcode is not a repeated character type, it returns with the original value.
|
||||
|
||||
Arguments: c opcode
|
||||
Returns: base opcode for the type
|
||||
*/
|
||||
|
||||
static PCRE2_UCHAR
|
||||
get_repeat_base(PCRE2_UCHAR c)
|
||||
{
|
||||
return (c > OP_TYPEPOSUPTO)? c :
|
||||
(c >= OP_TYPESTAR)? OP_TYPESTAR :
|
||||
(c >= OP_NOTSTARI)? OP_NOTSTARI :
|
||||
(c >= OP_NOTSTAR)? OP_NOTSTAR :
|
||||
(c >= OP_STARI)? OP_STARI :
|
||||
OP_STAR;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Fill the character property list *
|
||||
*************************************************/
|
||||
|
||||
/* Checks whether the code points to an opcode that can take part in auto-
|
||||
possessification, and if so, fills a list with its properties.
|
||||
|
||||
Arguments:
|
||||
code points to start of expression
|
||||
utf TRUE if in UTF mode
|
||||
ucp TRUE if in UCP mode
|
||||
fcc points to the case-flipping table
|
||||
list points to output list
|
||||
list[0] will be filled with the opcode
|
||||
list[1] will be non-zero if this opcode
|
||||
can match an empty character string
|
||||
list[2..7] depends on the opcode
|
||||
|
||||
Returns: points to the start of the next opcode if *code is accepted
|
||||
NULL if *code is not accepted
|
||||
*/
|
||||
|
||||
static PCRE2_SPTR
|
||||
get_chr_property_list(PCRE2_SPTR code, BOOL utf, BOOL ucp, const uint8_t *fcc,
|
||||
uint32_t *list)
|
||||
{
|
||||
PCRE2_UCHAR c = *code;
|
||||
PCRE2_UCHAR base;
|
||||
PCRE2_SPTR end;
|
||||
PCRE2_SPTR class_end;
|
||||
uint32_t chr;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
uint32_t *clist_dest;
|
||||
const uint32_t *clist_src;
|
||||
#else
|
||||
(void)utf; /* Suppress "unused parameter" compiler warnings */
|
||||
(void)ucp;
|
||||
#endif
|
||||
|
||||
list[0] = c;
|
||||
list[1] = FALSE;
|
||||
code++;
|
||||
|
||||
if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
|
||||
{
|
||||
base = get_repeat_base(c);
|
||||
c -= (base - OP_STAR);
|
||||
|
||||
if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO)
|
||||
code += IMM2_SIZE;
|
||||
|
||||
list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT &&
|
||||
c != OP_POSPLUS);
|
||||
|
||||
switch(base)
|
||||
{
|
||||
case OP_STAR:
|
||||
list[0] = OP_CHAR;
|
||||
break;
|
||||
|
||||
case OP_STARI:
|
||||
list[0] = OP_CHARI;
|
||||
break;
|
||||
|
||||
case OP_NOTSTAR:
|
||||
list[0] = OP_NOT;
|
||||
break;
|
||||
|
||||
case OP_NOTSTARI:
|
||||
list[0] = OP_NOTI;
|
||||
break;
|
||||
|
||||
case OP_TYPESTAR:
|
||||
list[0] = *code;
|
||||
code++;
|
||||
break;
|
||||
}
|
||||
c = list[0];
|
||||
}
|
||||
|
||||
switch(c)
|
||||
{
|
||||
case OP_NOT_DIGIT:
|
||||
case OP_DIGIT:
|
||||
case OP_NOT_WHITESPACE:
|
||||
case OP_WHITESPACE:
|
||||
case OP_NOT_WORDCHAR:
|
||||
case OP_WORDCHAR:
|
||||
case OP_ANY:
|
||||
case OP_ALLANY:
|
||||
case OP_ANYNL:
|
||||
case OP_NOT_HSPACE:
|
||||
case OP_HSPACE:
|
||||
case OP_NOT_VSPACE:
|
||||
case OP_VSPACE:
|
||||
case OP_EXTUNI:
|
||||
case OP_EODN:
|
||||
case OP_EOD:
|
||||
case OP_DOLL:
|
||||
case OP_DOLLM:
|
||||
return code;
|
||||
|
||||
case OP_CHAR:
|
||||
case OP_NOT:
|
||||
GETCHARINCTEST(chr, code);
|
||||
list[2] = chr;
|
||||
list[3] = NOTACHAR;
|
||||
return code;
|
||||
|
||||
case OP_CHARI:
|
||||
case OP_NOTI:
|
||||
list[0] = (c == OP_CHARI) ? OP_CHAR : OP_NOT;
|
||||
GETCHARINCTEST(chr, code);
|
||||
list[2] = chr;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (chr < 128 || (chr < 256 && !utf && !ucp))
|
||||
list[3] = fcc[chr];
|
||||
else
|
||||
list[3] = UCD_OTHERCASE(chr);
|
||||
#elif defined SUPPORT_WIDE_CHARS
|
||||
list[3] = (chr < 256) ? fcc[chr] : chr;
|
||||
#else
|
||||
list[3] = fcc[chr];
|
||||
#endif
|
||||
|
||||
/* The othercase might be the same value. */
|
||||
|
||||
if (chr == list[3])
|
||||
list[3] = NOTACHAR;
|
||||
else
|
||||
list[4] = NOTACHAR;
|
||||
return code;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
case OP_PROP:
|
||||
case OP_NOTPROP:
|
||||
if (code[0] != PT_CLIST)
|
||||
{
|
||||
list[2] = code[0];
|
||||
list[3] = code[1];
|
||||
return code + 2;
|
||||
}
|
||||
|
||||
/* Convert only if we have enough space. */
|
||||
|
||||
clist_src = PRIV(ucd_caseless_sets) + code[1];
|
||||
clist_dest = list + 2;
|
||||
code += 2;
|
||||
|
||||
do {
|
||||
if (clist_dest >= list + MAX_LIST)
|
||||
{
|
||||
/* Early return if there is not enough space. GenerateUcd.py
|
||||
generated a list with more than 5 characters and something
|
||||
must be done about that going forward. */
|
||||
PCRE2_DEBUG_UNREACHABLE(); /* Remove if it ever triggers */
|
||||
list[2] = code[0];
|
||||
list[3] = code[1];
|
||||
return code;
|
||||
}
|
||||
*clist_dest++ = *clist_src;
|
||||
}
|
||||
while(*clist_src++ != NOTACHAR);
|
||||
|
||||
/* All characters are stored. The terminating NOTACHAR is copied from the
|
||||
clist itself. */
|
||||
|
||||
list[0] = (c == OP_PROP) ? OP_CHAR : OP_NOT;
|
||||
return code;
|
||||
#endif
|
||||
|
||||
case OP_NCLASS:
|
||||
case OP_CLASS:
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
case OP_XCLASS:
|
||||
case OP_ECLASS:
|
||||
if (c == OP_XCLASS || c == OP_ECLASS)
|
||||
end = code + GET(code, 0) - 1;
|
||||
else
|
||||
#endif
|
||||
end = code + 32 / sizeof(PCRE2_UCHAR);
|
||||
class_end = end;
|
||||
|
||||
switch(*end)
|
||||
{
|
||||
case OP_CRSTAR:
|
||||
case OP_CRMINSTAR:
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
case OP_CRPOSSTAR:
|
||||
case OP_CRPOSQUERY:
|
||||
list[1] = TRUE;
|
||||
end++;
|
||||
break;
|
||||
|
||||
case OP_CRPLUS:
|
||||
case OP_CRMINPLUS:
|
||||
case OP_CRPOSPLUS:
|
||||
end++;
|
||||
break;
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
case OP_CRPOSRANGE:
|
||||
list[1] = (GET2(end, 1) == 0);
|
||||
end += 1 + 2 * IMM2_SIZE;
|
||||
break;
|
||||
}
|
||||
list[2] = (uint32_t)(end - code);
|
||||
list[3] = (uint32_t)(end - class_end);
|
||||
return end;
|
||||
}
|
||||
|
||||
return NULL; /* Opcode not accepted */
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Scan further character sets for match *
|
||||
*************************************************/
|
||||
|
||||
/* Checks whether the base and the current opcode have a common character, in
|
||||
which case the base cannot be possessified.
|
||||
|
||||
Arguments:
|
||||
code points to the byte code
|
||||
utf TRUE in UTF mode
|
||||
ucp TRUE in UCP mode
|
||||
cb compile data block
|
||||
base_list the data list of the base opcode
|
||||
base_end the end of the base opcode
|
||||
rec_limit points to recursion depth counter
|
||||
|
||||
Returns: TRUE if the auto-possessification is possible
|
||||
*/
|
||||
|
||||
static BOOL
|
||||
compare_opcodes(PCRE2_SPTR code, BOOL utf, BOOL ucp, const compile_block *cb,
|
||||
const uint32_t *base_list, PCRE2_SPTR base_end, int *rec_limit)
|
||||
{
|
||||
PCRE2_UCHAR c;
|
||||
uint32_t list[MAX_LIST];
|
||||
const uint32_t *chr_ptr;
|
||||
const uint32_t *ochr_ptr;
|
||||
const uint32_t *list_ptr;
|
||||
PCRE2_SPTR next_code;
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
PCRE2_SPTR xclass_flags;
|
||||
#endif
|
||||
const uint8_t *class_bitset;
|
||||
const uint8_t *set1, *set2, *set_end;
|
||||
uint32_t chr;
|
||||
BOOL accepted, invert_bits;
|
||||
BOOL entered_a_group = FALSE;
|
||||
|
||||
if (--(*rec_limit) <= 0) return FALSE; /* Recursion has gone too deep */
|
||||
|
||||
/* Note: the base_list[1] contains whether the current opcode has a greedy
|
||||
(represented by a non-zero value) quantifier. This is different from
|
||||
other character type lists, which store here that the character iterator
|
||||
matches an empty string (also represented by a non-zero value). */
|
||||
|
||||
for(;;)
|
||||
{
|
||||
PCRE2_SPTR bracode;
|
||||
|
||||
/* All operations move the code pointer forward.
|
||||
Therefore infinite recursions are not possible. */
|
||||
|
||||
c = *code;
|
||||
|
||||
/* Skip over callouts */
|
||||
|
||||
if (c == OP_CALLOUT)
|
||||
{
|
||||
code += PRIV(OP_lengths)[c];
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c == OP_CALLOUT_STR)
|
||||
{
|
||||
code += GET(code, 1 + 2*LINK_SIZE);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* At the end of a branch, skip to the end of the group and process it. */
|
||||
|
||||
if (c == OP_ALT)
|
||||
{
|
||||
do code += GET(code, 1); while (*code == OP_ALT);
|
||||
c = *code;
|
||||
}
|
||||
|
||||
/* Inspect the next opcode. */
|
||||
|
||||
switch(c)
|
||||
{
|
||||
/* We can always possessify a greedy iterator at the end of the pattern,
|
||||
which is reached after skipping over the final OP_KET. A non-greedy
|
||||
iterator must never be possessified. */
|
||||
|
||||
case OP_END:
|
||||
return base_list[1] != 0;
|
||||
|
||||
/* When an iterator is at the end of certain kinds of group we can inspect
|
||||
what follows the group by skipping over the closing ket. Note that this
|
||||
does not apply to OP_KETRMAX or OP_KETRMIN because what follows any given
|
||||
iteration is variable (could be another iteration or could be the next
|
||||
item). As these two opcodes are not listed in the next switch, they will
|
||||
end up as the next code to inspect, and return FALSE by virtue of being
|
||||
unsupported. */
|
||||
|
||||
case OP_KET:
|
||||
case OP_KETRPOS:
|
||||
/* The non-greedy case cannot be converted to a possessive form. */
|
||||
|
||||
if (base_list[1] == 0) return FALSE;
|
||||
|
||||
/* If the bracket is capturing it might be referenced by an OP_RECURSE
|
||||
so its last iterator can never be possessified if the pattern contains
|
||||
recursions. (This could be improved by keeping a list of group numbers that
|
||||
are called by recursion.) */
|
||||
|
||||
bracode = code - GET(code, 1);
|
||||
switch(*bracode)
|
||||
{
|
||||
case OP_CBRA:
|
||||
case OP_SCBRA:
|
||||
case OP_CBRAPOS:
|
||||
case OP_SCBRAPOS:
|
||||
if (cb->had_recurse) return FALSE;
|
||||
break;
|
||||
|
||||
/* A script run might have to backtrack if the iterated item can match
|
||||
characters from more than one script. So give up unless repeating an
|
||||
explicit character. */
|
||||
|
||||
case OP_SCRIPT_RUN:
|
||||
if (base_list[0] != OP_CHAR && base_list[0] != OP_CHARI)
|
||||
return FALSE;
|
||||
break;
|
||||
|
||||
/* Atomic sub-patterns and forward assertions can always auto-possessify
|
||||
their last iterator. However, if the group was entered as a result of
|
||||
checking a previous iterator, this is not possible. */
|
||||
|
||||
case OP_ASSERT:
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ONCE:
|
||||
return !entered_a_group;
|
||||
|
||||
/* Fixed-length lookbehinds can be treated the same way, but variable
|
||||
length lookbehinds must not auto-possessify their last iterator. Note
|
||||
that in order to identify a variable length lookbehind we must check
|
||||
through all branches, because some may be of fixed length. */
|
||||
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
do
|
||||
{
|
||||
if (bracode[1+LINK_SIZE] == OP_VREVERSE) return FALSE; /* Variable */
|
||||
bracode += GET(bracode, 1);
|
||||
}
|
||||
while (*bracode == OP_ALT);
|
||||
return !entered_a_group; /* Not variable length */
|
||||
|
||||
/* Non-atomic assertions - don't possessify last iterator. This needs
|
||||
more thought. */
|
||||
|
||||
case OP_ASSERT_NA:
|
||||
case OP_ASSERTBACK_NA:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* Skip over the bracket and inspect what comes next. */
|
||||
|
||||
code += PRIV(OP_lengths)[c];
|
||||
continue;
|
||||
|
||||
/* Handle cases where the next item is a group. */
|
||||
|
||||
case OP_ONCE:
|
||||
case OP_BRA:
|
||||
case OP_CBRA:
|
||||
next_code = code + GET(code, 1);
|
||||
code += PRIV(OP_lengths)[c];
|
||||
|
||||
/* Check each branch. We have to recurse a level for all but the last
|
||||
branch. */
|
||||
|
||||
while (*next_code == OP_ALT)
|
||||
{
|
||||
if (!compare_opcodes(code, utf, ucp, cb, base_list, base_end, rec_limit))
|
||||
return FALSE;
|
||||
code = next_code + 1 + LINK_SIZE;
|
||||
next_code += GET(next_code, 1);
|
||||
}
|
||||
|
||||
entered_a_group = TRUE;
|
||||
continue;
|
||||
|
||||
case OP_BRAZERO:
|
||||
case OP_BRAMINZERO:
|
||||
|
||||
next_code = code + 1;
|
||||
if (*next_code != OP_BRA && *next_code != OP_CBRA &&
|
||||
*next_code != OP_ONCE) return FALSE;
|
||||
|
||||
do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
|
||||
|
||||
/* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
|
||||
|
||||
next_code += 1 + LINK_SIZE;
|
||||
if (!compare_opcodes(next_code, utf, ucp, cb, base_list, base_end,
|
||||
rec_limit))
|
||||
return FALSE;
|
||||
|
||||
code += PRIV(OP_lengths)[c];
|
||||
continue;
|
||||
|
||||
/* The next opcode does not need special handling; fall through and use it
|
||||
to see if the base can be possessified. */
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* We now have the next appropriate opcode to compare with the base. Check
|
||||
for a supported opcode, and load its properties. */
|
||||
|
||||
code = get_chr_property_list(code, utf, ucp, cb->fcc, list);
|
||||
if (code == NULL) return FALSE; /* Unsupported */
|
||||
|
||||
/* If either opcode is a small character list, set pointers for comparing
|
||||
characters from that list with another list, or with a property. */
|
||||
|
||||
if (base_list[0] == OP_CHAR)
|
||||
{
|
||||
chr_ptr = base_list + 2;
|
||||
list_ptr = list;
|
||||
}
|
||||
else if (list[0] == OP_CHAR)
|
||||
{
|
||||
chr_ptr = list + 2;
|
||||
list_ptr = base_list;
|
||||
}
|
||||
|
||||
/* Character bitsets can also be compared to certain opcodes. */
|
||||
|
||||
else if (base_list[0] == OP_CLASS || list[0] == OP_CLASS
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
/* In 8 bit, non-UTF mode, OP_CLASS and OP_NCLASS are the same. */
|
||||
|| (!utf && (base_list[0] == OP_NCLASS || list[0] == OP_NCLASS))
|
||||
#endif
|
||||
)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (base_list[0] == OP_CLASS || (!utf && base_list[0] == OP_NCLASS))
|
||||
#else
|
||||
if (base_list[0] == OP_CLASS)
|
||||
#endif
|
||||
{
|
||||
set1 = (const uint8_t *)(base_end - base_list[2]);
|
||||
list_ptr = list;
|
||||
}
|
||||
else
|
||||
{
|
||||
set1 = (const uint8_t *)(code - list[2]);
|
||||
list_ptr = base_list;
|
||||
}
|
||||
|
||||
invert_bits = FALSE;
|
||||
switch(list_ptr[0])
|
||||
{
|
||||
case OP_CLASS:
|
||||
case OP_NCLASS:
|
||||
set2 = (const uint8_t *)
|
||||
((list_ptr == list ? code : base_end) - list_ptr[2]);
|
||||
break;
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
case OP_XCLASS:
|
||||
xclass_flags = (list_ptr == list ? code : base_end) -
|
||||
list_ptr[2] + LINK_SIZE;
|
||||
if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE;
|
||||
if ((*xclass_flags & XCL_MAP) == 0)
|
||||
{
|
||||
/* No bits are set for characters < 256. */
|
||||
if (list[1] == 0) return (*xclass_flags & XCL_NOT) == 0;
|
||||
/* Might be an empty repeat. */
|
||||
continue;
|
||||
}
|
||||
set2 = (const uint8_t *)(xclass_flags + 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
case OP_NOT_DIGIT:
|
||||
invert_bits = TRUE;
|
||||
/* Fall through */
|
||||
case OP_DIGIT:
|
||||
set2 = (const uint8_t *)(cb->cbits + cbit_digit);
|
||||
break;
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
invert_bits = TRUE;
|
||||
/* Fall through */
|
||||
case OP_WHITESPACE:
|
||||
set2 = (const uint8_t *)(cb->cbits + cbit_space);
|
||||
break;
|
||||
|
||||
case OP_NOT_WORDCHAR:
|
||||
invert_bits = TRUE;
|
||||
/* Fall through */
|
||||
case OP_WORDCHAR:
|
||||
set2 = (const uint8_t *)(cb->cbits + cbit_word);
|
||||
break;
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* Because the bit sets are unaligned bytes, we need to perform byte
|
||||
comparison here. */
|
||||
|
||||
set_end = set1 + 32;
|
||||
if (invert_bits)
|
||||
{
|
||||
do
|
||||
{
|
||||
if ((*set1++ & ~(*set2++)) != 0) return FALSE;
|
||||
}
|
||||
while (set1 < set_end);
|
||||
}
|
||||
else
|
||||
{
|
||||
do
|
||||
{
|
||||
if ((*set1++ & *set2++) != 0) return FALSE;
|
||||
}
|
||||
while (set1 < set_end);
|
||||
}
|
||||
|
||||
if (list[1] == 0) return TRUE;
|
||||
/* Might be an empty repeat. */
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Some property combinations also acceptable. Unicode property opcodes are
|
||||
processed specially; the rest can be handled with a lookup table. */
|
||||
|
||||
else
|
||||
{
|
||||
uint32_t leftop, rightop;
|
||||
|
||||
leftop = base_list[0];
|
||||
rightop = list[0];
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
accepted = FALSE; /* Always set in non-unicode case. */
|
||||
if (leftop == OP_PROP || leftop == OP_NOTPROP)
|
||||
{
|
||||
if (rightop == OP_EOD)
|
||||
accepted = TRUE;
|
||||
else if (rightop == OP_PROP || rightop == OP_NOTPROP)
|
||||
{
|
||||
int n;
|
||||
const uint8_t *p;
|
||||
BOOL same = leftop == rightop;
|
||||
BOOL lisprop = leftop == OP_PROP;
|
||||
BOOL risprop = rightop == OP_PROP;
|
||||
BOOL bothprop = lisprop && risprop;
|
||||
|
||||
/* There's a table that specifies how each combination is to be
|
||||
processed:
|
||||
0 Always return FALSE (never auto-possessify)
|
||||
1 Character groups are distinct (possessify if both are OP_PROP)
|
||||
2 Check character categories in the same group (general or particular)
|
||||
3 Return TRUE if the two opcodes are not the same
|
||||
... see comments below
|
||||
*/
|
||||
|
||||
n = propposstab[base_list[2]][list[2]];
|
||||
switch(n)
|
||||
{
|
||||
case 0: break;
|
||||
case 1: accepted = bothprop; break;
|
||||
case 2: accepted = (base_list[3] == list[3]) != same; break;
|
||||
case 3: accepted = !same; break;
|
||||
|
||||
case 4: /* Left general category, right particular category */
|
||||
accepted = risprop && catposstab[base_list[3]][list[3]] == same;
|
||||
break;
|
||||
|
||||
case 5: /* Right general category, left particular category */
|
||||
accepted = lisprop && catposstab[list[3]][base_list[3]] == same;
|
||||
break;
|
||||
|
||||
/* This code is logically tricky. Think hard before fiddling with it.
|
||||
The posspropstab table has four entries per row. Each row relates to
|
||||
one of PCRE's special properties such as ALNUM or SPACE or WORD.
|
||||
Only WORD actually needs all four entries, but using repeats for the
|
||||
others means they can all use the same code below.
|
||||
|
||||
The first two entries in each row are Unicode general categories, and
|
||||
apply always, because all the characters they include are part of the
|
||||
PCRE character set. The third and fourth entries are a general and a
|
||||
particular category, respectively, that include one or more relevant
|
||||
characters. One or the other is used, depending on whether the check
|
||||
is for a general or a particular category. However, in both cases the
|
||||
category contains more characters than the specials that are defined
|
||||
for the property being tested against. Therefore, it cannot be used
|
||||
in a NOTPROP case.
|
||||
|
||||
Example: the row for WORD contains ucp_L, ucp_N, ucp_P, ucp_Po.
|
||||
Underscore is covered by ucp_P or ucp_Po. */
|
||||
|
||||
case 6: /* Left alphanum vs right general category */
|
||||
case 7: /* Left space vs right general category */
|
||||
case 8: /* Left word vs right general category */
|
||||
p = posspropstab[n-6];
|
||||
accepted = risprop && lisprop ==
|
||||
(list[3] != p[0] &&
|
||||
list[3] != p[1] &&
|
||||
(list[3] != p[2] || !lisprop));
|
||||
break;
|
||||
|
||||
case 9: /* Right alphanum vs left general category */
|
||||
case 10: /* Right space vs left general category */
|
||||
case 11: /* Right word vs left general category */
|
||||
p = posspropstab[n-9];
|
||||
accepted = lisprop && risprop ==
|
||||
(base_list[3] != p[0] &&
|
||||
base_list[3] != p[1] &&
|
||||
(base_list[3] != p[2] || !risprop));
|
||||
break;
|
||||
|
||||
case 12: /* Left alphanum vs right particular category */
|
||||
case 13: /* Left space vs right particular category */
|
||||
case 14: /* Left word vs right particular category */
|
||||
p = posspropstab[n-12];
|
||||
accepted = risprop && lisprop ==
|
||||
(catposstab[p[0]][list[3]] &&
|
||||
catposstab[p[1]][list[3]] &&
|
||||
(list[3] != p[3] || !lisprop));
|
||||
break;
|
||||
|
||||
case 15: /* Right alphanum vs left particular category */
|
||||
case 16: /* Right space vs left particular category */
|
||||
case 17: /* Right word vs left particular category */
|
||||
p = posspropstab[n-15];
|
||||
accepted = lisprop && risprop ==
|
||||
(catposstab[p[0]][base_list[3]] &&
|
||||
catposstab[p[1]][base_list[3]] &&
|
||||
(base_list[3] != p[3] || !risprop));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
|
||||
rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
|
||||
autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
|
||||
|
||||
if (!accepted) return FALSE;
|
||||
|
||||
if (list[1] == 0) return TRUE;
|
||||
/* Might be an empty repeat. */
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Control reaches here only if one of the items is a small character list.
|
||||
All characters are checked against the other side. */
|
||||
|
||||
do
|
||||
{
|
||||
chr = *chr_ptr;
|
||||
|
||||
switch(list_ptr[0])
|
||||
{
|
||||
case OP_CHAR:
|
||||
ochr_ptr = list_ptr + 2;
|
||||
do
|
||||
{
|
||||
if (chr == *ochr_ptr) return FALSE;
|
||||
ochr_ptr++;
|
||||
}
|
||||
while(*ochr_ptr != NOTACHAR);
|
||||
break;
|
||||
|
||||
case OP_NOT:
|
||||
ochr_ptr = list_ptr + 2;
|
||||
do
|
||||
{
|
||||
if (chr == *ochr_ptr)
|
||||
break;
|
||||
ochr_ptr++;
|
||||
}
|
||||
while(*ochr_ptr != NOTACHAR);
|
||||
if (*ochr_ptr == NOTACHAR) return FALSE; /* Not found */
|
||||
break;
|
||||
|
||||
/* Note that OP_DIGIT etc. are generated only when PCRE2_UCP is *not*
|
||||
set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
|
||||
|
||||
case OP_DIGIT:
|
||||
if (chr < 256 && (cb->ctypes[chr] & ctype_digit) != 0) return FALSE;
|
||||
break;
|
||||
|
||||
case OP_NOT_DIGIT:
|
||||
if (chr > 255 || (cb->ctypes[chr] & ctype_digit) == 0) return FALSE;
|
||||
break;
|
||||
|
||||
case OP_WHITESPACE:
|
||||
if (chr < 256 && (cb->ctypes[chr] & ctype_space) != 0) return FALSE;
|
||||
break;
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
if (chr > 255 || (cb->ctypes[chr] & ctype_space) == 0) return FALSE;
|
||||
break;
|
||||
|
||||
case OP_WORDCHAR:
|
||||
if (chr < 255 && (cb->ctypes[chr] & ctype_word) != 0) return FALSE;
|
||||
break;
|
||||
|
||||
case OP_NOT_WORDCHAR:
|
||||
if (chr > 255 || (cb->ctypes[chr] & ctype_word) == 0) return FALSE;
|
||||
break;
|
||||
|
||||
case OP_HSPACE:
|
||||
switch(chr)
|
||||
{
|
||||
HSPACE_CASES: return FALSE;
|
||||
default: break;
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_NOT_HSPACE:
|
||||
switch(chr)
|
||||
{
|
||||
HSPACE_CASES: break;
|
||||
default: return FALSE;
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_ANYNL:
|
||||
case OP_VSPACE:
|
||||
switch(chr)
|
||||
{
|
||||
VSPACE_CASES: return FALSE;
|
||||
default: break;
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_NOT_VSPACE:
|
||||
switch(chr)
|
||||
{
|
||||
VSPACE_CASES: break;
|
||||
default: return FALSE;
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_DOLL:
|
||||
case OP_EODN:
|
||||
switch (chr)
|
||||
{
|
||||
case CHAR_CR:
|
||||
case CHAR_LF:
|
||||
case CHAR_VT:
|
||||
case CHAR_FF:
|
||||
case CHAR_NEL:
|
||||
#ifndef EBCDIC
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
#endif /* Not EBCDIC */
|
||||
return FALSE;
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_EOD: /* Can always possessify before \z */
|
||||
break;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
case OP_PROP:
|
||||
case OP_NOTPROP:
|
||||
if (!check_char_prop(chr, list_ptr[2], list_ptr[3],
|
||||
list_ptr[0] == OP_NOTPROP))
|
||||
return FALSE;
|
||||
break;
|
||||
#endif
|
||||
|
||||
case OP_NCLASS:
|
||||
if (chr > 255) return FALSE;
|
||||
/* Fall through */
|
||||
|
||||
case OP_CLASS:
|
||||
if (chr > 255) break;
|
||||
class_bitset = (const uint8_t *)
|
||||
((list_ptr == list ? code : base_end) - list_ptr[2]);
|
||||
if ((class_bitset[chr >> 3] & (1u << (chr & 7))) != 0) return FALSE;
|
||||
break;
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
case OP_XCLASS:
|
||||
if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) -
|
||||
list_ptr[2] + LINK_SIZE, (const uint8_t*)cb->start_code, utf))
|
||||
return FALSE;
|
||||
break;
|
||||
|
||||
case OP_ECLASS:
|
||||
if (PRIV(eclass)(chr,
|
||||
(list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE,
|
||||
(list_ptr == list ? code : base_end) - list_ptr[3],
|
||||
(const uint8_t*)cb->start_code, utf))
|
||||
return FALSE;
|
||||
break;
|
||||
#endif /* SUPPORT_WIDE_CHARS */
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
chr_ptr++;
|
||||
}
|
||||
while(*chr_ptr != NOTACHAR);
|
||||
|
||||
/* At least one character must be matched from this opcode. */
|
||||
|
||||
if (list[1] == 0) return TRUE;
|
||||
}
|
||||
|
||||
PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
|
||||
return FALSE; /* Avoid compiler warnings */
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Scan compiled regex for auto-possession *
|
||||
*************************************************/
|
||||
|
||||
/* Replaces single character iterations with their possessive alternatives
|
||||
if appropriate. This function modifies the compiled opcode! Hitting a
|
||||
non-existent opcode may indicate a bug in PCRE2, but it can also be caused if a
|
||||
bad UTF string was compiled with PCRE2_NO_UTF_CHECK. The rec_limit catches
|
||||
overly complicated or large patterns. In these cases, the check just stops,
|
||||
leaving the remainder of the pattern unpossessified.
|
||||
|
||||
Arguments:
|
||||
code points to start of the byte code
|
||||
cb compile data block
|
||||
|
||||
Returns: 0 for success
|
||||
-1 if a non-existent opcode is encountered
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(auto_possessify)(PCRE2_UCHAR *code, const compile_block *cb)
|
||||
{
|
||||
PCRE2_UCHAR c;
|
||||
PCRE2_SPTR end;
|
||||
PCRE2_UCHAR *repeat_opcode;
|
||||
uint32_t list[MAX_LIST];
|
||||
int rec_limit = 1000; /* Was 10,000 but clang+ASAN uses a lot of stack. */
|
||||
BOOL utf = (cb->external_options & PCRE2_UTF) != 0;
|
||||
BOOL ucp = (cb->external_options & PCRE2_UCP) != 0;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
c = *code;
|
||||
|
||||
if (c >= OP_TABLE_LENGTH)
|
||||
{
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
return -1; /* Something gone wrong */
|
||||
}
|
||||
|
||||
if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
|
||||
{
|
||||
c -= get_repeat_base(c) - OP_STAR;
|
||||
end = (c <= OP_MINUPTO) ?
|
||||
get_chr_property_list(code, utf, ucp, cb->fcc, list) : NULL;
|
||||
list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
|
||||
|
||||
if (end != NULL && compare_opcodes(end, utf, ucp, cb, list, end,
|
||||
&rec_limit))
|
||||
{
|
||||
switch(c)
|
||||
{
|
||||
case OP_STAR:
|
||||
*code += OP_POSSTAR - OP_STAR;
|
||||
break;
|
||||
|
||||
case OP_MINSTAR:
|
||||
*code += OP_POSSTAR - OP_MINSTAR;
|
||||
break;
|
||||
|
||||
case OP_PLUS:
|
||||
*code += OP_POSPLUS - OP_PLUS;
|
||||
break;
|
||||
|
||||
case OP_MINPLUS:
|
||||
*code += OP_POSPLUS - OP_MINPLUS;
|
||||
break;
|
||||
|
||||
case OP_QUERY:
|
||||
*code += OP_POSQUERY - OP_QUERY;
|
||||
break;
|
||||
|
||||
case OP_MINQUERY:
|
||||
*code += OP_POSQUERY - OP_MINQUERY;
|
||||
break;
|
||||
|
||||
case OP_UPTO:
|
||||
*code += OP_POSUPTO - OP_UPTO;
|
||||
break;
|
||||
|
||||
case OP_MINUPTO:
|
||||
*code += OP_POSUPTO - OP_MINUPTO;
|
||||
break;
|
||||
}
|
||||
}
|
||||
c = *code;
|
||||
}
|
||||
else if (c == OP_CLASS || c == OP_NCLASS
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
|| c == OP_XCLASS || c == OP_ECLASS
|
||||
#endif
|
||||
)
|
||||
{
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
if (c == OP_XCLASS || c == OP_ECLASS)
|
||||
repeat_opcode = code + GET(code, 1);
|
||||
else
|
||||
#endif
|
||||
repeat_opcode = code + 1 + (32 / sizeof(PCRE2_UCHAR));
|
||||
|
||||
c = *repeat_opcode;
|
||||
if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
|
||||
{
|
||||
/* The return from get_chr_property_list() will never be NULL when
|
||||
*code (aka c) is one of the four class opcodes. However, gcc with
|
||||
-fanalyzer notes that a NULL return is possible, and grumbles. Hence we
|
||||
put in a check. */
|
||||
|
||||
end = get_chr_property_list(code, utf, ucp, cb->fcc, list);
|
||||
list[1] = (c & 1) == 0;
|
||||
|
||||
if (end != NULL &&
|
||||
compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit))
|
||||
{
|
||||
switch (c)
|
||||
{
|
||||
case OP_CRSTAR:
|
||||
case OP_CRMINSTAR:
|
||||
*repeat_opcode = OP_CRPOSSTAR;
|
||||
break;
|
||||
|
||||
case OP_CRPLUS:
|
||||
case OP_CRMINPLUS:
|
||||
*repeat_opcode = OP_CRPOSPLUS;
|
||||
break;
|
||||
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
*repeat_opcode = OP_CRPOSQUERY;
|
||||
break;
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
*repeat_opcode = OP_CRPOSRANGE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
c = *code;
|
||||
}
|
||||
|
||||
switch(c)
|
||||
{
|
||||
case OP_END:
|
||||
return 0;
|
||||
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEPOSPLUS:
|
||||
case OP_TYPEPOSQUERY:
|
||||
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
|
||||
break;
|
||||
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEEXACT:
|
||||
case OP_TYPEPOSUPTO:
|
||||
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
|
||||
code += 2;
|
||||
break;
|
||||
|
||||
case OP_CALLOUT_STR:
|
||||
code += GET(code, 1 + 2*LINK_SIZE);
|
||||
break;
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
case OP_XCLASS:
|
||||
case OP_ECLASS:
|
||||
code += GET(code, 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
code += code[1];
|
||||
break;
|
||||
}
|
||||
|
||||
/* Add in the fixed length from the table */
|
||||
|
||||
code += PRIV(OP_lengths)[c];
|
||||
|
||||
/* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
|
||||
followed by a multi-byte character. The length in the table is a minimum, so
|
||||
we have to arrange to skip the extra code units. */
|
||||
|
||||
#ifdef MAYBE_UTF_MULTI
|
||||
if (utf) switch(c)
|
||||
{
|
||||
case OP_CHAR:
|
||||
case OP_CHARI:
|
||||
case OP_NOT:
|
||||
case OP_NOTI:
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
case OP_EXACT:
|
||||
case OP_POSSTAR:
|
||||
case OP_POSPLUS:
|
||||
case OP_POSQUERY:
|
||||
case OP_POSUPTO:
|
||||
case OP_STARI:
|
||||
case OP_MINSTARI:
|
||||
case OP_PLUSI:
|
||||
case OP_MINPLUSI:
|
||||
case OP_QUERYI:
|
||||
case OP_MINQUERYI:
|
||||
case OP_UPTOI:
|
||||
case OP_MINUPTOI:
|
||||
case OP_EXACTI:
|
||||
case OP_POSSTARI:
|
||||
case OP_POSPLUSI:
|
||||
case OP_POSQUERYI:
|
||||
case OP_POSUPTOI:
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTPOSSTAR:
|
||||
case OP_NOTPOSPLUS:
|
||||
case OP_NOTPOSQUERY:
|
||||
case OP_NOTPOSUPTO:
|
||||
case OP_NOTSTARI:
|
||||
case OP_NOTMINSTARI:
|
||||
case OP_NOTPLUSI:
|
||||
case OP_NOTMINPLUSI:
|
||||
case OP_NOTQUERYI:
|
||||
case OP_NOTMINQUERYI:
|
||||
case OP_NOTUPTOI:
|
||||
case OP_NOTMINUPTOI:
|
||||
case OP_NOTEXACTI:
|
||||
case OP_NOTPOSSTARI:
|
||||
case OP_NOTPOSPLUSI:
|
||||
case OP_NOTPOSQUERYI:
|
||||
case OP_NOTPOSUPTOI:
|
||||
if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
|
||||
break;
|
||||
}
|
||||
#else
|
||||
(void)(utf); /* Keep compiler happy by referencing function argument */
|
||||
#endif /* SUPPORT_WIDE_CHARS */
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_auto_possess.c */
|
||||
@@ -1,196 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This file was automatically written by the pcre2_dftables auxiliary
|
||||
program. It contains character tables that are used when no external
|
||||
tables are passed to PCRE2 by the application that calls it. The tables
|
||||
are used only for characters whose code values are less than 256, and
|
||||
only relevant if not in UCP mode. */
|
||||
|
||||
/* This set of tables was written in the C locale. */
|
||||
|
||||
/* The pcre2_dftables program (which is distributed with PCRE2) can be used
|
||||
to build alternative versions of this file. This is necessary if you are
|
||||
running in an EBCDIC environment, or if you want to default to a different
|
||||
encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates
|
||||
these tables in the "C" locale by default. This happens automatically if
|
||||
PCRE2 is configured with --enable-rebuild-chartables. However, you can run
|
||||
pcre2_dftables manually with the -L option to build tables using the LC_ALL
|
||||
locale. */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
const uint8_t PRIV(default_tables)[] = {
|
||||
|
||||
/* This table is a lower casing table. */
|
||||
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39,
|
||||
40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55,
|
||||
56, 57, 58, 59, 60, 61, 62, 63,
|
||||
64, 97, 98, 99,100,101,102,103,
|
||||
104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,
|
||||
120,121,122, 91, 92, 93, 94, 95,
|
||||
96, 97, 98, 99,100,101,102,103,
|
||||
104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,
|
||||
120,121,122,123,124,125,126,127,
|
||||
128,129,130,131,132,133,134,135,
|
||||
136,137,138,139,140,141,142,143,
|
||||
144,145,146,147,148,149,150,151,
|
||||
152,153,154,155,156,157,158,159,
|
||||
160,161,162,163,164,165,166,167,
|
||||
168,169,170,171,172,173,174,175,
|
||||
176,177,178,179,180,181,182,183,
|
||||
184,185,186,187,188,189,190,191,
|
||||
192,193,194,195,196,197,198,199,
|
||||
200,201,202,203,204,205,206,207,
|
||||
208,209,210,211,212,213,214,215,
|
||||
216,217,218,219,220,221,222,223,
|
||||
224,225,226,227,228,229,230,231,
|
||||
232,233,234,235,236,237,238,239,
|
||||
240,241,242,243,244,245,246,247,
|
||||
248,249,250,251,252,253,254,255,
|
||||
|
||||
/* This table is a case flipping table. */
|
||||
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39,
|
||||
40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55,
|
||||
56, 57, 58, 59, 60, 61, 62, 63,
|
||||
64, 97, 98, 99,100,101,102,103,
|
||||
104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,
|
||||
120,121,122, 91, 92, 93, 94, 95,
|
||||
96, 65, 66, 67, 68, 69, 70, 71,
|
||||
72, 73, 74, 75, 76, 77, 78, 79,
|
||||
80, 81, 82, 83, 84, 85, 86, 87,
|
||||
88, 89, 90,123,124,125,126,127,
|
||||
128,129,130,131,132,133,134,135,
|
||||
136,137,138,139,140,141,142,143,
|
||||
144,145,146,147,148,149,150,151,
|
||||
152,153,154,155,156,157,158,159,
|
||||
160,161,162,163,164,165,166,167,
|
||||
168,169,170,171,172,173,174,175,
|
||||
176,177,178,179,180,181,182,183,
|
||||
184,185,186,187,188,189,190,191,
|
||||
192,193,194,195,196,197,198,199,
|
||||
200,201,202,203,204,205,206,207,
|
||||
208,209,210,211,212,213,214,215,
|
||||
216,217,218,219,220,221,222,223,
|
||||
224,225,226,227,228,229,230,231,
|
||||
232,233,234,235,236,237,238,239,
|
||||
240,241,242,243,244,245,246,247,
|
||||
248,249,250,251,252,253,254,255,
|
||||
|
||||
/* This table contains bit maps for various character classes. Each map is 32
|
||||
bytes long and the bits run from the least significant end of each byte. The
|
||||
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
|
||||
graph, print, punct, and cntrl. Other classes are built from combinations. */
|
||||
|
||||
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, /* space */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* xdigit */
|
||||
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* digit */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* upper */
|
||||
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* lower */
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* word */
|
||||
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, /* graph */
|
||||
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, /* print */
|
||||
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, /* punct */
|
||||
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, /* cntrl */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
/* This table identifies various classes of character by individual bits:
|
||||
0x01 white space character
|
||||
0x02 letter
|
||||
0x04 lower case letter
|
||||
0x08 decimal digit
|
||||
0x10 word (alphanumeric or '_')
|
||||
*/
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
|
||||
0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
|
||||
0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - ' */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ( - / */
|
||||
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, /* 0 - 7 */
|
||||
0x18,0x18,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? */
|
||||
0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* @ - G */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
|
||||
0x12,0x12,0x12,0x00,0x00,0x00,0x00,0x10, /* X - _ */
|
||||
0x00,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /* ` - g */
|
||||
0x16,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /* h - o */
|
||||
0x16,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /* p - w */
|
||||
0x16,0x16,0x16,0x00,0x00,0x00,0x00,0x00, /* x -127 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
|
||||
|
||||
/* End of pcre2_chartables.c */
|
||||
@@ -1,94 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 2023 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This file contains functions to implement checked integer operations */
|
||||
|
||||
#ifndef PCRE2_PCRE2TEST
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
#endif
|
||||
|
||||
/*************************************************
|
||||
* Checked Integer Multiplication *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
r A pointer to PCRE2_SIZE to store the answer
|
||||
a, b Two integers
|
||||
|
||||
Returns: Bool indicating if the operation overflows
|
||||
|
||||
It is modeled after C23's <stdckdint.h> interface
|
||||
The INT64_OR_DOUBLE type is a 64-bit integer type when available,
|
||||
otherwise double. */
|
||||
|
||||
BOOL
|
||||
PRIV(ckd_smul)(PCRE2_SIZE *r, int a, int b)
|
||||
{
|
||||
#ifdef HAVE_BUILTIN_MUL_OVERFLOW
|
||||
PCRE2_SIZE m;
|
||||
|
||||
if (__builtin_mul_overflow(a, b, &m)) return TRUE;
|
||||
|
||||
*r = m;
|
||||
#else
|
||||
INT64_OR_DOUBLE m;
|
||||
|
||||
PCRE2_ASSERT(a >= 0 && b >= 0);
|
||||
|
||||
m = (INT64_OR_DOUBLE)a * (INT64_OR_DOUBLE)b;
|
||||
|
||||
#if defined INT64_MAX || defined int64_t
|
||||
if (sizeof(m) > sizeof(*r) && m > (INT64_OR_DOUBLE)PCRE2_SIZE_MAX) return TRUE;
|
||||
*r = (PCRE2_SIZE)m;
|
||||
#else
|
||||
if (m > PCRE2_SIZE_MAX) return TRUE;
|
||||
*r = m;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* End of pcre2_chkdint.c */
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,280 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE2 is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef PCRE2_COMPILE_H_IDEMPOTENT_GUARD
|
||||
#define PCRE2_COMPILE_H_IDEMPOTENT_GUARD
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
/* Compile time error code numbers. They are given names so that they can more
|
||||
easily be tracked. When a new number is added, the tables called eint1 and
|
||||
eint2 in pcre2posix.c may need to be updated, and a new error text must be
|
||||
added to compile_error_texts in pcre2_error.c. Also, the error codes in
|
||||
pcre2.h.in must be updated - their values are exactly 100 greater than these
|
||||
values. */
|
||||
|
||||
enum { ERR0 = COMPILE_ERROR_BASE,
|
||||
ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR10,
|
||||
ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20,
|
||||
ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29, ERR30,
|
||||
ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40,
|
||||
ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50,
|
||||
ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
|
||||
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
|
||||
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
|
||||
ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90,
|
||||
ERR91, ERR92, ERR93, ERR94, ERR95, ERR96, ERR97, ERR98, ERR99, ERR100,
|
||||
ERR101,ERR102,ERR103,ERR104,ERR105,ERR106,ERR107,ERR108,ERR109,ERR110,
|
||||
ERR111,ERR112,ERR113,ERR114,ERR115,ERR116 };
|
||||
|
||||
/* Code values for parsed patterns, which are stored in a vector of 32-bit
|
||||
unsigned ints. Values less than META_END are literal data values. The coding
|
||||
for identifying the item is in the top 16-bits, leaving 16 bits for the
|
||||
additional data that some of them need. The META_CODE, META_DATA, and META_DIFF
|
||||
macros are used to manipulate parsed pattern elements.
|
||||
|
||||
NOTE: When these definitions are changed, the table of extra lengths for each
|
||||
code (meta_extra_lengths) must be updated to remain in step. */
|
||||
|
||||
#define META_END 0x80000000u /* End of pattern */
|
||||
|
||||
#define META_ALT 0x80010000u /* alternation */
|
||||
#define META_ATOMIC 0x80020000u /* atomic group */
|
||||
#define META_BACKREF 0x80030000u /* Back ref */
|
||||
#define META_BACKREF_BYNAME 0x80040000u /* \k'name' */
|
||||
#define META_BIGVALUE 0x80050000u /* Next is a literal > META_END */
|
||||
#define META_CALLOUT_NUMBER 0x80060000u /* (?C with numerical argument */
|
||||
#define META_CALLOUT_STRING 0x80070000u /* (?C with string argument */
|
||||
#define META_CAPTURE 0x80080000u /* Capturing parenthesis */
|
||||
#define META_CIRCUMFLEX 0x80090000u /* ^ metacharacter */
|
||||
#define META_CLASS 0x800a0000u /* start non-empty class */
|
||||
#define META_CLASS_EMPTY 0x800b0000u /* empty class */
|
||||
#define META_CLASS_EMPTY_NOT 0x800c0000u /* negative empty class */
|
||||
#define META_CLASS_END 0x800d0000u /* end of non-empty class */
|
||||
#define META_CLASS_NOT 0x800e0000u /* start non-empty negative class */
|
||||
#define META_COND_ASSERT 0x800f0000u /* (?(?assertion)... */
|
||||
#define META_COND_DEFINE 0x80100000u /* (?(DEFINE)... */
|
||||
#define META_COND_NAME 0x80110000u /* (?(<name>)... */
|
||||
#define META_COND_NUMBER 0x80120000u /* (?(digits)... */
|
||||
#define META_COND_RNAME 0x80130000u /* (?(R&name)... */
|
||||
#define META_COND_RNUMBER 0x80140000u /* (?(Rdigits)... */
|
||||
#define META_COND_VERSION 0x80150000u /* (?(VERSION<op>x.y)... */
|
||||
#define META_OFFSET 0x80160000u /* Setting offset for various
|
||||
META codes (e.g. META_SCS_NAME) */
|
||||
#define META_SCS 0x80170000u /* (*scan_substring:... */
|
||||
#define META_SCS_NAME 0x80180000u /* Next <name> of scan_substring */
|
||||
#define META_SCS_NUMBER 0x80190000u /* Next digits of scan_substring */
|
||||
#define META_DOLLAR 0x801a0000u /* $ metacharacter */
|
||||
#define META_DOT 0x801b0000u /* . metacharacter */
|
||||
#define META_ESCAPE 0x801c0000u /* \d and friends */
|
||||
#define META_KET 0x801d0000u /* closing parenthesis */
|
||||
#define META_NOCAPTURE 0x801e0000u /* no capture parens */
|
||||
#define META_OPTIONS 0x801f0000u /* (?i) and friends */
|
||||
#define META_POSIX 0x80200000u /* POSIX class item */
|
||||
#define META_POSIX_NEG 0x80210000u /* negative POSIX class item */
|
||||
#define META_RANGE_ESCAPED 0x80220000u /* range with at least one escape */
|
||||
#define META_RANGE_LITERAL 0x80230000u /* range defined literally */
|
||||
#define META_RECURSE 0x80240000u /* Recursion */
|
||||
#define META_RECURSE_BYNAME 0x80250000u /* (?&name) */
|
||||
#define META_SCRIPT_RUN 0x80260000u /* (*script_run:...) */
|
||||
|
||||
/* These must be kept together to make it easy to check that an assertion
|
||||
is present where expected in a conditional group. */
|
||||
|
||||
#define META_LOOKAHEAD 0x80270000u /* (?= */
|
||||
#define META_LOOKAHEADNOT 0x80280000u /* (?! */
|
||||
#define META_LOOKBEHIND 0x80290000u /* (?<= */
|
||||
#define META_LOOKBEHINDNOT 0x802a0000u /* (?<! */
|
||||
|
||||
/* These cannot be conditions */
|
||||
|
||||
#define META_LOOKAHEAD_NA 0x802b0000u /* (*napla: */
|
||||
#define META_LOOKBEHIND_NA 0x802c0000u /* (*naplb: */
|
||||
|
||||
/* These must be kept in this order, with consecutive values, and the _ARG
|
||||
versions of COMMIT, PRUNE, SKIP, and THEN immediately after their non-argument
|
||||
versions. */
|
||||
|
||||
#define META_MARK 0x802d0000u /* (*MARK) */
|
||||
#define META_ACCEPT 0x802e0000u /* (*ACCEPT) */
|
||||
#define META_FAIL 0x802f0000u /* (*FAIL) */
|
||||
#define META_COMMIT 0x80300000u /* These */
|
||||
#define META_COMMIT_ARG 0x80310000u /* pairs */
|
||||
#define META_PRUNE 0x80320000u /* must */
|
||||
#define META_PRUNE_ARG 0x80330000u /* be */
|
||||
#define META_SKIP 0x80340000u /* kept */
|
||||
#define META_SKIP_ARG 0x80350000u /* in */
|
||||
#define META_THEN 0x80360000u /* this */
|
||||
#define META_THEN_ARG 0x80370000u /* order */
|
||||
|
||||
/* These must be kept in groups of adjacent 3 values, and all together. */
|
||||
|
||||
#define META_ASTERISK 0x80380000u /* * */
|
||||
#define META_ASTERISK_PLUS 0x80390000u /* *+ */
|
||||
#define META_ASTERISK_QUERY 0x803a0000u /* *? */
|
||||
#define META_PLUS 0x803b0000u /* + */
|
||||
#define META_PLUS_PLUS 0x803c0000u /* ++ */
|
||||
#define META_PLUS_QUERY 0x803d0000u /* +? */
|
||||
#define META_QUERY 0x803e0000u /* ? */
|
||||
#define META_QUERY_PLUS 0x803f0000u /* ?+ */
|
||||
#define META_QUERY_QUERY 0x80400000u /* ?? */
|
||||
#define META_MINMAX 0x80410000u /* {n,m} repeat */
|
||||
#define META_MINMAX_PLUS 0x80420000u /* {n,m}+ repeat */
|
||||
#define META_MINMAX_QUERY 0x80430000u /* {n,m}? repeat */
|
||||
|
||||
/* These meta codes must be kept in a group, with the OR/SUB/XOR in
|
||||
this order, and AND/NOT at the start/end. */
|
||||
|
||||
#define META_ECLASS_AND 0x80440000u /* && (or &) in a class */
|
||||
#define META_ECLASS_OR 0x80450000u /* || (or |, +) in a class */
|
||||
#define META_ECLASS_SUB 0x80460000u /* -- (or -) in a class */
|
||||
#define META_ECLASS_XOR 0x80470000u /* ~~ (or ^) in a class */
|
||||
#define META_ECLASS_NOT 0x80480000u /* ! in a class */
|
||||
|
||||
/* Convenience aliases. */
|
||||
|
||||
#define META_FIRST_QUANTIFIER META_ASTERISK
|
||||
#define META_LAST_QUANTIFIER META_MINMAX_QUERY
|
||||
|
||||
/* This is a special "meta code" that is used only to distinguish (*asr: from
|
||||
(*sr: in the table of alphabetic assertions. It is never stored in the parsed
|
||||
pattern because (*asr: is turned into (*sr:(*atomic: at that stage. There is
|
||||
therefore no need for it to have a length entry, so use a high value. */
|
||||
|
||||
#define META_ATOMIC_SCRIPT_RUN 0x8fff0000u
|
||||
|
||||
/* Macros for manipulating elements of the parsed pattern vector. */
|
||||
|
||||
/* META_CODE extracts the item code held in the top 16 bits of a parsed
pattern element, META_DATA extracts the 16 bits of additional data, and
META_DIFF gives the distance between two META codes in units of one code.
Every parameter is parenthesized so that expression arguments (for example
"a | b" or "base + offset") are evaluated as a unit. */

#define META_CODE(x)   ((x) & 0xffff0000u)
#define META_DATA(x)   ((x) & 0x0000ffffu)
#define META_DIFF(x,y) (((x)-(y))>>16)
|
||||
|
||||
/* Extended class management flags. */
|
||||
|
||||
#define CLASS_IS_ECLASS 0x1
|
||||
|
||||
/* Macro for the highest character value. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define MAX_UCHAR_VALUE 0xffu
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
#define MAX_UCHAR_VALUE 0xffffu
|
||||
#else
|
||||
#define MAX_UCHAR_VALUE 0xffffffffu
|
||||
#endif
|
||||
|
||||
#define GET_MAX_CHAR_VALUE(utf) \
|
||||
((utf) ? MAX_UTF_CODE_POINT : MAX_UCHAR_VALUE)
|
||||
|
||||
/* Macro for setting individual bits in class bitmaps. */
|
||||
|
||||
/* Sets bit number b in the byte-array bitmap a. Both parameters are
parenthesized so that pointer expressions such as "map + offset" can be
passed as the bitmap. */
#define SETBIT(a,b) (a)[(b) >> 3] |= (uint8_t)(1u << ((b) & 0x7))
|
||||
|
||||
/* Macro for 8 bit specific checks. */
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define SELECT_VALUE8(value8, value) (value8)
|
||||
#else
|
||||
#define SELECT_VALUE8(value8, value) (value)
|
||||
#endif
|
||||
|
||||
/* Macro for aligning data. */
|
||||
/* Rounds base up to the next multiple of align. The mask arithmetic only
works when align is a power of two. Both parameters are parenthesized so
that expression arguments are evaluated as a unit. */
#define CLIST_ALIGN_TO(base, align) \
  (((base) + ((size_t)(align) - 1)) & ~((size_t)(align) - 1))
|
||||
|
||||
/* Structure for holding information about an OP_ECLASS internal operand.
|
||||
An "operand" here could be just a single OP_[X]CLASS, or it could be some
|
||||
complex expression; but it's some sequence of ECL_* codes which pushes one
|
||||
value to the stack. */
|
||||
typedef struct {
|
||||
/* The position of the operand - or NULL if (lengthptr != NULL). */
|
||||
PCRE2_UCHAR *code_start;
|
||||
PCRE2_SIZE length;
|
||||
/* The operand's type if it is a single code (ECL_XCLASS, ECL_ANY, ECL_NONE);
|
||||
otherwise zero if the operand is not atomic. */
|
||||
uint8_t op_single_type;
|
||||
/* Regardless of whether it's a single code or not, we fully constant-fold
|
||||
the bitmap for code points < 256. */
|
||||
class_bits_storage bits;
|
||||
} eclass_op_info;
|
||||
|
||||
/* Macros for the definitions below, to prevent name collisions. */
|
||||
|
||||
#define _pcre2_posix_class_maps PCRE2_SUFFIX(_pcre2_posix_class_maps)
|
||||
#define _pcre2_update_classbits PCRE2_SUFFIX(_pcre2_update_classbits_)
|
||||
#define _pcre2_compile_class_nested PCRE2_SUFFIX(_pcre2_compile_class_nested_)
|
||||
#define _pcre2_compile_class_not_nested PCRE2_SUFFIX(_pcre2_compile_class_not_nested_)
|
||||
|
||||
|
||||
/* Indices of the POSIX classes in posix_names, posix_name_lengths,
|
||||
posix_class_maps, and posix_substitutes. They must be kept in sync. */
|
||||
|
||||
#define PC_DIGIT 7
|
||||
#define PC_GRAPH 8
|
||||
#define PC_PRINT 9
|
||||
#define PC_PUNCT 10
|
||||
#define PC_XDIGIT 13
|
||||
|
||||
extern const int PRIV(posix_class_maps)[];
|
||||
|
||||
|
||||
/* Set bits in classbits according to the property type */
|
||||
|
||||
void PRIV(update_classbits)(uint32_t ptype, uint32_t pdata, BOOL negated,
|
||||
uint8_t *classbits);
|
||||
|
||||
/* Compile the META codes from start_ptr...end_ptr, writing a single
|
||||
OP_CLASS, OP_NCLASS, OP_XCLASS, or OP_ALLANY into pcode. */
|
||||
|
||||
uint32_t *PRIV(compile_class_not_nested)(uint32_t options, uint32_t xoptions,
|
||||
uint32_t *start_ptr, PCRE2_UCHAR **pcode, BOOL negate_class, BOOL* has_bitmap,
|
||||
int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr);
|
||||
|
||||
/* Compile the META codes in pptr into opcodes written to pcode. The pptr must
|
||||
start at a META_CLASS or META_CLASS_NOT.
|
||||
|
||||
The pptr will be left pointing at the matching META_CLASS_END. */
|
||||
|
||||
BOOL PRIV(compile_class_nested)(uint32_t options, uint32_t xoptions,
|
||||
uint32_t **pptr, PCRE2_UCHAR **pcode, int *errorcodeptr,
|
||||
compile_block *cb, PCRE2_SIZE *lengthptr);
|
||||
|
||||
#endif /* PCRE2_COMPILE_H_IDEMPOTENT_GUARD */
|
||||
|
||||
/* End of pcre2_compile.h */
|
||||
@@ -1,2737 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_compile.h"
|
||||
|
||||
typedef struct {
|
||||
/* Option bits for eclass. */
|
||||
uint32_t options;
|
||||
uint32_t xoptions;
|
||||
/* Rarely used members. */
|
||||
int *errorcodeptr;
|
||||
compile_block *cb;
|
||||
/* Bitmap is needed. */
|
||||
BOOL needs_bitmap;
|
||||
} eclass_context;
|
||||
|
||||
/* Checks the allowed tokens at the end of a class structure in debug mode.
|
||||
When a new token is not processed by all loops, and the token is equal to
|
||||
a) one of the cases here:
|
||||
the compiler will complain about a duplicated case value.
|
||||
b) none of the cases here:
|
||||
the loop without the handler will stop with an assertion failure. */
|
||||
|
||||
#ifdef PCRE2_DEBUG
|
||||
#define CLASS_END_CASES(meta) \
|
||||
default: \
|
||||
PCRE2_ASSERT((meta) <= META_END); \
|
||||
/* Fall through */ \
|
||||
case META_CLASS: \
|
||||
case META_CLASS_NOT: \
|
||||
case META_CLASS_EMPTY: \
|
||||
case META_CLASS_EMPTY_NOT: \
|
||||
case META_CLASS_END: \
|
||||
case META_ECLASS_AND: \
|
||||
case META_ECLASS_OR: \
|
||||
case META_ECLASS_SUB: \
|
||||
case META_ECLASS_XOR: \
|
||||
case META_ECLASS_NOT:
|
||||
#else
|
||||
#define CLASS_END_CASES(meta) \
|
||||
default:
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
|
||||
/* Heapsort algorithm. */
|
||||
|
||||
/* Sift-down step of the heapsort. The buffer holds two-word items (pairs),
so every item index is even and the first word of each pair is the sort key.
The item at index i is moved down until neither of its children (at
2*i + 2 and 2*i + 4) has a larger key. Only items below "size" are
considered. */

static void do_heapify(uint32_t *buffer, size_t size, size_t i)
{
for (;;)
  {
  size_t largest = i;
  size_t child = (i << 1) + 2;
  uint32_t key, value;

  /* Left child. */
  if (child < size && buffer[child] > buffer[largest]) largest = child;

  /* Right child is the next pair. */
  child += 2;
  if (child < size && buffer[child] > buffer[largest]) largest = child;

  if (largest == i) return;

  /* Exchange the two pairs and carry on from the child's position. */
  key = buffer[largest];
  value = buffer[largest + 1];
  buffer[largest] = buffer[i];
  buffer[largest + 1] = buffer[i + 1];
  buffer[i] = key;
  buffer[i + 1] = value;

  i = largest;
  }
}
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
|
||||
#define PARSE_CLASS_UTF 0x1
|
||||
#define PARSE_CLASS_CASELESS_UTF 0x2
|
||||
#define PARSE_CLASS_RESTRICTED_UTF 0x4
|
||||
#define PARSE_CLASS_TURKISH_UTF 0x8
|
||||
|
||||
/* Get the range of nocase characters which includes the
|
||||
'c' character passed as argument, or directly follows 'c'. */
|
||||
|
||||
static const uint32_t*
|
||||
get_nocase_range(uint32_t c)
|
||||
{
|
||||
uint32_t left = 0;
|
||||
uint32_t right = PRIV(ucd_nocase_ranges_size);
|
||||
uint32_t middle;
|
||||
|
||||
if (c > MAX_UTF_CODE_POINT) return PRIV(ucd_nocase_ranges) + right;
|
||||
|
||||
while (TRUE)
|
||||
{
|
||||
/* Range end of the middle element. */
|
||||
middle = ((left + right) >> 1) | 0x1;
|
||||
|
||||
if (PRIV(ucd_nocase_ranges)[middle] <= c)
|
||||
left = middle + 1;
|
||||
else if (middle > 1 && PRIV(ucd_nocase_ranges)[middle - 2] > c)
|
||||
right = middle - 1;
|
||||
else
|
||||
return PRIV(ucd_nocase_ranges) + (middle - 1);
|
||||
}
|
||||
}
|
||||
|
||||
/* Get the list of othercase characters, which belongs to the passed range.
|
||||
Create ranges from these characters, and append them to the buffer argument. */
|
||||
|
||||
static size_t
|
||||
utf_caseless_extend(uint32_t start, uint32_t end, uint32_t options,
|
||||
uint32_t *buffer)
|
||||
{
|
||||
uint32_t new_start = start;
|
||||
uint32_t new_end = end;
|
||||
uint32_t c = start;
|
||||
const uint32_t *list;
|
||||
uint32_t tmp[3];
|
||||
size_t result = 2;
|
||||
const uint32_t *skip_range = get_nocase_range(c);
|
||||
uint32_t skip_start = skip_range[0];
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
PCRE2_ASSERT(options & PARSE_CLASS_UTF);
|
||||
#endif
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
if (end > MAX_UTF_CODE_POINT) end = MAX_UTF_CODE_POINT;
|
||||
#endif
|
||||
|
||||
while (c <= end)
|
||||
{
|
||||
uint32_t co;
|
||||
|
||||
if (c > skip_start)
|
||||
{
|
||||
c = skip_range[1];
|
||||
skip_range += 2;
|
||||
skip_start = skip_range[0];
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Compute caseless set. */
|
||||
|
||||
if ((options & (PARSE_CLASS_TURKISH_UTF|PARSE_CLASS_RESTRICTED_UTF)) ==
|
||||
PARSE_CLASS_TURKISH_UTF &&
|
||||
UCD_ANY_I(c))
|
||||
{
|
||||
co = PRIV(ucd_turkish_dotted_i_caseset) + (UCD_DOTTED_I(c)? 0 : 3);
|
||||
}
|
||||
else if ((co = UCD_CASESET(c)) != 0 &&
|
||||
(options & PARSE_CLASS_RESTRICTED_UTF) != 0 &&
|
||||
PRIV(ucd_caseless_sets)[co] < 128)
|
||||
{
|
||||
co = 0; /* Ignore the caseless set if it's restricted. */
|
||||
}
|
||||
|
||||
if (co != 0)
|
||||
list = PRIV(ucd_caseless_sets) + co;
|
||||
else
|
||||
{
|
||||
co = UCD_OTHERCASE(c);
|
||||
list = tmp;
|
||||
tmp[0] = c;
|
||||
tmp[1] = NOTACHAR;
|
||||
|
||||
if (co != c)
|
||||
{
|
||||
tmp[1] = co;
|
||||
tmp[2] = NOTACHAR;
|
||||
}
|
||||
}
|
||||
c++;
|
||||
|
||||
/* Add characters. */
|
||||
do
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 16
|
||||
if (!(options & PARSE_CLASS_UTF) && *list > 0xffff) continue;
|
||||
#endif
|
||||
|
||||
if (*list < new_start)
|
||||
{
|
||||
if (*list + 1 == new_start)
|
||||
{
|
||||
new_start--;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else if (*list > new_end)
|
||||
{
|
||||
if (*list - 1 == new_end)
|
||||
{
|
||||
new_end++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else continue;
|
||||
|
||||
result += 2;
|
||||
if (buffer != NULL)
|
||||
{
|
||||
buffer[0] = *list;
|
||||
buffer[1] = *list;
|
||||
buffer += 2;
|
||||
}
|
||||
}
|
||||
while (*(++list) != NOTACHAR);
|
||||
}
|
||||
|
||||
if (buffer != NULL)
|
||||
{
|
||||
buffer[0] = new_start;
|
||||
buffer[1] = new_end;
|
||||
buffer += 2;
|
||||
(void)buffer;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* Add a character list to a buffer. */
|
||||
|
||||
static size_t
|
||||
append_char_list(const uint32_t *p, uint32_t *buffer)
|
||||
{
|
||||
const uint32_t *n;
|
||||
size_t result = 0;
|
||||
|
||||
while (*p != NOTACHAR)
|
||||
{
|
||||
n = p;
|
||||
while (n[0] == n[1] - 1) n++;
|
||||
|
||||
PCRE2_ASSERT(*p < 0xffff);
|
||||
|
||||
if (buffer != NULL)
|
||||
{
|
||||
buffer[0] = *p;
|
||||
buffer[1] = *n;
|
||||
buffer += 2;
|
||||
}
|
||||
|
||||
result += 2;
|
||||
p = n + 1;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
get_highest_char(uint32_t options)
|
||||
{
|
||||
(void)options; /* Avoid compiler warning. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
return MAX_UTF_CODE_POINT;
|
||||
#else
|
||||
#ifdef SUPPORT_UNICODE
|
||||
return GET_MAX_CHAR_VALUE((options & PARSE_CLASS_UTF) != 0);
|
||||
#else
|
||||
return MAX_UCHAR_VALUE;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Add a negated character list to a buffer. */
|
||||
static size_t
|
||||
append_negated_char_list(const uint32_t *p, uint32_t options, uint32_t *buffer)
|
||||
{
|
||||
const uint32_t *n;
|
||||
uint32_t start = 0;
|
||||
size_t result = 2;
|
||||
|
||||
PCRE2_ASSERT(*p > 0);
|
||||
|
||||
while (*p != NOTACHAR)
|
||||
{
|
||||
n = p;
|
||||
while (n[0] == n[1] - 1) n++;
|
||||
|
||||
PCRE2_ASSERT(*p < 0xffff);
|
||||
|
||||
if (buffer != NULL)
|
||||
{
|
||||
buffer[0] = start;
|
||||
buffer[1] = *p - 1;
|
||||
buffer += 2;
|
||||
}
|
||||
|
||||
result += 2;
|
||||
start = *n + 1;
|
||||
p = n + 1;
|
||||
}
|
||||
|
||||
if (buffer != NULL)
|
||||
{
|
||||
buffer[0] = start;
|
||||
buffer[1] = get_highest_char(options);
|
||||
buffer += 2;
|
||||
(void)buffer;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static uint32_t *
|
||||
append_non_ascii_range(uint32_t options, uint32_t *buffer)
|
||||
{
|
||||
if (buffer == NULL) return NULL;
|
||||
|
||||
buffer[0] = 0x100;
|
||||
buffer[1] = get_highest_char(options);
|
||||
return buffer + 2;
|
||||
}
|
||||
|
||||
static size_t
|
||||
parse_class(uint32_t *ptr, uint32_t options, uint32_t *buffer)
|
||||
{
|
||||
size_t total_size = 0;
|
||||
size_t size;
|
||||
uint32_t meta_arg;
|
||||
uint32_t start_char;
|
||||
|
||||
while (TRUE)
|
||||
{
|
||||
switch (META_CODE(*ptr))
|
||||
{
|
||||
case META_ESCAPE:
|
||||
meta_arg = META_DATA(*ptr);
|
||||
switch (meta_arg)
|
||||
{
|
||||
case ESC_D:
|
||||
case ESC_W:
|
||||
case ESC_S:
|
||||
buffer = append_non_ascii_range(options, buffer);
|
||||
total_size += 2;
|
||||
break;
|
||||
|
||||
case ESC_h:
|
||||
size = append_char_list(PRIV(hspace_list), buffer);
|
||||
total_size += size;
|
||||
if (buffer != NULL) buffer += size;
|
||||
break;
|
||||
|
||||
case ESC_H:
|
||||
size = append_negated_char_list(PRIV(hspace_list), options, buffer);
|
||||
total_size += size;
|
||||
if (buffer != NULL) buffer += size;
|
||||
break;
|
||||
|
||||
case ESC_v:
|
||||
size = append_char_list(PRIV(vspace_list), buffer);
|
||||
total_size += size;
|
||||
if (buffer != NULL) buffer += size;
|
||||
break;
|
||||
|
||||
case ESC_V:
|
||||
size = append_negated_char_list(PRIV(vspace_list), options, buffer);
|
||||
total_size += size;
|
||||
if (buffer != NULL) buffer += size;
|
||||
break;
|
||||
|
||||
case ESC_p:
|
||||
case ESC_P:
|
||||
ptr++;
|
||||
if (meta_arg == ESC_p && (*ptr >> 16) == PT_ANY)
|
||||
{
|
||||
if (buffer != NULL)
|
||||
{
|
||||
buffer[0] = 0;
|
||||
buffer[1] = get_highest_char(options);
|
||||
buffer += 2;
|
||||
}
|
||||
total_size += 2;
|
||||
}
|
||||
break;
|
||||
}
|
||||
ptr++;
|
||||
continue;
|
||||
case META_POSIX_NEG:
|
||||
buffer = append_non_ascii_range(options, buffer);
|
||||
total_size += 2;
|
||||
ptr += 2;
|
||||
continue;
|
||||
case META_POSIX:
|
||||
ptr += 2;
|
||||
continue;
|
||||
case META_BIGVALUE:
|
||||
/* Character literal */
|
||||
ptr++;
|
||||
break;
|
||||
CLASS_END_CASES(*ptr)
|
||||
if (*ptr >= META_END) return total_size;
|
||||
break;
|
||||
}
|
||||
|
||||
start_char = *ptr;
|
||||
|
||||
if (ptr[1] == META_RANGE_LITERAL || ptr[1] == META_RANGE_ESCAPED)
|
||||
{
|
||||
ptr += 2;
|
||||
PCRE2_ASSERT(*ptr < META_END || *ptr == META_BIGVALUE);
|
||||
|
||||
if (*ptr == META_BIGVALUE) ptr++;
|
||||
|
||||
#ifdef EBCDIC
|
||||
#error "Missing EBCDIC support"
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (options & PARSE_CLASS_CASELESS_UTF)
|
||||
{
|
||||
size = utf_caseless_extend(start_char, *ptr++, options, buffer);
|
||||
if (buffer != NULL) buffer += size;
|
||||
total_size += size;
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (buffer != NULL)
|
||||
{
|
||||
buffer[0] = start_char;
|
||||
buffer[1] = *ptr;
|
||||
buffer += 2;
|
||||
}
|
||||
|
||||
ptr++;
|
||||
total_size += 2;
|
||||
}
|
||||
|
||||
return total_size;
|
||||
}
|
||||
|
||||
/* Extra uint32_t values for storing the lengths of range lists in
|
||||
the worst case. Two uint32_t lengths and a range end for a range
|
||||
starting before 255 */
|
||||
#define CHAR_LIST_EXTRA_SIZE 3
|
||||
|
||||
/* Starting character values for each character list. */
|
||||
|
||||
static const uint32_t char_list_starts[] = {
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
XCL_CHAR_LIST_HIGH_32_START,
|
||||
#endif
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32 || defined SUPPORT_UNICODE
|
||||
XCL_CHAR_LIST_LOW_32_START,
|
||||
#endif
|
||||
XCL_CHAR_LIST_HIGH_16_START,
|
||||
/* Must be terminated by XCL_CHAR_LIST_LOW_16_START,
|
||||
which also represents the end of the bitset. */
|
||||
XCL_CHAR_LIST_LOW_16_START,
|
||||
};
|
||||
|
||||
static class_ranges *
|
||||
compile_optimize_class(uint32_t *start_ptr, uint32_t options,
|
||||
uint32_t xoptions, compile_block *cb)
|
||||
{
|
||||
class_ranges* cranges;
|
||||
uint32_t *ptr;
|
||||
uint32_t *buffer;
|
||||
uint32_t *dst;
|
||||
uint32_t class_options = 0;
|
||||
size_t range_list_size = 0, total_size, i;
|
||||
uint32_t tmp1, tmp2;
|
||||
const uint32_t *char_list_next;
|
||||
uint16_t *next_char;
|
||||
uint32_t char_list_start, char_list_end;
|
||||
uint32_t range_start, range_end;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (options & PCRE2_UTF)
|
||||
class_options |= PARSE_CLASS_UTF;
|
||||
|
||||
if ((options & PCRE2_CASELESS) && (options & (PCRE2_UTF|PCRE2_UCP)))
|
||||
class_options |= PARSE_CLASS_CASELESS_UTF;
|
||||
|
||||
if (xoptions & PCRE2_EXTRA_CASELESS_RESTRICT)
|
||||
class_options |= PARSE_CLASS_RESTRICTED_UTF;
|
||||
|
||||
if (xoptions & PCRE2_EXTRA_TURKISH_CASING)
|
||||
class_options |= PARSE_CLASS_TURKISH_UTF;
|
||||
#endif
|
||||
|
||||
/* Compute required space for the range. */
|
||||
|
||||
range_list_size = parse_class(start_ptr, class_options, NULL);
|
||||
PCRE2_ASSERT((range_list_size & 0x1) == 0);
|
||||
|
||||
/* Allocate buffer. The total_size also represents the end of the buffer. */
|
||||
|
||||
total_size = range_list_size +
|
||||
((range_list_size >= 2) ? CHAR_LIST_EXTRA_SIZE : 0);
|
||||
|
||||
cranges = cb->cx->memctl.malloc(
|
||||
sizeof(class_ranges) + total_size * sizeof(uint32_t),
|
||||
cb->cx->memctl.memory_data);
|
||||
|
||||
if (cranges == NULL) return NULL;
|
||||
|
||||
cranges->next = NULL;
|
||||
cranges->range_list_size = (uint16_t)range_list_size;
|
||||
cranges->char_lists_types = 0;
|
||||
cranges->char_lists_size = 0;
|
||||
cranges->char_lists_start = 0;
|
||||
|
||||
if (range_list_size == 0) return cranges;
|
||||
|
||||
buffer = (uint32_t*)(cranges + 1);
|
||||
parse_class(start_ptr, class_options, buffer);
|
||||
|
||||
/* Using <= instead of == to help static analysis. */
|
||||
if (range_list_size <= 2) return cranges;
|
||||
|
||||
/* In-place sorting of ranges. */
|
||||
|
||||
i = (((range_list_size >> 2) - 1) << 1);
|
||||
while (TRUE)
|
||||
{
|
||||
do_heapify(buffer, range_list_size, i);
|
||||
if (i == 0) break;
|
||||
i -= 2;
|
||||
}
|
||||
|
||||
i = range_list_size - 2;
|
||||
while (TRUE)
|
||||
{
|
||||
tmp1 = buffer[i];
|
||||
tmp2 = buffer[i + 1];
|
||||
buffer[i] = buffer[0];
|
||||
buffer[i + 1] = buffer[1];
|
||||
buffer[0] = tmp1;
|
||||
buffer[1] = tmp2;
|
||||
|
||||
do_heapify(buffer, i, 0);
|
||||
if (i == 0) break;
|
||||
i -= 2;
|
||||
}
|
||||
|
||||
/* Merge ranges whenever possible. */
|
||||
dst = buffer;
|
||||
ptr = buffer + 2;
|
||||
range_list_size -= 2;
|
||||
|
||||
/* The second condition is a very rare corner case, where the end of the last
|
||||
range is the maximum character. This range cannot be extended further. */
|
||||
|
||||
while (range_list_size > 0 && dst[1] != ~(uint32_t)0)
|
||||
{
|
||||
if (dst[1] + 1 < ptr[0])
|
||||
{
|
||||
dst += 2;
|
||||
dst[0] = ptr[0];
|
||||
dst[1] = ptr[1];
|
||||
}
|
||||
else if (dst[1] < ptr[1]) dst[1] = ptr[1];
|
||||
|
||||
ptr += 2;
|
||||
range_list_size -= 2;
|
||||
}
|
||||
|
||||
PCRE2_ASSERT(dst[1] <= get_highest_char(class_options));
|
||||
|
||||
/* When the number of ranges are less than six,
|
||||
they are not converted to range lists. */
|
||||
|
||||
ptr = buffer;
|
||||
while (ptr < dst && ptr[1] < 0x100) ptr += 2;
|
||||
if (dst - ptr < (2 * (6 - 1)))
|
||||
{
|
||||
cranges->range_list_size = (uint16_t)(dst + 2 - buffer);
|
||||
return cranges;
|
||||
}
|
||||
|
||||
/* Compute character lists structures. */
|
||||
|
||||
char_list_next = char_list_starts;
|
||||
char_list_start = *char_list_next++;
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
char_list_end = XCL_CHAR_LIST_HIGH_32_END;
|
||||
#elif defined SUPPORT_UNICODE
|
||||
char_list_end = XCL_CHAR_LIST_LOW_32_END;
|
||||
#else
|
||||
char_list_end = XCL_CHAR_LIST_HIGH_16_END;
|
||||
#endif
|
||||
next_char = (uint16_t*)(buffer + total_size);
|
||||
|
||||
tmp1 = 0;
|
||||
tmp2 = ((sizeof(char_list_starts) / sizeof(uint32_t)) - 1) * XCL_TYPE_BIT_LEN;
|
||||
PCRE2_ASSERT(tmp2 <= 3 * XCL_TYPE_BIT_LEN && tmp2 >= XCL_TYPE_BIT_LEN);
|
||||
range_start = dst[0];
|
||||
range_end = dst[1];
|
||||
|
||||
while (TRUE)
|
||||
{
|
||||
if (range_start >= char_list_start)
|
||||
{
|
||||
if (range_start == range_end || range_end < char_list_end)
|
||||
{
|
||||
tmp1++;
|
||||
next_char--;
|
||||
|
||||
if (char_list_start < XCL_CHAR_LIST_LOW_32_START)
|
||||
*next_char = (uint16_t)((range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END);
|
||||
else
|
||||
*(uint32_t*)(--next_char) =
|
||||
(range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END;
|
||||
}
|
||||
|
||||
if (range_start < range_end)
|
||||
{
|
||||
if (range_start > char_list_start)
|
||||
{
|
||||
tmp1++;
|
||||
next_char--;
|
||||
|
||||
if (char_list_start < XCL_CHAR_LIST_LOW_32_START)
|
||||
*next_char = (uint16_t)(range_start << XCL_CHAR_SHIFT);
|
||||
else
|
||||
*(uint32_t*)(--next_char) = (range_start << XCL_CHAR_SHIFT);
|
||||
}
|
||||
else
|
||||
cranges->char_lists_types |= XCL_BEGIN_WITH_RANGE << tmp2;
|
||||
}
|
||||
|
||||
PCRE2_ASSERT((uint32_t*)next_char >= dst + 2);
|
||||
|
||||
if (dst > buffer)
|
||||
{
|
||||
dst -= 2;
|
||||
range_start = dst[0];
|
||||
range_end = dst[1];
|
||||
continue;
|
||||
}
|
||||
|
||||
range_start = 0;
|
||||
range_end = 0;
|
||||
}
|
||||
|
||||
if (range_end >= char_list_start)
|
||||
{
|
||||
PCRE2_ASSERT(range_start < char_list_start);
|
||||
|
||||
if (range_end < char_list_end)
|
||||
{
|
||||
tmp1++;
|
||||
next_char--;
|
||||
|
||||
if (char_list_start < XCL_CHAR_LIST_LOW_32_START)
|
||||
*next_char = (uint16_t)((range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END);
|
||||
else
|
||||
*(uint32_t*)(--next_char) =
|
||||
(range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END;
|
||||
|
||||
PCRE2_ASSERT((uint32_t*)next_char >= dst + 2);
|
||||
}
|
||||
|
||||
cranges->char_lists_types |= XCL_BEGIN_WITH_RANGE << tmp2;
|
||||
}
|
||||
|
||||
if (tmp1 >= XCL_ITEM_COUNT_MASK)
|
||||
{
|
||||
cranges->char_lists_types |= XCL_ITEM_COUNT_MASK << tmp2;
|
||||
next_char--;
|
||||
|
||||
if (char_list_start < XCL_CHAR_LIST_LOW_32_START)
|
||||
*next_char = (uint16_t)tmp1;
|
||||
else
|
||||
*(uint32_t*)(--next_char) = tmp1;
|
||||
}
|
||||
else
|
||||
cranges->char_lists_types |= tmp1 << tmp2;
|
||||
|
||||
if (range_start < XCL_CHAR_LIST_LOW_16_START) break;
|
||||
|
||||
PCRE2_ASSERT(tmp2 >= XCL_TYPE_BIT_LEN);
|
||||
char_list_end = char_list_start - 1;
|
||||
char_list_start = *char_list_next++;
|
||||
tmp1 = 0;
|
||||
tmp2 -= XCL_TYPE_BIT_LEN;
|
||||
}
|
||||
|
||||
if (dst[0] < XCL_CHAR_LIST_LOW_16_START) dst += 2;
|
||||
PCRE2_ASSERT((uint16_t*)dst <= next_char);
|
||||
|
||||
cranges->char_lists_size =
|
||||
(size_t)((uint8_t*)(buffer + total_size) - (uint8_t*)next_char);
|
||||
cranges->char_lists_start = (size_t)((uint8_t*)next_char - (uint8_t*)buffer);
|
||||
cranges->range_list_size = (uint16_t)(dst - buffer);
|
||||
return cranges;
|
||||
}
|
||||
|
||||
#endif /* SUPPORT_WIDE_CHARS */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
|
||||
/* Merge the characters below 256 that satisfy a Unicode property test into a
32-byte class bitmap. Bits are only ever OR-ed in; nothing is cleared.

Arguments:
  ptype       the property type (one of the PT_xxx values)
  pdata       the property value that is compared for types that take one
  negated     TRUE if characters that do NOT have the property are wanted
  classbits   the 256-bit (32-byte) bitmap to update

Returns:      nothing; classbits is updated in place
*/

void PRIV(update_classbits)(uint32_t ptype, uint32_t pdata, BOOL negated,
  uint8_t *classbits)
{
/* Update PRIV(xclass) when this function is changed. */
int c;

/* "Any" selects every character: fill the whole map, or, when negated,
select nothing and leave the map as it is. */

if (ptype == PT_ANY)
  {
  if (!negated) memset(classbits, 0xff, 32);
  return;
  }

for (c = 0; c < 256; c++)
  {
  /* The record, its character type, and the derived general category are
  fetched once per character; each case below uses what it needs. */

  const ucd_record *prop = GET_UCD(c);
  int chartype = prop->chartype;
  uint32_t gentype = PRIV(ucp_gentype)[chartype];
  BOOL set_bit = FALSE;

  switch (ptype)
    {
    case PT_LAMP:
    set_bit = (chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt);
    break;

    case PT_GC:
    set_bit = (gentype == pdata);
    break;

    case PT_PC:
    set_bit = (prop->chartype == pdata);
    break;

    case PT_SC:
    set_bit = (prop->script == pdata);
    break;

    case PT_SCX:
    set_bit = (prop->script == pdata ||
      MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), pdata) != 0);
    break;

    case PT_ALNUM:
    set_bit = (gentype == ucp_L || gentype == ucp_N);
    break;

    case PT_SPACE:    /* Perl space */
    case PT_PXSPACE:  /* POSIX space */
    switch(c)
      {
      HSPACE_BYTE_CASES:
      VSPACE_BYTE_CASES:
      set_bit = TRUE;
      break;

      default:
      set_bit = (gentype == ucp_Z);
      break;
      }
    break;

    case PT_WORD:
    set_bit = (gentype == ucp_L || gentype == ucp_N ||
      chartype == ucp_Mn || chartype == ucp_Pc);
    break;

    case PT_UCNC:
    set_bit = (c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
      c == CHAR_GRAVE_ACCENT || c >= 0xa0);
    break;

    case PT_BIDICL:
    set_bit = (UCD_BIDICLASS_PROP(prop) == pdata);
    break;

    case PT_BOOL:
    set_bit = MAPBIT(PRIV(ucd_boolprop_sets) +
      UCD_BPROPS_PROP(prop), pdata) != 0;
    break;

    case PT_PXGRAPH:
    set_bit = (gentype != ucp_Z && (gentype != ucp_C || chartype == ucp_Cf));
    break;

    case PT_PXPRINT:
    set_bit = (chartype != ucp_Zl && chartype != ucp_Zp &&
      (gentype != ucp_C || chartype == ucp_Cf));
    break;

    case PT_PXPUNCT:
    set_bit = (gentype == ucp_P || (c < 128 && gentype == ucp_S));
    break;

    /* The only remaining property type is the POSIX hex digit test. */

    default:
    PCRE2_ASSERT(ptype == PT_PXXDIGIT);
    set_bit = (c >= CHAR_0 && c <= CHAR_9) ||
              (c >= CHAR_A && c <= CHAR_F) ||
              (c >= CHAR_a && c <= CHAR_f);
    break;
    }

  /* Apply the negation, then record a hit in byte c/8, bit c%8. */

  if (negated ? !set_bit : set_bit)
    classbits[c >> 3] |= (uint8_t)(1u << (c & 0x7));
  }
}
|
||||
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
|
||||
/*************************************************
|
||||
* XClass related properties *
|
||||
*************************************************/
|
||||
|
||||
/* Flag bits that are OR-ed together to describe an extended class. */

/* An XClass has to be generated for this class. */
#define XCLASS_REQUIRED        0x01
/* The class contains at least one character below 256. */
#define XCLASS_HAS_8BIT_CHARS  0x02
/* The class contains property items. */
#define XCLASS_HAS_PROPS       0x04
/* The class contains character lists. */
#define XCLASS_HAS_CHAR_LISTS  0x08
/* The class matches every character >= 256. */
#define XCLASS_HIGH_ANY        0x10
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Internal entry point for add range to class *
|
||||
*************************************************/
|
||||
|
||||
/* This function sets the overall range for characters < 256.
|
||||
It also handles non-utf case folding.
|
||||
|
||||
Arguments:
|
||||
options the options bits
|
||||
xoptions the extra options bits
|
||||
cb compile data
|
||||
start start of range character
|
||||
end end of range character
|
||||
|
||||
Returns: cb->classbits is updated
|
||||
*/
|
||||
|
||||
static void
|
||||
add_to_class(uint32_t options, uint32_t xoptions, compile_block *cb,
|
||||
uint32_t start, uint32_t end)
|
||||
{
|
||||
uint8_t *classbits = cb->classbits.classbits;
|
||||
uint32_t c, byte_start, byte_end;
|
||||
uint32_t classbits_end = (end <= 0xff ? end : 0xff);
|
||||
|
||||
/* If caseless matching is required, scan the range and process alternate
|
||||
cases. In Unicode, there are 8-bit characters that have alternate cases that
|
||||
are greater than 255 and vice-versa (though these may be ignored if caseless
|
||||
restriction is in force). Sometimes we can just extend the original range. */
|
||||
|
||||
if ((options & PCRE2_CASELESS) != 0)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
/* UTF mode. This branch is taken if we don't support wide characters (e.g.
|
||||
8-bit library, without UTF), but we do treat those characters as Unicode
|
||||
(if UCP flag is set). In this case, we only need to expand the character class
|
||||
set to include the case pairs which are in the 0-255 codepoint range. */
|
||||
if ((options & (PCRE2_UTF|PCRE2_UCP)) != 0)
|
||||
{
|
||||
BOOL turkish_i = (xoptions & (PCRE2_EXTRA_TURKISH_CASING|PCRE2_EXTRA_CASELESS_RESTRICT)) ==
|
||||
PCRE2_EXTRA_TURKISH_CASING;
|
||||
if (start < 128)
|
||||
{
|
||||
uint32_t lo_end = (classbits_end < 127 ? classbits_end : 127);
|
||||
for (c = start; c <= lo_end; c++)
|
||||
{
|
||||
if (turkish_i && UCD_ANY_I(c)) continue;
|
||||
SETBIT(classbits, cb->fcc[c]);
|
||||
}
|
||||
}
|
||||
if (classbits_end >= 128)
|
||||
{
|
||||
uint32_t hi_start = (start > 128 ? start : 128);
|
||||
for (c = hi_start; c <= classbits_end; c++)
|
||||
{
|
||||
uint32_t co = UCD_OTHERCASE(c);
|
||||
if (co <= 0xff) SETBIT(classbits, co);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Not UTF mode */
|
||||
{
|
||||
for (c = start; c <= classbits_end; c++)
|
||||
SETBIT(classbits, cb->fcc[c]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Use the bitmap for characters < 256. Otherwise use extra data. */
|
||||
|
||||
byte_start = (start + 7) >> 3;
|
||||
byte_end = (classbits_end + 1) >> 3;
|
||||
|
||||
if (byte_start >= byte_end)
|
||||
{
|
||||
for (c = start; c <= classbits_end; c++)
|
||||
/* Regardless of start, c will always be <= 255. */
|
||||
SETBIT(classbits, c);
|
||||
return;
|
||||
}
|
||||
|
||||
for (c = byte_start; c < byte_end; c++)
|
||||
classbits[c] = 0xff;
|
||||
|
||||
byte_start <<= 3;
|
||||
byte_end <<= 3;
|
||||
|
||||
for (c = start; c < byte_start; c++)
|
||||
SETBIT(classbits, c);
|
||||
|
||||
for (c = byte_end; c <= classbits_end; c++)
|
||||
SETBIT(classbits, c);
|
||||
}
|
||||
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
/*************************************************
|
||||
* Internal entry point for add list to class *
|
||||
*************************************************/
|
||||
|
||||
/* This function is used for adding a list of horizontal or vertical whitespace
|
||||
characters to a class. The list must be in order so that ranges of characters
|
||||
can be detected and handled appropriately. This function sets the overall range
|
||||
so that the internal functions can try to avoid duplication when handling
|
||||
case-independence.
|
||||
|
||||
Arguments:
|
||||
options the options bits
|
||||
xoptions the extra options bits
|
||||
cb contains pointers to tables etc.
|
||||
p points to row of 32-bit values, terminated by NOTACHAR
|
||||
|
||||
Returns: cb->classbits is updated
|
||||
*/
|
||||
|
||||
static void
|
||||
add_list_to_class(uint32_t options, uint32_t xoptions, compile_block *cb,
|
||||
const uint32_t *p)
|
||||
{
|
||||
while (p[0] < 256)
|
||||
{
|
||||
unsigned int n = 0;
|
||||
|
||||
while(p[n+1] == p[0] + n + 1) n++;
|
||||
add_to_class(options, xoptions, cb, p[0], p[n]);
|
||||
|
||||
p += n + 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Add characters not in a list to a class *
|
||||
*************************************************/
|
||||
|
||||
/* This function is used for adding the complement of a list of horizontal or
|
||||
vertical whitespace to a class. The list must be in order.
|
||||
|
||||
Arguments:
|
||||
options the options bits
|
||||
xoptions the extra options bits
|
||||
cb contains pointers to tables etc.
|
||||
p points to row of 32-bit values, terminated by NOTACHAR
|
||||
|
||||
Returns: cb->classbits is updated
|
||||
*/
|
||||
|
||||
static void
|
||||
add_not_list_to_class(uint32_t options, uint32_t xoptions, compile_block *cb,
|
||||
const uint32_t *p)
|
||||
{
|
||||
if (p[0] > 0)
|
||||
add_to_class(options, xoptions, cb, 0, p[0] - 1);
|
||||
while (p[0] < 256)
|
||||
{
|
||||
while (p[1] == p[0] + 1) p++;
|
||||
add_to_class(options, xoptions, cb, p[0] + 1, (p[1] > 255) ? 255 : p[1] - 1);
|
||||
p++;
|
||||
}
|
||||
}
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Main entry-point to compile a character class *
|
||||
*************************************************/
|
||||
|
||||
/* This function consumes a "leaf", which is a set of characters that will
|
||||
become a single OP_CLASS, OP_NCLASS, OP_XCLASS, or OP_ALLANY. */
|
||||
|
||||
uint32_t *
|
||||
PRIV(compile_class_not_nested)(uint32_t options, uint32_t xoptions,
|
||||
uint32_t *start_ptr, PCRE2_UCHAR **pcode, BOOL negate_class, BOOL* has_bitmap,
|
||||
int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr)
|
||||
{
|
||||
uint32_t *pptr = start_ptr;
|
||||
PCRE2_UCHAR *code = *pcode;
|
||||
BOOL should_flip_negation;
|
||||
const uint8_t *cbits = cb->cbits;
|
||||
/* Some functions such as add_to_class() or eclass processing
|
||||
expect that the bitset is stored in cb->classbits.classbits. */
|
||||
uint8_t *const classbits = cb->classbits.classbits;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
BOOL utf = (options & PCRE2_UTF) != 0;
|
||||
#else /* No Unicode support */
|
||||
BOOL utf = FALSE;
|
||||
#endif
|
||||
|
||||
/* Helper variables for OP_XCLASS opcode (for characters > 255). */
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
uint32_t xclass_props;
|
||||
PCRE2_UCHAR *class_uchardata;
|
||||
class_ranges* cranges;
|
||||
#endif
|
||||
|
||||
/* If an XClass contains a negative special such as \S, we need to flip the
|
||||
negation flag at the end, so that support for characters > 255 works correctly
|
||||
(they are all included in the class). An XClass may need to insert specific
|
||||
matching or non-matching code for wide characters.
|
||||
*/
|
||||
|
||||
should_flip_negation = FALSE;
|
||||
|
||||
/* XClass will be used when characters > 255 might match. */
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
xclass_props = 0;
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
cranges = NULL;
|
||||
|
||||
if (utf)
|
||||
#endif
|
||||
{
|
||||
if (lengthptr != NULL)
|
||||
{
|
||||
cranges = compile_optimize_class(pptr, options, xoptions, cb);
|
||||
|
||||
if (cranges == NULL)
|
||||
{
|
||||
*errorcodeptr = ERR21;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Caching the pre-processed character ranges. */
|
||||
if (cb->next_cranges != NULL)
|
||||
cb->next_cranges->next = cranges;
|
||||
else
|
||||
cb->cranges = cranges;
|
||||
|
||||
cb->next_cranges = cranges;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Reuse the pre-processed character ranges. */
|
||||
cranges = cb->cranges;
|
||||
PCRE2_ASSERT(cranges != NULL);
|
||||
cb->cranges = cranges->next;
|
||||
}
|
||||
|
||||
if (cranges->range_list_size > 0)
|
||||
{
|
||||
const uint32_t *ranges = (const uint32_t*)(cranges + 1);
|
||||
|
||||
if (ranges[0] <= 255)
|
||||
xclass_props |= XCLASS_HAS_8BIT_CHARS;
|
||||
|
||||
if (ranges[cranges->range_list_size - 1] == GET_MAX_CHAR_VALUE(utf) &&
|
||||
ranges[cranges->range_list_size - 2] <= 256)
|
||||
xclass_props |= XCLASS_HIGH_ANY;
|
||||
}
|
||||
}
|
||||
|
||||
class_uchardata = code + LINK_SIZE + 2; /* For XCLASS items */
|
||||
#endif /* SUPPORT_WIDE_CHARS */
|
||||
|
||||
/* Initialize the 256-bit (32-byte) bit map to all zeros. We build the map
|
||||
in a temporary bit of memory, in case the class contains fewer than two
|
||||
8-bit characters because in that case the compiled code doesn't use the bit
|
||||
map. */
|
||||
|
||||
memset(classbits, 0, 32);
|
||||
|
||||
/* Process items until end_ptr is reached. */
|
||||
|
||||
while (TRUE)
|
||||
{
|
||||
uint32_t meta = *(pptr++);
|
||||
BOOL local_negate;
|
||||
int posix_class;
|
||||
int taboffset, tabopt;
|
||||
class_bits_storage pbits;
|
||||
uint32_t escape, c;
|
||||
|
||||
/* Handle POSIX classes such as [:alpha:] etc. */
|
||||
switch (META_CODE(meta))
|
||||
{
|
||||
case META_POSIX:
|
||||
case META_POSIX_NEG:
|
||||
|
||||
local_negate = (meta == META_POSIX_NEG);
|
||||
posix_class = *(pptr++);
|
||||
|
||||
if (local_negate) should_flip_negation = TRUE; /* Note negative special */
|
||||
|
||||
/* If matching is caseless, upper and lower are converted to alpha.
|
||||
This relies on the fact that the class table starts with alpha,
|
||||
lower, upper as the first 3 entries. */
|
||||
|
||||
if ((options & PCRE2_CASELESS) != 0 && posix_class <= 2)
|
||||
posix_class = 0;
|
||||
|
||||
/* When PCRE2_UCP is set, some of the POSIX classes are converted to
|
||||
different escape sequences that use Unicode properties \p or \P.
|
||||
Others that are not available via \p or \P have to generate
|
||||
XCL_PROP/XCL_NOTPROP directly, which is done here. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
/* TODO This entire block of code here appears to be unreachable!? I simply
|
||||
can't see how it can be hit, given that the frontend parser doesn't emit
|
||||
META_POSIX for GRAPH/PRINT/PUNCT when UCP is set. */
|
||||
if ((options & PCRE2_UCP) != 0 &&
|
||||
(xoptions & PCRE2_EXTRA_ASCII_POSIX) == 0)
|
||||
{
|
||||
uint32_t ptype;
|
||||
|
||||
switch(posix_class)
|
||||
{
|
||||
case PC_GRAPH:
|
||||
case PC_PRINT:
|
||||
case PC_PUNCT:
|
||||
ptype = (posix_class == PC_GRAPH)? PT_PXGRAPH :
|
||||
(posix_class == PC_PRINT)? PT_PXPRINT : PT_PXPUNCT;
|
||||
|
||||
PRIV(update_classbits)(ptype, 0, local_negate, classbits);
|
||||
|
||||
if ((xclass_props & XCLASS_HIGH_ANY) == 0)
|
||||
{
|
||||
if (lengthptr != NULL)
|
||||
*lengthptr += 3;
|
||||
else
|
||||
{
|
||||
*class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP;
|
||||
*class_uchardata++ = (PCRE2_UCHAR)ptype;
|
||||
*class_uchardata++ = 0;
|
||||
}
|
||||
xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_PROPS;
|
||||
}
|
||||
continue;
|
||||
|
||||
/* For the other POSIX classes (ex: ascii) we are going to
|
||||
fall through to the non-UCP case and build a bit map for
|
||||
characters with code points less than 256. However, if we are in
|
||||
a negated POSIX class, characters with code points greater than
|
||||
255 must either all match or all not match, depending on whether
|
||||
the whole class is not or is negated. For example, for
|
||||
[[:^ascii:]... they must all match, whereas for [^[:^ascii:]...
|
||||
they must not.
|
||||
|
||||
In the special case where there are no xclass items, this is
|
||||
automatically handled by the use of OP_CLASS or OP_NCLASS, but an
|
||||
explicit range is needed for OP_XCLASS. Setting a flag here
|
||||
causes the range to be generated later when it is known that
|
||||
OP_XCLASS is required. In the 8-bit library this is relevant only in
|
||||
utf mode, since no wide characters can exist otherwise. */
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* In the non-UCP case, or when UCP makes no difference, we build the
|
||||
bit map for the POSIX class in a chunk of local store because we may
|
||||
be adding and subtracting from it, and we don't want to subtract bits
|
||||
that may be in the main map already. At the end we or the result into
|
||||
the bit map that is being built. */
|
||||
|
||||
posix_class *= 3;
|
||||
|
||||
/* Copy in the first table (always present) */
|
||||
|
||||
memcpy(pbits.classbits, cbits + PRIV(posix_class_maps)[posix_class], 32);
|
||||
|
||||
/* If there is a second table, add or remove it as required. */
|
||||
|
||||
taboffset = PRIV(posix_class_maps)[posix_class + 1];
|
||||
tabopt = PRIV(posix_class_maps)[posix_class + 2];
|
||||
|
||||
if (taboffset >= 0)
|
||||
{
|
||||
if (tabopt >= 0)
|
||||
for (int i = 0; i < 32; i++)
|
||||
pbits.classbits[i] |= cbits[i + taboffset];
|
||||
else
|
||||
for (int i = 0; i < 32; i++)
|
||||
pbits.classbits[i] &= (uint8_t)(~cbits[i + taboffset]);
|
||||
}
|
||||
|
||||
/* Now see if we need to remove any special characters. An option
|
||||
value of 1 removes vertical space and 2 removes underscore. */
|
||||
|
||||
if (tabopt < 0) tabopt = -tabopt;
|
||||
if (tabopt == 1) pbits.classbits[1] &= ~0x3c;
|
||||
else if (tabopt == 2) pbits.classbits[11] &= 0x7f;
|
||||
|
||||
/* Add the POSIX table or its complement into the main table that is
|
||||
being built and we are done. */
|
||||
|
||||
{
|
||||
uint32_t *classwords = cb->classbits.classwords;
|
||||
|
||||
if (local_negate)
|
||||
for (int i = 0; i < 8; i++)
|
||||
classwords[i] |= (uint32_t)(~pbits.classwords[i]);
|
||||
else
|
||||
for (int i = 0; i < 8; i++)
|
||||
classwords[i] |= pbits.classwords[i];
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
/* Every class contains at least one < 256 character. */
|
||||
xclass_props |= XCLASS_HAS_8BIT_CHARS;
|
||||
#endif
|
||||
continue; /* End of POSIX handling */
|
||||
|
||||
/* Other than POSIX classes, the only items we should encounter are
|
||||
\d-type escapes and literal characters (possibly as ranges). */
|
||||
case META_BIGVALUE:
|
||||
meta = *(pptr++);
|
||||
break;
|
||||
|
||||
case META_ESCAPE:
|
||||
escape = META_DATA(meta);
|
||||
|
||||
switch(escape)
|
||||
{
|
||||
case ESC_d:
|
||||
for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit];
|
||||
break;
|
||||
|
||||
case ESC_D:
|
||||
should_flip_negation = TRUE;
|
||||
for (int i = 0; i < 32; i++)
|
||||
classbits[i] |= (uint8_t)(~cbits[i+cbit_digit]);
|
||||
break;
|
||||
|
||||
case ESC_w:
|
||||
for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word];
|
||||
break;
|
||||
|
||||
case ESC_W:
|
||||
should_flip_negation = TRUE;
|
||||
for (int i = 0; i < 32; i++)
|
||||
classbits[i] |= (uint8_t)(~cbits[i+cbit_word]);
|
||||
break;
|
||||
|
||||
/* Perl 5.004 onwards omitted VT from \s, but restored it at Perl
|
||||
5.18. Before PCRE 8.34, we had to preserve the VT bit if it was
|
||||
previously set by something earlier in the character class.
|
||||
Luckily, the value of CHAR_VT is 0x0b in both ASCII and EBCDIC, so
|
||||
we could just adjust the appropriate bit. From PCRE 8.34 we no
|
||||
longer treat \s and \S specially. */
|
||||
|
||||
case ESC_s:
|
||||
for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space];
|
||||
break;
|
||||
|
||||
case ESC_S:
|
||||
should_flip_negation = TRUE;
|
||||
for (int i = 0; i < 32; i++)
|
||||
classbits[i] |= (uint8_t)(~cbits[i+cbit_space]);
|
||||
break;
|
||||
|
||||
/* When adding the horizontal or vertical space lists to a class, or
|
||||
their complements, disable PCRE2_CASELESS, because it just wastes
|
||||
time, and in the "not-x" UTF cases can create unwanted duplicates in
|
||||
the XCLASS list (provoked by characters that have more than one other
|
||||
case and by both cases being in the same "not-x" sublist). */
|
||||
|
||||
case ESC_h:
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (cranges != NULL) break;
|
||||
#endif
|
||||
add_list_to_class(options & ~PCRE2_CASELESS, xoptions,
|
||||
cb, PRIV(hspace_list));
|
||||
#else
|
||||
PCRE2_ASSERT(cranges != NULL);
|
||||
#endif
|
||||
break;
|
||||
|
||||
case ESC_H:
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (cranges != NULL) break;
|
||||
#endif
|
||||
add_not_list_to_class(options & ~PCRE2_CASELESS, xoptions,
|
||||
cb, PRIV(hspace_list));
|
||||
#else
|
||||
PCRE2_ASSERT(cranges != NULL);
|
||||
#endif
|
||||
break;
|
||||
|
||||
case ESC_v:
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (cranges != NULL) break;
|
||||
#endif
|
||||
add_list_to_class(options & ~PCRE2_CASELESS, xoptions,
|
||||
cb, PRIV(vspace_list));
|
||||
#else
|
||||
PCRE2_ASSERT(cranges != NULL);
|
||||
#endif
|
||||
break;
|
||||
|
||||
case ESC_V:
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (cranges != NULL) break;
|
||||
#endif
|
||||
add_not_list_to_class(options & ~PCRE2_CASELESS, xoptions,
|
||||
cb, PRIV(vspace_list));
|
||||
#else
|
||||
PCRE2_ASSERT(cranges != NULL);
|
||||
#endif
|
||||
break;
|
||||
|
||||
/* If Unicode is not supported, \P and \p are not allowed and are
|
||||
faulted at parse time, so will never appear here. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
case ESC_p:
|
||||
case ESC_P:
|
||||
{
|
||||
uint32_t ptype = *pptr >> 16;
|
||||
uint32_t pdata = *(pptr++) & 0xffff;
|
||||
|
||||
/* The "Any" is processed by PRIV(update_classbits)(). */
|
||||
if (ptype == PT_ANY)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (!utf && escape == ESC_p) memset(classbits, 0xff, 32);
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
|
||||
PRIV(update_classbits)(ptype, pdata, (escape == ESC_P), classbits);
|
||||
|
||||
if ((xclass_props & XCLASS_HIGH_ANY) == 0)
|
||||
{
|
||||
if (lengthptr != NULL)
|
||||
*lengthptr += 3;
|
||||
else
|
||||
{
|
||||
*class_uchardata++ = (escape == ESC_p)? XCL_PROP : XCL_NOTPROP;
|
||||
*class_uchardata++ = ptype;
|
||||
*class_uchardata++ = pdata;
|
||||
}
|
||||
xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_PROPS;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
/* Every non-property class contains at least one < 256 character. */
|
||||
xclass_props |= XCLASS_HAS_8BIT_CHARS;
|
||||
#endif
|
||||
/* End handling \d-type escapes */
|
||||
continue;
|
||||
|
||||
CLASS_END_CASES(meta)
|
||||
/* Literals. */
|
||||
if (meta < META_END) break;
|
||||
/* Non-literals: end of class contents. */
|
||||
goto END_PROCESSING;
|
||||
}
|
||||
|
||||
/* A literal character may be followed by a range meta. At parse time
|
||||
there are checks for out-of-order characters, for ranges where the two
|
||||
characters are equal, and for hyphens that cannot indicate a range. At
|
||||
this point, therefore, no checking is needed. */
|
||||
|
||||
c = meta;
|
||||
|
||||
/* Remember if \r or \n were explicitly used */
|
||||
|
||||
if (c == CHAR_CR || c == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF;
|
||||
|
||||
/* Process a character range */
|
||||
|
||||
if (*pptr == META_RANGE_LITERAL || *pptr == META_RANGE_ESCAPED)
|
||||
{
|
||||
uint32_t d;
|
||||
|
||||
#ifdef EBCDIC
|
||||
BOOL range_is_literal = (*pptr == META_RANGE_LITERAL);
|
||||
#endif
|
||||
++pptr;
|
||||
d = *(pptr++);
|
||||
if (d == META_BIGVALUE) d = *(pptr++);
|
||||
|
||||
/* Remember an explicit \r or \n, and add the range to the class. */
|
||||
|
||||
if (d == CHAR_CR || d == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF;
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (cranges != NULL) continue;
|
||||
xclass_props |= XCLASS_HAS_8BIT_CHARS;
|
||||
#endif
|
||||
|
||||
/* In an EBCDIC environment, Perl treats alphabetic ranges specially
|
||||
because there are holes in the encoding, and simply using the range
|
||||
A-Z (for example) would include the characters in the holes. This
|
||||
applies only to literal ranges; [\xC1-\xE9] is different to [A-Z]. */
|
||||
|
||||
#ifdef EBCDIC
|
||||
if (range_is_literal &&
|
||||
(cb->ctypes[c] & ctype_letter) != 0 &&
|
||||
(cb->ctypes[d] & ctype_letter) != 0 &&
|
||||
(c <= CHAR_z) == (d <= CHAR_z))
|
||||
{
|
||||
uint32_t uc = (d <= CHAR_z)? 0 : 64;
|
||||
uint32_t C = c - uc;
|
||||
uint32_t D = d - uc;
|
||||
|
||||
if (C <= CHAR_i)
|
||||
{
|
||||
add_to_class(options, xoptions, cb, C + uc,
|
||||
((D < CHAR_i)? D : CHAR_i) + uc);
|
||||
C = CHAR_j;
|
||||
}
|
||||
|
||||
if (C <= D && C <= CHAR_r)
|
||||
{
|
||||
add_to_class(options, xoptions, cb, C + uc,
|
||||
((D < CHAR_r)? D : CHAR_r) + uc);
|
||||
C = CHAR_s;
|
||||
}
|
||||
|
||||
if (C <= D)
|
||||
add_to_class(options, xoptions, cb, C + uc, D + uc);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
/* Not an EBCDIC special range */
|
||||
|
||||
add_to_class(options, xoptions, cb, c, d);
|
||||
#else
|
||||
PCRE2_ASSERT(cranges != NULL);
|
||||
#endif
|
||||
continue;
|
||||
} /* End of range handling */
|
||||
|
||||
/* Character ranges are ignored when class_ranges is present. */
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (cranges != NULL) continue;
|
||||
xclass_props |= XCLASS_HAS_8BIT_CHARS;
|
||||
#endif
|
||||
/* Handle a single character. */
|
||||
|
||||
add_to_class(options, xoptions, cb, meta, meta);
|
||||
#else
|
||||
PCRE2_ASSERT(cranges != NULL);
|
||||
#endif
|
||||
} /* End of main class-processing loop */
|
||||
|
||||
END_PROCESSING:
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
PCRE2_ASSERT((xclass_props & XCLASS_HAS_PROPS) == 0 ||
|
||||
(xclass_props & XCLASS_HIGH_ANY) == 0);
|
||||
|
||||
if (cranges != NULL)
|
||||
{
|
||||
uint32_t *range = (uint32_t*)(cranges + 1);
|
||||
uint32_t *end = range + cranges->range_list_size;
|
||||
|
||||
while (range < end && range[0] < 256)
|
||||
{
|
||||
PCRE2_ASSERT((xclass_props & XCLASS_HAS_8BIT_CHARS) != 0);
|
||||
/* Add range to bitset. If we are in UTF or UCP mode, then clear the
|
||||
caseless bit, because the cranges handle caselessness (only) in this
|
||||
condition; see the condition for PARSE_CLASS_CASELESS_UTF in
|
||||
compile_optimize_class(). */
|
||||
add_to_class(((options & (PCRE2_UTF|PCRE2_UCP)) != 0)?
|
||||
(options & ~PCRE2_CASELESS) : options, xoptions, cb, range[0], range[1]);
|
||||
|
||||
if (range[1] > 255) break;
|
||||
range += 2;
|
||||
}
|
||||
|
||||
if (cranges->char_lists_size > 0)
|
||||
{
|
||||
/* The cranges structure is still used and freed later. */
|
||||
PCRE2_ASSERT((xclass_props & XCLASS_HIGH_ANY) == 0);
|
||||
xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_CHAR_LISTS;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((xclass_props & XCLASS_HIGH_ANY) != 0)
|
||||
{
|
||||
PCRE2_ASSERT(range + 2 == end && range[0] <= 256 &&
|
||||
range[1] >= GET_MAX_CHAR_VALUE(utf));
|
||||
should_flip_negation = TRUE;
|
||||
range = end;
|
||||
}
|
||||
|
||||
while (range < end)
|
||||
{
|
||||
uint32_t range_start = range[0];
|
||||
uint32_t range_end = range[1];
|
||||
|
||||
range += 2;
|
||||
xclass_props |= XCLASS_REQUIRED;
|
||||
|
||||
if (range_start < 256) range_start = 256;
|
||||
|
||||
if (lengthptr != NULL)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
*lengthptr += 1;
|
||||
|
||||
if (range_start < range_end)
|
||||
*lengthptr += PRIV(ord2utf)(range_start, class_uchardata);
|
||||
|
||||
*lengthptr += PRIV(ord2utf)(range_end, class_uchardata);
|
||||
continue;
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
*lengthptr += range_start < range_end ? 3 : 2;
|
||||
continue;
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
if (range_start < range_end)
|
||||
{
|
||||
*class_uchardata++ = XCL_RANGE;
|
||||
class_uchardata += PRIV(ord2utf)(range_start, class_uchardata);
|
||||
}
|
||||
else
|
||||
*class_uchardata++ = XCL_SINGLE;
|
||||
|
||||
class_uchardata += PRIV(ord2utf)(range_end, class_uchardata);
|
||||
continue;
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Without UTF support, character values are constrained
|
||||
by the bit length, and can only be > 256 for 16-bit and
|
||||
32-bit libraries. */
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (range_start < range_end)
|
||||
{
|
||||
*class_uchardata++ = XCL_RANGE;
|
||||
*class_uchardata++ = range_start;
|
||||
}
|
||||
else
|
||||
*class_uchardata++ = XCL_SINGLE;
|
||||
|
||||
*class_uchardata++ = range_end;
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||
}
|
||||
|
||||
if (lengthptr == NULL)
|
||||
cb->cx->memctl.free(cranges, cb->cx->memctl.memory_data);
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_WIDE_CHARS */
|
||||
|
||||
/* If there are characters with values > 255, or Unicode property settings
|
||||
(\p or \P), we have to compile an extended class, with its own opcode,
|
||||
unless there were no property settings and there was a negated special such
|
||||
as \S in the class, and PCRE2_UCP is not set, because in that case all
|
||||
characters > 255 are in or not in the class, so any that were explicitly
|
||||
given as well can be ignored.
|
||||
|
||||
In the UCP case, if certain negated POSIX classes (ex: [:^ascii:]) were
|
||||
present in a class, we either have to match or not match all wide
|
||||
characters (depending on whether the whole class is or is not negated).
|
||||
This requirement is indicated by match_all_or_no_wide_chars being true.
|
||||
We do this by including an explicit range, which works in both cases.
|
||||
This applies only in UTF and 16-bit and 32-bit non-UTF modes, since there
|
||||
cannot be any wide characters in 8-bit non-UTF mode.
|
||||
|
||||
When there *are* properties in a positive UTF-8 or any 16-bit or 32-bit
|
||||
class where \S etc is present without PCRE2_UCP, causing an extended class
|
||||
to be compiled, we make sure that all characters > 255 are included by
|
||||
forcing match_all_or_no_wide_chars to be true.
|
||||
|
||||
If, when generating an xclass, there are no characters < 256, we can omit
|
||||
the bitmap in the actual compiled code. */
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS /* Defined for 16/32 bits, or 8-bit with Unicode */
|
||||
if ((xclass_props & XCLASS_REQUIRED) != 0)
|
||||
{
|
||||
PCRE2_UCHAR *previous = code;
|
||||
|
||||
if ((xclass_props & XCLASS_HAS_CHAR_LISTS) == 0)
|
||||
*class_uchardata++ = XCL_END; /* Marks the end of extra data */
|
||||
*code++ = OP_XCLASS;
|
||||
code += LINK_SIZE;
|
||||
*code = negate_class? XCL_NOT:0;
|
||||
if ((xclass_props & XCLASS_HAS_PROPS) != 0) *code |= XCL_HASPROP;
|
||||
|
||||
/* If the map is required, move up the extra data to make room for it;
|
||||
otherwise just move the code pointer to the end of the extra data. */
|
||||
|
||||
if ((xclass_props & XCLASS_HAS_8BIT_CHARS) != 0 || has_bitmap != NULL)
|
||||
{
|
||||
if (negate_class)
|
||||
{
|
||||
uint32_t *classwords = cb->classbits.classwords;
|
||||
for (int i = 0; i < 8; i++) classwords[i] = ~classwords[i];
|
||||
}
|
||||
|
||||
if (has_bitmap == NULL)
|
||||
{
|
||||
*code++ |= XCL_MAP;
|
||||
(void)memmove(code + (32 / sizeof(PCRE2_UCHAR)), code,
|
||||
CU2BYTES(class_uchardata - code));
|
||||
memcpy(code, classbits, 32);
|
||||
code = class_uchardata + (32 / sizeof(PCRE2_UCHAR));
|
||||
}
|
||||
else
|
||||
{
|
||||
code = class_uchardata;
|
||||
if ((xclass_props & XCLASS_HAS_8BIT_CHARS) != 0)
|
||||
*has_bitmap = TRUE;
|
||||
}
|
||||
}
|
||||
else code = class_uchardata;
|
||||
|
||||
if ((xclass_props & XCLASS_HAS_CHAR_LISTS) != 0)
|
||||
{
|
||||
/* Char lists size is an even number, because all items are 16 or 32
|
||||
bit values. The character list data is always aligned to 32 bits. */
|
||||
size_t char_lists_size = cranges->char_lists_size;
|
||||
PCRE2_ASSERT((char_lists_size & 0x1) == 0 &&
|
||||
(cb->char_lists_size & 0x3) == 0);
|
||||
|
||||
if (lengthptr != NULL)
|
||||
{
|
||||
char_lists_size = CLIST_ALIGN_TO(char_lists_size, sizeof(uint32_t));
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
*lengthptr += 2 + LINK_SIZE;
|
||||
#else
|
||||
*lengthptr += 1 + LINK_SIZE;
|
||||
#endif
|
||||
|
||||
cb->char_lists_size += char_lists_size;
|
||||
|
||||
char_lists_size /= sizeof(PCRE2_UCHAR);
|
||||
|
||||
/* Storage space for character lists is included
|
||||
in the maximum pattern size. */
|
||||
if (*lengthptr > MAX_PATTERN_SIZE ||
|
||||
MAX_PATTERN_SIZE - *lengthptr < char_lists_size)
|
||||
{
|
||||
*errorcodeptr = ERR20; /* Pattern is too large */
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
uint8_t *data;
|
||||
|
||||
PCRE2_ASSERT(cranges->char_lists_types <= XCL_TYPE_MASK);
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
/* Encode as high / low bytes. */
|
||||
code[0] = (uint8_t)(XCL_LIST |
|
||||
(cranges->char_lists_types >> 8));
|
||||
code[1] = (uint8_t)cranges->char_lists_types;
|
||||
code += 2;
|
||||
#else
|
||||
*code++ = (PCRE2_UCHAR)(XCL_LIST | cranges->char_lists_types);
|
||||
#endif
|
||||
|
||||
/* Character lists are stored in backwards direction from
|
||||
byte code start. The non-dfa/dfa matchers can access these
|
||||
lists using the byte code start stored in match blocks.
|
||||
Each list is aligned to 32 bit with an optional unused
|
||||
16 bit value at the beginning of the character list. */
|
||||
|
||||
cb->char_lists_size += char_lists_size;
|
||||
data = (uint8_t*)cb->start_code - cb->char_lists_size;
|
||||
|
||||
memcpy(data, (uint8_t*)(cranges + 1) + cranges->char_lists_start,
|
||||
char_lists_size);
|
||||
|
||||
/* Since character lists total size is less than MAX_PATTERN_SIZE,
|
||||
their starting offset fits into a value which size is LINK_SIZE. */
|
||||
|
||||
char_lists_size = cb->char_lists_size;
|
||||
PUT(code, 0, (uint32_t)(char_lists_size >> 1));
|
||||
code += LINK_SIZE;
|
||||
|
||||
#if defined PCRE2_DEBUG || defined SUPPORT_VALGRIND
|
||||
if ((char_lists_size & 0x2) != 0)
|
||||
{
|
||||
/* In debug the unused 16 bit value is set
|
||||
to a fixed value and marked unused. */
|
||||
((uint16_t*)data)[-1] = 0x5555;
|
||||
#ifdef SUPPORT_VALGRIND
|
||||
VALGRIND_MAKE_MEM_NOACCESS(data - 2, 2);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
cb->char_lists_size =
|
||||
CLIST_ALIGN_TO(char_lists_size, sizeof(uint32_t));
|
||||
|
||||
cb->cx->memctl.free(cranges, cb->cx->memctl.memory_data);
|
||||
}
|
||||
}
|
||||
|
||||
/* Now fill in the complete length of the item */
|
||||
|
||||
PUT(previous, 1, (int)(code - previous));
|
||||
goto DONE; /* End of class handling */
|
||||
}
|
||||
#endif /* SUPPORT_WIDE_CHARS */
|
||||
|
||||
/* If there are no characters > 255, or they are all to be included or
|
||||
excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the
|
||||
whole class was negated and whether there were negative specials such as \S
|
||||
(non-UCP) in the class. Then copy the 32-byte map into the code vector,
|
||||
negating it if necessary. */
|
||||
|
||||
if (negate_class)
|
||||
{
|
||||
uint32_t *classwords = cb->classbits.classwords;
|
||||
|
||||
for (int i = 0; i < 8; i++) classwords[i] = ~classwords[i];
|
||||
}
|
||||
|
||||
if ((SELECT_VALUE8(!utf, 0) || negate_class != should_flip_negation) &&
|
||||
cb->classbits.classwords[0] == ~(uint32_t)0)
|
||||
{
|
||||
const uint32_t *classwords = cb->classbits.classwords;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
if (classwords[i] != ~(uint32_t)0) break;
|
||||
|
||||
if (i == 8)
|
||||
{
|
||||
*code++ = OP_ALLANY;
|
||||
goto DONE; /* End of class handling */
|
||||
}
|
||||
}
|
||||
|
||||
*code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
|
||||
memcpy(code, classbits, 32);
|
||||
code += 32 / sizeof(PCRE2_UCHAR);
|
||||
|
||||
DONE:
|
||||
*pcode = code;
|
||||
return pptr - 1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* ===================================================================*/
|
||||
/* Here follows a block of ECLASS-compiling functions. You may well want to
|
||||
read them from top to bottom; they are ordered from leafmost (at the top) to
|
||||
outermost parser (at the bottom of the file). */
|
||||
|
||||
/* This function folds one operand using the negation operator.
|
||||
The new, combined chunk of stack code is written out to *pop_info. */
|
||||
|
||||
static void
|
||||
fold_negation(eclass_op_info *pop_info, PCRE2_SIZE *lengthptr,
|
||||
BOOL preserve_classbits)
|
||||
{
|
||||
/* If the chunk of stack code is already composed of multiple ops, we won't
|
||||
descend in and try and propagate the negation down the tree. (That would lead
|
||||
to O(n^2) compile-time, which could be exploitable with a malicious regex -
|
||||
although maybe that's not really too much of a worry in a library that offers
|
||||
an exponential-time matching function!) */
|
||||
|
||||
if (pop_info->op_single_type == 0)
|
||||
{
|
||||
if (lengthptr != NULL)
|
||||
*lengthptr += 1;
|
||||
else
|
||||
pop_info->code_start[pop_info->length] = ECL_NOT;
|
||||
pop_info->length += 1;
|
||||
}
|
||||
|
||||
/* Otherwise, it's a nice single-op item, so we can easily fold in the negation
|
||||
without needing to produce an ECL_NOT. */
|
||||
|
||||
else if (pop_info->op_single_type == ECL_ANY ||
|
||||
pop_info->op_single_type == ECL_NONE)
|
||||
{
|
||||
pop_info->op_single_type = (pop_info->op_single_type == ECL_NONE)?
|
||||
ECL_ANY : ECL_NONE;
|
||||
if (lengthptr == NULL)
|
||||
*(pop_info->code_start) = pop_info->op_single_type;
|
||||
}
|
||||
else
|
||||
{
|
||||
PCRE2_ASSERT(pop_info->op_single_type == ECL_XCLASS &&
|
||||
pop_info->length >= 1 + LINK_SIZE + 1);
|
||||
if (lengthptr == NULL)
|
||||
pop_info->code_start[1 + LINK_SIZE] ^= XCL_NOT;
|
||||
}
|
||||
|
||||
if (!preserve_classbits)
|
||||
{
|
||||
for (int i = 0; i < 8; i++)
|
||||
pop_info->bits.classwords[i] = ~pop_info->bits.classwords[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* This function folds together two operands using a binary operator.
|
||||
The new, combined chunk of stack code is written out to *lhs_op_info. */
|
||||
|
||||
static void
|
||||
fold_binary(int op, eclass_op_info *lhs_op_info, eclass_op_info *rhs_op_info,
|
||||
PCRE2_SIZE *lengthptr)
|
||||
{
|
||||
switch (op)
|
||||
{
|
||||
/* ECL_AND truth table:
|
||||
|
||||
LHS RHS RESULT
|
||||
----------------
|
||||
ANY * RHS
|
||||
* ANY LHS
|
||||
NONE * NONE
|
||||
* NONE NONE
|
||||
X Y X & Y
|
||||
*/
|
||||
|
||||
case ECL_AND:
|
||||
if (rhs_op_info->op_single_type == ECL_ANY)
|
||||
{
|
||||
/* no-op: drop the RHS */
|
||||
}
|
||||
else if (lhs_op_info->op_single_type == ECL_ANY)
|
||||
{
|
||||
/* no-op: drop the LHS, and memmove the RHS into its place */
|
||||
if (lengthptr == NULL)
|
||||
memmove(lhs_op_info->code_start, rhs_op_info->code_start,
|
||||
CU2BYTES(rhs_op_info->length));
|
||||
lhs_op_info->length = rhs_op_info->length;
|
||||
lhs_op_info->op_single_type = rhs_op_info->op_single_type;
|
||||
}
|
||||
else if (rhs_op_info->op_single_type == ECL_NONE)
|
||||
{
|
||||
/* the result is ECL_NONE: write into the LHS */
|
||||
if (lengthptr == NULL)
|
||||
lhs_op_info->code_start[0] = ECL_NONE;
|
||||
lhs_op_info->length = 1;
|
||||
lhs_op_info->op_single_type = ECL_NONE;
|
||||
}
|
||||
else if (lhs_op_info->op_single_type == ECL_NONE)
|
||||
{
|
||||
/* the result is ECL_NONE: drop the RHS */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Both of LHS & RHS are either ECL_XCLASS, or compound operations. */
|
||||
if (lengthptr != NULL)
|
||||
*lengthptr += 1;
|
||||
else
|
||||
{
|
||||
PCRE2_ASSERT(rhs_op_info->code_start ==
|
||||
lhs_op_info->code_start + lhs_op_info->length);
|
||||
rhs_op_info->code_start[rhs_op_info->length] = ECL_AND;
|
||||
}
|
||||
lhs_op_info->length += rhs_op_info->length + 1;
|
||||
lhs_op_info->op_single_type = 0;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 8; i++)
|
||||
lhs_op_info->bits.classwords[i] &= rhs_op_info->bits.classwords[i];
|
||||
break;
|
||||
|
||||
/* ECL_OR truth table:
|
||||
|
||||
LHS RHS RESULT
|
||||
----------------
|
||||
ANY * ANY
|
||||
* ANY ANY
|
||||
NONE * RHS
|
||||
* NONE LHS
|
||||
X Y X | Y
|
||||
*/
|
||||
|
||||
case ECL_OR:
|
||||
if (rhs_op_info->op_single_type == ECL_NONE)
|
||||
{
|
||||
/* no-op: drop the RHS */
|
||||
}
|
||||
else if (lhs_op_info->op_single_type == ECL_NONE)
|
||||
{
|
||||
/* no-op: drop the LHS, and memmove the RHS into its place */
|
||||
if (lengthptr == NULL)
|
||||
memmove(lhs_op_info->code_start, rhs_op_info->code_start,
|
||||
CU2BYTES(rhs_op_info->length));
|
||||
lhs_op_info->length = rhs_op_info->length;
|
||||
lhs_op_info->op_single_type = rhs_op_info->op_single_type;
|
||||
}
|
||||
else if (rhs_op_info->op_single_type == ECL_ANY)
|
||||
{
|
||||
/* the result is ECL_ANY: write into the LHS */
|
||||
if (lengthptr == NULL)
|
||||
lhs_op_info->code_start[0] = ECL_ANY;
|
||||
lhs_op_info->length = 1;
|
||||
lhs_op_info->op_single_type = ECL_ANY;
|
||||
}
|
||||
else if (lhs_op_info->op_single_type == ECL_ANY)
|
||||
{
|
||||
/* the result is ECL_ANY: drop the RHS */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Both of LHS & RHS are either ECL_XCLASS, or compound operations. */
|
||||
if (lengthptr != NULL)
|
||||
*lengthptr += 1;
|
||||
else
|
||||
{
|
||||
PCRE2_ASSERT(rhs_op_info->code_start ==
|
||||
lhs_op_info->code_start + lhs_op_info->length);
|
||||
rhs_op_info->code_start[rhs_op_info->length] = ECL_OR;
|
||||
}
|
||||
lhs_op_info->length += rhs_op_info->length + 1;
|
||||
lhs_op_info->op_single_type = 0;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 8; i++)
|
||||
lhs_op_info->bits.classwords[i] |= rhs_op_info->bits.classwords[i];
|
||||
break;
|
||||
|
||||
/* ECL_XOR truth table:
|
||||
|
||||
LHS RHS RESULT
|
||||
----------------
|
||||
ANY * !RHS
|
||||
* ANY !LHS
|
||||
NONE * RHS
|
||||
* NONE LHS
|
||||
X Y X ^ Y
|
||||
*/
|
||||
|
||||
case ECL_XOR:
|
||||
if (rhs_op_info->op_single_type == ECL_NONE)
|
||||
{
|
||||
/* no-op: drop the RHS */
|
||||
}
|
||||
else if (lhs_op_info->op_single_type == ECL_NONE)
|
||||
{
|
||||
/* no-op: drop the LHS, and memmove the RHS into its place */
|
||||
if (lengthptr == NULL)
|
||||
memmove(lhs_op_info->code_start, rhs_op_info->code_start,
|
||||
CU2BYTES(rhs_op_info->length));
|
||||
lhs_op_info->length = rhs_op_info->length;
|
||||
lhs_op_info->op_single_type = rhs_op_info->op_single_type;
|
||||
}
|
||||
else if (rhs_op_info->op_single_type == ECL_ANY)
|
||||
{
|
||||
/* the result is !LHS: fold in the negation, and drop the RHS */
|
||||
/* Preserve the classbits, because we promise to deal with them later. */
|
||||
fold_negation(lhs_op_info, lengthptr, TRUE);
|
||||
}
|
||||
else if (lhs_op_info->op_single_type == ECL_ANY)
|
||||
{
|
||||
/* the result is !RHS: drop the LHS, memmove the RHS into its place, and
|
||||
fold in the negation */
|
||||
if (lengthptr == NULL)
|
||||
memmove(lhs_op_info->code_start, rhs_op_info->code_start,
|
||||
CU2BYTES(rhs_op_info->length));
|
||||
lhs_op_info->length = rhs_op_info->length;
|
||||
lhs_op_info->op_single_type = rhs_op_info->op_single_type;
|
||||
|
||||
/* Preserve the classbits, because we promise to deal with them later. */
|
||||
fold_negation(lhs_op_info, lengthptr, TRUE);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Both of LHS & RHS are either ECL_XCLASS, or compound operations. */
|
||||
if (lengthptr != NULL)
|
||||
*lengthptr += 1;
|
||||
else
|
||||
{
|
||||
PCRE2_ASSERT(rhs_op_info->code_start ==
|
||||
lhs_op_info->code_start + lhs_op_info->length);
|
||||
rhs_op_info->code_start[rhs_op_info->length] = ECL_XOR;
|
||||
}
|
||||
lhs_op_info->length += rhs_op_info->length + 1;
|
||||
lhs_op_info->op_single_type = 0;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 8; i++)
|
||||
lhs_op_info->bits.classwords[i] ^= rhs_op_info->bits.classwords[i];
|
||||
break;
|
||||
|
||||
default:
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
static BOOL
|
||||
compile_eclass_nested(eclass_context *context, BOOL negated,
|
||||
uint32_t **pptr, PCRE2_UCHAR **pcode,
|
||||
eclass_op_info *pop_info, PCRE2_SIZE *lengthptr);
|
||||
|
||||
/* This function consumes a group of implicitly-unioned class elements.
|
||||
These can be characters, ranges, properties, or nested classes, as long
|
||||
as they are all joined by being placed adjacently. */
|
||||
|
||||
static BOOL
|
||||
compile_class_operand(eclass_context *context, BOOL negated,
|
||||
uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
|
||||
PCRE2_SIZE *lengthptr)
|
||||
{
|
||||
uint32_t *ptr = *pptr;
|
||||
uint32_t *prev_ptr;
|
||||
PCRE2_UCHAR *code = *pcode;
|
||||
PCRE2_UCHAR *code_start = code;
|
||||
PCRE2_SIZE prev_length = (lengthptr != NULL)? *lengthptr : 0;
|
||||
PCRE2_SIZE extra_length;
|
||||
uint32_t meta = META_CODE(*ptr);
|
||||
|
||||
switch (meta)
|
||||
{
|
||||
case META_CLASS_EMPTY_NOT:
|
||||
case META_CLASS_EMPTY:
|
||||
++ptr;
|
||||
pop_info->length = 1;
|
||||
if ((meta == META_CLASS_EMPTY) == negated)
|
||||
{
|
||||
*code++ = pop_info->op_single_type = ECL_ANY;
|
||||
memset(pop_info->bits.classbits, 0xff, 32);
|
||||
}
|
||||
else
|
||||
{
|
||||
*code++ = pop_info->op_single_type = ECL_NONE;
|
||||
memset(pop_info->bits.classbits, 0, 32);
|
||||
}
|
||||
break;
|
||||
|
||||
case META_CLASS:
|
||||
case META_CLASS_NOT:
|
||||
if ((*ptr & CLASS_IS_ECLASS) != 0)
|
||||
{
|
||||
if (!compile_eclass_nested(context, negated, &ptr, &code,
|
||||
pop_info, lengthptr))
|
||||
return FALSE;
|
||||
|
||||
PCRE2_ASSERT(*ptr == META_CLASS_END);
|
||||
ptr++;
|
||||
goto DONE;
|
||||
}
|
||||
|
||||
ptr++;
|
||||
/* Fall through */
|
||||
|
||||
default:
|
||||
/* Scan forward characters, ranges, and properties.
|
||||
For example: inside [a-z_ -- m] we don't have brackets around "a-z_" but
|
||||
we still need to collect that fragment up into a "leaf" OP_CLASS. */
|
||||
|
||||
prev_ptr = ptr;
|
||||
ptr = PRIV(compile_class_not_nested)(
|
||||
context->options, context->xoptions, ptr, &code,
|
||||
(meta != META_CLASS_NOT) == negated, &context->needs_bitmap,
|
||||
context->errorcodeptr, context->cb, lengthptr);
|
||||
if (ptr == NULL) return FALSE;
|
||||
|
||||
/* We must have a 100% guarantee that ptr increases when
|
||||
compile_class_operand() returns, even on Release builds, so that we can
|
||||
statically prove our loops terminate. */
|
||||
if (ptr <= prev_ptr)
|
||||
{
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* If we fell through above, consume the closing ']'. */
|
||||
if (meta == META_CLASS || meta == META_CLASS_NOT)
|
||||
{
|
||||
PCRE2_ASSERT(*ptr == META_CLASS_END);
|
||||
ptr++;
|
||||
}
|
||||
|
||||
/* Regardless of whether (lengthptr == NULL), some data will still be written
|
||||
out to *pcode, which we need: we have to peek at it, to transform the opcode
|
||||
into the ECLASS version (since we need to hoist up the bitmaps). */
|
||||
PCRE2_ASSERT(code > code_start);
|
||||
extra_length = (lengthptr != NULL)? *lengthptr - prev_length : 0;
|
||||
|
||||
/* Easiest case: convert OP_ALLANY to ECL_ANY */
|
||||
|
||||
if (*code_start == OP_ALLANY)
|
||||
{
|
||||
PCRE2_ASSERT(code - code_start == 1 && extra_length == 0);
|
||||
pop_info->length = 1;
|
||||
*code_start = pop_info->op_single_type = ECL_ANY;
|
||||
memset(pop_info->bits.classbits, 0xff, 32);
|
||||
}
|
||||
|
||||
/* For OP_CLASS and OP_NCLASS, we hoist out the bitmap and convert to
|
||||
ECL_NONE / ECL_ANY respectively. */
|
||||
|
||||
else if (*code_start == OP_CLASS || *code_start == OP_NCLASS)
|
||||
{
|
||||
PCRE2_ASSERT(code - code_start == 1 + 32 / sizeof(PCRE2_UCHAR) &&
|
||||
extra_length == 0);
|
||||
pop_info->length = 1;
|
||||
*code_start = pop_info->op_single_type =
|
||||
(*code_start == OP_CLASS)? ECL_NONE : ECL_ANY;
|
||||
memcpy(pop_info->bits.classbits, code_start + 1, 32);
|
||||
/* Rewind the code pointer, but make sure we adjust *lengthptr, because we
|
||||
do need to reserve that space (even though we only use it temporarily). */
|
||||
if (lengthptr != NULL)
|
||||
*lengthptr += code - (code_start + 1);
|
||||
code = code_start + 1;
|
||||
|
||||
if (!context->needs_bitmap && *code_start == ECL_NONE)
|
||||
{
|
||||
uint32_t *classwords = pop_info->bits.classwords;
|
||||
|
||||
for (int i = 0; i < 8; i++)
|
||||
if (classwords[i] != 0)
|
||||
{
|
||||
context->needs_bitmap = TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
context->needs_bitmap = TRUE;
|
||||
}
|
||||
|
||||
/* Finally, for OP_XCLASS we hoist out the bitmap (if any), and convert to
|
||||
ECL_XCLASS. */
|
||||
|
||||
else
|
||||
{
|
||||
PCRE2_ASSERT(*code_start == OP_XCLASS);
|
||||
*code_start = pop_info->op_single_type = ECL_XCLASS;
|
||||
|
||||
PCRE2_ASSERT(code - code_start >= 1 + LINK_SIZE + 1);
|
||||
|
||||
memcpy(pop_info->bits.classbits, context->cb->classbits.classbits, 32);
|
||||
pop_info->length = (code - code_start) + extra_length;
|
||||
}
|
||||
|
||||
break;
|
||||
} /* End of switch(meta) */
|
||||
|
||||
pop_info->code_start = (lengthptr == NULL)? code_start : NULL;
|
||||
|
||||
if (lengthptr != NULL)
|
||||
{
|
||||
*lengthptr += code - code_start;
|
||||
code = code_start;
|
||||
}
|
||||
|
||||
DONE:
|
||||
PCRE2_ASSERT(lengthptr == NULL || (code == code_start));
|
||||
|
||||
*pptr = ptr;
|
||||
*pcode = code;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* This function consumes a group of implicitly-unioned class elements.
|
||||
These can be characters, ranges, properties, or nested classes, as long
|
||||
as they are all joined by being placed adjacently. */
|
||||
|
||||
static BOOL
|
||||
compile_class_juxtaposition(eclass_context *context, BOOL negated,
|
||||
uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
|
||||
PCRE2_SIZE *lengthptr)
|
||||
{
|
||||
uint32_t *ptr = *pptr;
|
||||
PCRE2_UCHAR *code = *pcode;
|
||||
#ifdef PCRE2_DEBUG
|
||||
PCRE2_UCHAR *start_code = *pcode;
|
||||
#endif
|
||||
|
||||
/* See compile_class_binary_loose() for comments on compile-time folding of
|
||||
the "negated" flag. */
|
||||
|
||||
/* Because it's a non-empty class, there must be an operand at the start. */
|
||||
if (!compile_class_operand(context, negated, &ptr, &code, pop_info, lengthptr))
|
||||
return FALSE;
|
||||
|
||||
while (*ptr != META_CLASS_END &&
|
||||
!(*ptr >= META_ECLASS_AND && *ptr <= META_ECLASS_NOT))
|
||||
{
|
||||
uint32_t op;
|
||||
BOOL rhs_negated;
|
||||
eclass_op_info rhs_op_info;
|
||||
|
||||
if (negated)
|
||||
{
|
||||
/* !(A juxtapose B) -> !A && !B */
|
||||
op = ECL_AND;
|
||||
rhs_negated = TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* A juxtapose B -> A || B */
|
||||
op = ECL_OR;
|
||||
rhs_negated = FALSE;
|
||||
}
|
||||
|
||||
/* An operand must follow the operator. */
|
||||
if (!compile_class_operand(context, rhs_negated, &ptr, &code,
|
||||
&rhs_op_info, lengthptr))
|
||||
return FALSE;
|
||||
|
||||
/* Convert infix to postfix (RPN). */
|
||||
fold_binary(op, pop_info, &rhs_op_info, lengthptr);
|
||||
if (lengthptr == NULL)
|
||||
code = pop_info->code_start + pop_info->length;
|
||||
}
|
||||
|
||||
PCRE2_ASSERT(lengthptr == NULL || code == start_code);
|
||||
|
||||
*pptr = ptr;
|
||||
*pcode = code;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* This function consumes unary prefix operators. */
|
||||
|
||||
static BOOL
|
||||
compile_class_unary(eclass_context *context, BOOL negated,
|
||||
uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
|
||||
PCRE2_SIZE *lengthptr)
|
||||
{
|
||||
uint32_t *ptr = *pptr;
|
||||
#ifdef PCRE2_DEBUG
|
||||
PCRE2_UCHAR *start_code = *pcode;
|
||||
#endif
|
||||
|
||||
while (*ptr == META_ECLASS_NOT)
|
||||
{
|
||||
++ptr;
|
||||
negated = !negated;
|
||||
}
|
||||
|
||||
*pptr = ptr;
|
||||
/* Because it's a non-empty class, there must be an operand. */
|
||||
if (!compile_class_juxtaposition(context, negated, pptr, pcode,
|
||||
pop_info, lengthptr))
|
||||
return FALSE;
|
||||
|
||||
PCRE2_ASSERT(lengthptr == NULL || *pcode == start_code);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* This function consumes tightly-binding binary operators. */
|
||||
|
||||
static BOOL
|
||||
compile_class_binary_tight(eclass_context *context, BOOL negated,
|
||||
uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
|
||||
PCRE2_SIZE *lengthptr)
|
||||
{
|
||||
uint32_t *ptr = *pptr;
|
||||
PCRE2_UCHAR *code = *pcode;
|
||||
#ifdef PCRE2_DEBUG
|
||||
PCRE2_UCHAR *start_code = *pcode;
|
||||
#endif
|
||||
|
||||
/* See compile_class_binary_loose() for comments on compile-time folding of
|
||||
the "negated" flag. */
|
||||
|
||||
/* Because it's a non-empty class, there must be an operand at the start. */
|
||||
if (!compile_class_unary(context, negated, &ptr, &code, pop_info, lengthptr))
|
||||
return FALSE;
|
||||
|
||||
while (*ptr == META_ECLASS_AND)
|
||||
{
|
||||
uint32_t op;
|
||||
BOOL rhs_negated;
|
||||
eclass_op_info rhs_op_info;
|
||||
|
||||
if (negated)
|
||||
{
|
||||
/* !(A && B) -> !A || !B */
|
||||
op = ECL_OR;
|
||||
rhs_negated = TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* A && B -> A && B */
|
||||
op = ECL_AND;
|
||||
rhs_negated = FALSE;
|
||||
}
|
||||
|
||||
++ptr;
|
||||
|
||||
/* An operand must follow the operator. */
|
||||
if (!compile_class_unary(context, rhs_negated, &ptr, &code,
|
||||
&rhs_op_info, lengthptr))
|
||||
return FALSE;
|
||||
|
||||
/* Convert infix to postfix (RPN). */
|
||||
fold_binary(op, pop_info, &rhs_op_info, lengthptr);
|
||||
if (lengthptr == NULL)
|
||||
code = pop_info->code_start + pop_info->length;
|
||||
}
|
||||
|
||||
PCRE2_ASSERT(lengthptr == NULL || code == start_code);
|
||||
|
||||
*pptr = ptr;
|
||||
*pcode = code;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* This function consumes loosely-binding binary operators. */
|
||||
|
||||
static BOOL
|
||||
compile_class_binary_loose(eclass_context *context, BOOL negated,
|
||||
uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
|
||||
PCRE2_SIZE *lengthptr)
|
||||
{
|
||||
uint32_t *ptr = *pptr;
|
||||
PCRE2_UCHAR *code = *pcode;
|
||||
#ifdef PCRE2_DEBUG
|
||||
PCRE2_UCHAR *start_code = *pcode;
|
||||
#endif
|
||||
|
||||
/* We really want to fold the negation operator, if at all possible, so that
|
||||
simple cases can be reduced down. In particular, in 8-bit no-UTF mode, we want
|
||||
to produce a fully-folded expression, so that we can guarantee not to emit any
|
||||
OP_ECLASS codes (in the same way that we never emit OP_XCLASS in this mode).
|
||||
|
||||
This has the consequence that with a little ingenuity, we can in fact avoid
|
||||
emitting (nearly...) all cases of the "NOT" operator. Imagine that we have:
|
||||
!(A ...
|
||||
We have parsed the preceding "!", and we are about to parse the "A" operand. We
|
||||
don't know yet whether there will even be a following binary operand! Both of
|
||||
these are possibilities for what follows:
|
||||
!(A && B)
|
||||
!(A)
|
||||
However, we can still fold the "!" into the "A" operand, because no matter what
|
||||
the following binary operator will be, we can produce an expression which is
|
||||
equivalent. */
|
||||
|
||||
/* Because it's a non-empty class, there must be an operand at the start. */
|
||||
if (!compile_class_binary_tight(context, negated, &ptr, &code,
|
||||
pop_info, lengthptr))
|
||||
return FALSE;
|
||||
|
||||
while (*ptr >= META_ECLASS_OR && *ptr <= META_ECLASS_XOR)
|
||||
{
|
||||
uint32_t op;
|
||||
BOOL op_neg;
|
||||
BOOL rhs_negated;
|
||||
eclass_op_info rhs_op_info;
|
||||
|
||||
if (negated)
|
||||
{
|
||||
/* The whole expression is being negated; we respond by unconditionally
|
||||
negating the LHS A, before seeing what follows. And hooray! We can recover,
|
||||
no matter what follows. */
|
||||
/* !(A || B) -> !A && !B */
|
||||
/* !(A -- B) -> !(A && !B) -> !A || B */
|
||||
/* !(A XOR B) -> !(!A XOR !B) -> !A XNOR !B */
|
||||
op = (*ptr == META_ECLASS_OR )? ECL_AND :
|
||||
(*ptr == META_ECLASS_SUB)? ECL_OR :
|
||||
/*ptr == META_ECLASS_XOR*/ ECL_XOR;
|
||||
op_neg = (*ptr == META_ECLASS_XOR);
|
||||
rhs_negated = *ptr != META_ECLASS_SUB;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* A || B -> A || B */
|
||||
/* A -- B -> A && !B */
|
||||
/* A XOR B -> A XOR B */
|
||||
op = (*ptr == META_ECLASS_OR )? ECL_OR :
|
||||
(*ptr == META_ECLASS_SUB)? ECL_AND :
|
||||
/*ptr == META_ECLASS_XOR*/ ECL_XOR;
|
||||
op_neg = FALSE;
|
||||
rhs_negated = *ptr == META_ECLASS_SUB;
|
||||
}
|
||||
|
||||
++ptr;
|
||||
|
||||
/* An operand must follow the operator. */
|
||||
if (!compile_class_binary_tight(context, rhs_negated, &ptr, &code,
|
||||
&rhs_op_info, lengthptr))
|
||||
return FALSE;
|
||||
|
||||
/* Convert infix to postfix (RPN). */
|
||||
fold_binary(op, pop_info, &rhs_op_info, lengthptr);
|
||||
if (op_neg) fold_negation(pop_info, lengthptr, FALSE);
|
||||
if (lengthptr == NULL)
|
||||
code = pop_info->code_start + pop_info->length;
|
||||
}
|
||||
|
||||
PCRE2_ASSERT(lengthptr == NULL || code == start_code);
|
||||
|
||||
*pptr = ptr;
|
||||
*pcode = code;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* This function converts the META codes in pptr into opcodes written to
|
||||
pcode. The pptr must start at a META_CLASS or META_CLASS_NOT.
|
||||
|
||||
The class is compiled as a left-associative sequence of operator
|
||||
applications.
|
||||
|
||||
The pptr will be left pointing at the matching META_CLASS_END. */
|
||||
|
||||
static BOOL
|
||||
compile_eclass_nested(eclass_context *context, BOOL negated,
|
||||
uint32_t **pptr, PCRE2_UCHAR **pcode,
|
||||
eclass_op_info *pop_info, PCRE2_SIZE *lengthptr)
|
||||
{
|
||||
uint32_t *ptr = *pptr;
|
||||
#ifdef PCRE2_DEBUG
|
||||
PCRE2_UCHAR *start_code = *pcode;
|
||||
#endif
|
||||
|
||||
/* The CLASS_IS_ECLASS bit must be set since it is a nested class. */
|
||||
PCRE2_ASSERT(*ptr == (META_CLASS | CLASS_IS_ECLASS) ||
|
||||
*ptr == (META_CLASS_NOT | CLASS_IS_ECLASS));
|
||||
|
||||
if (*ptr++ == (META_CLASS_NOT | CLASS_IS_ECLASS))
|
||||
negated = !negated;
|
||||
|
||||
(*pptr)++;
|
||||
|
||||
/* Because it's a non-empty class, there must be an operand at the start. */
|
||||
if (!compile_class_binary_loose(context, negated, pptr, pcode,
|
||||
pop_info, lengthptr))
|
||||
return FALSE;
|
||||
|
||||
PCRE2_ASSERT(**pptr == META_CLASS_END);
|
||||
PCRE2_ASSERT(lengthptr == NULL || *pcode == start_code);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
BOOL
|
||||
PRIV(compile_class_nested)(uint32_t options, uint32_t xoptions,
|
||||
uint32_t **pptr, PCRE2_UCHAR **pcode, int *errorcodeptr,
|
||||
compile_block *cb, PCRE2_SIZE *lengthptr)
|
||||
{
|
||||
eclass_context context;
|
||||
eclass_op_info op_info;
|
||||
PCRE2_SIZE previous_length = (lengthptr != NULL)? *lengthptr : 0;
|
||||
PCRE2_UCHAR *code = *pcode;
|
||||
PCRE2_UCHAR *previous;
|
||||
BOOL allbitsone = TRUE;
|
||||
|
||||
context.needs_bitmap = FALSE;
|
||||
context.options = options;
|
||||
context.xoptions = xoptions;
|
||||
context.errorcodeptr = errorcodeptr;
|
||||
context.cb = cb;
|
||||
|
||||
previous = code;
|
||||
*code++ = OP_ECLASS;
|
||||
code += LINK_SIZE;
|
||||
*code++ = 0; /* Flags, currently zero. */
|
||||
if (!compile_eclass_nested(&context, FALSE, pptr, &code, &op_info, lengthptr))
|
||||
return FALSE;
|
||||
|
||||
if (lengthptr != NULL)
|
||||
{
|
||||
*lengthptr += code - previous;
|
||||
code = previous;
|
||||
/* (*lengthptr - previous_length) now holds the amount of buffer that
|
||||
we require to make the call to compile_class_nested() with
|
||||
lengthptr = NULL, and including the (1+LINK_SIZE+1) that we write out
|
||||
before that call. */
|
||||
}
|
||||
|
||||
/* Do some useful counting of what's in the bitmap. */
|
||||
for (int i = 0; i < 8; i++)
|
||||
if (op_info.bits.classwords[i] != 0xffffffff)
|
||||
{
|
||||
allbitsone = FALSE;
|
||||
break;
|
||||
}
|
||||
|
||||
/* After constant-folding the extended class syntax, it may turn out to be
|
||||
a simple class after all. In that case, we can unwrap it from the
|
||||
OP_ECLASS container - and in fact, we must do so, because in 8-bit
|
||||
no-Unicode mode the matcher is compiled without support for OP_ECLASS. */
|
||||
|
||||
#ifndef SUPPORT_WIDE_CHARS
|
||||
PCRE2_ASSERT(op_info.op_single_type != 0);
|
||||
#else
|
||||
if (op_info.op_single_type != 0)
|
||||
#endif
|
||||
{
|
||||
/* Rewind back over the OP_ECLASS. */
|
||||
code = previous;
|
||||
|
||||
/* If the bits are all ones, and the "high characters" are all matched
|
||||
too, we use a special-cased encoding of OP_ALLANY. */
|
||||
|
||||
if (op_info.op_single_type == ECL_ANY && allbitsone)
|
||||
{
|
||||
/* Advancing code means rewinding lengthptr, at this point. */
|
||||
if (lengthptr != NULL) *lengthptr -= 1;
|
||||
*code++ = OP_ALLANY;
|
||||
}
|
||||
|
||||
/* If the high bits are all matched / all not-matched, then we emit an
|
||||
OP_NCLASS/OP_CLASS respectively. */
|
||||
|
||||
else if (op_info.op_single_type == ECL_ANY ||
|
||||
op_info.op_single_type == ECL_NONE)
|
||||
{
|
||||
PCRE2_SIZE required_len = 1 + (32 / sizeof(PCRE2_UCHAR));
|
||||
|
||||
if (lengthptr != NULL)
|
||||
{
|
||||
if (required_len > (*lengthptr - previous_length))
|
||||
*lengthptr = previous_length + required_len;
|
||||
}
|
||||
|
||||
/* Advancing code means rewinding lengthptr, at this point. */
|
||||
if (lengthptr != NULL) *lengthptr -= required_len;
|
||||
*code++ = (op_info.op_single_type == ECL_ANY)? OP_NCLASS : OP_CLASS;
|
||||
memcpy(code, op_info.bits.classbits, 32);
|
||||
code += 32 / sizeof(PCRE2_UCHAR);
|
||||
}
|
||||
|
||||
/* Otherwise, we have an ECL_XCLASS, so we have the OP_XCLASS data
|
||||
there, but, we pulled out its bitmap into op_info, so now we have to
|
||||
put that back into the OP_XCLASS. */
|
||||
|
||||
else
|
||||
{
|
||||
#ifndef SUPPORT_WIDE_CHARS
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
#else
|
||||
BOOL need_map = context.needs_bitmap;
|
||||
PCRE2_SIZE required_len;
|
||||
|
||||
PCRE2_ASSERT(op_info.op_single_type == ECL_XCLASS);
|
||||
required_len = op_info.length + (need_map? 32/sizeof(PCRE2_UCHAR) : 0);
|
||||
|
||||
if (lengthptr != NULL)
|
||||
{
|
||||
/* Don't unconditionally request all the space we need - we may
|
||||
already have asked for more during processing of the ECLASS. */
|
||||
if (required_len > (*lengthptr - previous_length))
|
||||
*lengthptr = previous_length + required_len;
|
||||
|
||||
/* The code we write out here won't be ignored, even during the
|
||||
(lengthptr != NULL) phase, because if there's a following quantifier
|
||||
it will peek backwards. So we do have to write out a (truncated)
|
||||
OP_XCLASS, even on this branch. */
|
||||
*lengthptr -= 1 + LINK_SIZE + 1;
|
||||
*code++ = OP_XCLASS;
|
||||
PUT(code, 0, 1 + LINK_SIZE + 1);
|
||||
code += LINK_SIZE;
|
||||
*code++ = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
PCRE2_UCHAR *rest;
|
||||
PCRE2_SIZE rest_len;
|
||||
PCRE2_UCHAR flags;
|
||||
|
||||
/* 1 unit: OP_XCLASS | LINK_SIZE units | 1 unit: flags | ...rest */
|
||||
PCRE2_ASSERT(op_info.length >= 1 + LINK_SIZE + 1);
|
||||
rest = op_info.code_start + 1 + LINK_SIZE + 1;
|
||||
rest_len = (op_info.code_start + op_info.length) - rest;
|
||||
|
||||
/* First read any data we use, before memmove splats it. */
|
||||
flags = op_info.code_start[1 + LINK_SIZE];
|
||||
PCRE2_ASSERT((flags & XCL_MAP) == 0);
|
||||
|
||||
/* Next do the memmove before any writes. */
|
||||
memmove(code + 1 + LINK_SIZE + 1 + (need_map? 32/sizeof(PCRE2_UCHAR) : 0),
|
||||
rest, CU2BYTES(rest_len));
|
||||
|
||||
/* Finally write the header data. */
|
||||
*code++ = OP_XCLASS;
|
||||
PUT(code, 0, (int)required_len);
|
||||
code += LINK_SIZE;
|
||||
*code++ = flags | (need_map? XCL_MAP : 0);
|
||||
if (need_map)
|
||||
{
|
||||
memcpy(code, op_info.bits.classbits, 32);
|
||||
code += 32 / sizeof(PCRE2_UCHAR);
|
||||
}
|
||||
code += rest_len;
|
||||
}
|
||||
#endif /* SUPPORT_WIDE_CHARS */
|
||||
}
|
||||
}
|
||||
|
||||
/* Otherwise, we're going to keep the OP_ECLASS. However, again we need
|
||||
to do some adjustment to insert the bitmap if we have one. */
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
else
|
||||
{
|
||||
BOOL need_map = context.needs_bitmap;
|
||||
PCRE2_SIZE required_len =
|
||||
1 + LINK_SIZE + 1 + (need_map? 32/sizeof(PCRE2_UCHAR) : 0) + op_info.length;
|
||||
|
||||
if (lengthptr != NULL)
|
||||
{
|
||||
if (required_len > (*lengthptr - previous_length))
|
||||
*lengthptr = previous_length + required_len;
|
||||
|
||||
/* As for the XCLASS branch above, we do have to write out a dummy
|
||||
OP_ECLASS, because of the backwards peek by the quantifier code. Write
|
||||
out a (truncated) OP_ECLASS, even on this branch. */
|
||||
*lengthptr -= 1 + LINK_SIZE + 1;
|
||||
*code++ = OP_ECLASS;
|
||||
PUT(code, 0, 1 + LINK_SIZE + 1);
|
||||
code += LINK_SIZE;
|
||||
*code++ = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (need_map)
|
||||
{
|
||||
PCRE2_UCHAR *map_start = previous + 1 + LINK_SIZE + 1;
|
||||
previous[1 + LINK_SIZE] |= ECL_MAP;
|
||||
memmove(map_start + 32/sizeof(PCRE2_UCHAR), map_start,
|
||||
CU2BYTES(code - map_start));
|
||||
memcpy(map_start, op_info.bits.classbits, 32);
|
||||
code += 32 / sizeof(PCRE2_UCHAR);
|
||||
}
|
||||
PUT(previous, 1, (int)(code - previous));
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_WIDE_CHARS */
|
||||
|
||||
*pcode = code;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* End of pcre2_compile_class.c */
|
||||
@@ -1,252 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
/* Save the configured link size, which is in bytes. In 16-bit and 32-bit modes
|
||||
its value gets changed by pcre2_intmodedep.h (included by pcre2_internal.h) to
|
||||
be in code units. */
|
||||
|
||||
static int configured_link_size = LINK_SIZE;
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
/* These macros are the standard way of turning unquoted text into C strings.
|
||||
They allow macros like PCRE2_MAJOR to be defined without quotes, which is
|
||||
convenient for user programs that want to test their values. */
|
||||
|
||||
#define STRING(a) # a
|
||||
#define XSTRING(s) STRING(s)
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return info about what features are configured *
|
||||
*************************************************/
|
||||
|
||||
/* If where is NULL, the length of memory required is returned.
|
||||
|
||||
Arguments:
|
||||
what what information is required
|
||||
where where to put the information
|
||||
|
||||
Returns: 0 if a numerical value is returned
|
||||
>= 0 if a string value
|
||||
PCRE2_ERROR_BADOPTION if "where" not recognized
|
||||
or JIT target requested when JIT not enabled
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_config(uint32_t what, void *where)
|
||||
{
|
||||
if (where == NULL) /* Requests a length */
|
||||
{
|
||||
switch(what)
|
||||
{
|
||||
default:
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
|
||||
case PCRE2_CONFIG_BSR:
|
||||
case PCRE2_CONFIG_COMPILED_WIDTHS:
|
||||
case PCRE2_CONFIG_DEPTHLIMIT:
|
||||
case PCRE2_CONFIG_HEAPLIMIT:
|
||||
case PCRE2_CONFIG_JIT:
|
||||
case PCRE2_CONFIG_LINKSIZE:
|
||||
case PCRE2_CONFIG_MATCHLIMIT:
|
||||
case PCRE2_CONFIG_NEVER_BACKSLASH_C:
|
||||
case PCRE2_CONFIG_NEWLINE:
|
||||
case PCRE2_CONFIG_PARENSLIMIT:
|
||||
case PCRE2_CONFIG_STACKRECURSE: /* Obsolete */
|
||||
case PCRE2_CONFIG_TABLES_LENGTH:
|
||||
case PCRE2_CONFIG_UNICODE:
|
||||
return sizeof(uint32_t);
|
||||
|
||||
/* These are handled below */
|
||||
|
||||
case PCRE2_CONFIG_JITTARGET:
|
||||
case PCRE2_CONFIG_UNICODE_VERSION:
|
||||
case PCRE2_CONFIG_VERSION:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
switch (what)
|
||||
{
|
||||
default:
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
|
||||
case PCRE2_CONFIG_BSR:
|
||||
#ifdef BSR_ANYCRLF
|
||||
*((uint32_t *)where) = PCRE2_BSR_ANYCRLF;
|
||||
#else
|
||||
*((uint32_t *)where) = PCRE2_BSR_UNICODE;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_COMPILED_WIDTHS:
|
||||
*((uint32_t *)where) = 0
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
+ 1
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
+ 2
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
+ 4
|
||||
#endif
|
||||
;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_DEPTHLIMIT:
|
||||
*((uint32_t *)where) = MATCH_LIMIT_DEPTH;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_HEAPLIMIT:
|
||||
*((uint32_t *)where) = HEAP_LIMIT;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_JIT:
|
||||
#ifdef SUPPORT_JIT
|
||||
*((uint32_t *)where) = 1;
|
||||
#else
|
||||
*((uint32_t *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_JITTARGET:
|
||||
#ifdef SUPPORT_JIT
|
||||
{
|
||||
const char *v = PRIV(jit_get_target)();
|
||||
return (int)(1 + ((where == NULL)?
|
||||
strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
|
||||
}
|
||||
#else
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
#endif
|
||||
|
||||
case PCRE2_CONFIG_LINKSIZE:
|
||||
*((uint32_t *)where) = (uint32_t)configured_link_size;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_MATCHLIMIT:
|
||||
*((uint32_t *)where) = MATCH_LIMIT;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_NEWLINE:
|
||||
*((uint32_t *)where) = NEWLINE_DEFAULT;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_NEVER_BACKSLASH_C:
|
||||
#ifdef NEVER_BACKSLASH_C
|
||||
*((uint32_t *)where) = 1;
|
||||
#else
|
||||
*((uint32_t *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_PARENSLIMIT:
|
||||
*((uint32_t *)where) = PARENS_NEST_LIMIT;
|
||||
break;
|
||||
|
||||
/* This is now obsolete. The stack is no longer used via recursion for
|
||||
handling backtracking in pcre2_match(). */
|
||||
|
||||
case PCRE2_CONFIG_STACKRECURSE:
|
||||
*((uint32_t *)where) = 0;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_TABLES_LENGTH:
|
||||
*((uint32_t *)where) = TABLES_LENGTH;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_UNICODE_VERSION:
|
||||
{
|
||||
#if defined SUPPORT_UNICODE
|
||||
const char *v = PRIV(unicode_version);
|
||||
#else
|
||||
const char *v = "Unicode not supported";
|
||||
#endif
|
||||
return (int)(1 + ((where == NULL)?
|
||||
strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
|
||||
}
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_UNICODE:
|
||||
#if defined SUPPORT_UNICODE
|
||||
*((uint32_t *)where) = 1;
|
||||
#else
|
||||
*((uint32_t *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
/* The hackery in setting "v" below is to cope with the case when
|
||||
PCRE2_PRERELEASE is set to an empty string (which it is for real releases).
|
||||
If the second alternative is used in this case, it does not leave a space
|
||||
before the date. On the other hand, if all four macros are put into a single
|
||||
XSTRING when PCRE2_PRERELEASE is not empty, an unwanted space is inserted.
|
||||
There are problems using an "obvious" approach like this:
|
||||
|
||||
XSTRING(PCRE2_MAJOR) "." XSTRING(PCRE2_MINOR)
|
||||
XSTRING(PCRE2_PRERELEASE) " " XSTRING(PCRE2_DATE)
|
||||
|
||||
because, when PCRE2_PRERELEASE is empty, this leads to an attempted expansion
|
||||
of STRING(). The C standard states: "If (before argument substitution) any
|
||||
argument consists of no preprocessing tokens, the behavior is undefined." It
|
||||
turns out the gcc treats this case as a single empty string - which is what
|
||||
we really want - but Visual C grumbles about the lack of an argument for the
|
||||
macro. Unfortunately, both are within their rights. As there seems to be no
|
||||
way to test for a macro's value being empty at compile time, we have to
|
||||
resort to a runtime test. */
|
||||
|
||||
case PCRE2_CONFIG_VERSION:
|
||||
{
|
||||
const char *v = (XSTRING(Z PCRE2_PRERELEASE)[1] == 0)?
|
||||
XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) :
|
||||
XSTRING(PCRE2_MAJOR.PCRE2_MINOR) XSTRING(PCRE2_PRERELEASE PCRE2_DATE);
|
||||
return (int)(1 + ((where == NULL)?
|
||||
strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre2_config.c */
|
||||
@@ -1,556 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Default malloc/free functions *
|
||||
*************************************************/
|
||||
|
||||
/* Ignore the "user data" argument in each case. */
|
||||
|
||||
/* Default allocator: hands the request straight to malloc(). The second
argument exists only to match the memory-control callback signature. */

static void *default_malloc(size_t size, void *data)
{
void *block = malloc(size);
(void)data;
return block;
}
|
||||
|
||||
|
||||
/* Default deallocator: releases the block with free(). The second argument
exists only to match the memory-control callback signature. */

static void default_free(void *block, void *data)
{
free(block);
(void)data;
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get a block and save memory control *
|
||||
*************************************************/
|
||||
|
||||
/* This internal function is called to get a block of memory in which the
|
||||
memory control data is to be stored at the start for future use.
|
||||
|
||||
Arguments:
|
||||
size amount of memory required
|
||||
memctl pointer to a memctl block or NULL
|
||||
|
||||
Returns: pointer to memory or NULL on failure
|
||||
*/
|
||||
|
||||
extern void *
|
||||
PRIV(memctl_malloc)(size_t size, pcre2_memctl *memctl)
|
||||
{
|
||||
pcre2_memctl *newmemctl;
|
||||
void *yield = (memctl == NULL)? malloc(size) :
|
||||
memctl->malloc(size, memctl->memory_data);
|
||||
if (yield == NULL) return NULL;
|
||||
newmemctl = (pcre2_memctl *)yield;
|
||||
if (memctl == NULL)
|
||||
{
|
||||
newmemctl->malloc = default_malloc;
|
||||
newmemctl->free = default_free;
|
||||
newmemctl->memory_data = NULL;
|
||||
}
|
||||
else *newmemctl = *memctl;
|
||||
return yield;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create and initialize contexts *
|
||||
*************************************************/
|
||||
|
||||
/* Initializing for compile and match contexts is done in separate, private
|
||||
functions so that these can be called from functions such as pcre2_compile()
|
||||
when an external context is not supplied. The initializing functions have an
|
||||
option to set up default memory management. */
|
||||
|
||||
/* Create a general context. A NULL allocator or deallocator is replaced by
the corresponding default. The context itself is obtained from the chosen
allocator, and records the (possibly defaulted) functions and the user data.
Returns NULL if the allocation fails. */

PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
pcre2_general_context_create(void *(*private_malloc)(size_t, void *),
  void (*private_free)(void *, void *), void *memory_data)
{
void *(*alloc_fn)(size_t, void *) =
  (private_malloc != NULL)? private_malloc : default_malloc;
void (*release_fn)(void *, void *) =
  (private_free != NULL)? private_free : default_free;
pcre2_general_context *gcontext =
  alloc_fn(sizeof(pcre2_real_general_context), memory_data);

if (gcontext != NULL)
  {
  gcontext->memctl.malloc = alloc_fn;
  gcontext->memctl.free = release_fn;
  gcontext->memctl.memory_data = memory_data;
  }
return gcontext;
}
|
||||
|
||||
|
||||
/* A default compile context is set up to save having to initialize at run time
|
||||
when no context is supplied to the compile function. */
|
||||
|
||||
pcre2_compile_context PRIV(default_compile_context) = {
|
||||
{ default_malloc, default_free, NULL }, /* Default memory handling */
|
||||
NULL, /* Stack guard */
|
||||
NULL, /* Stack guard data */
|
||||
PRIV(default_tables), /* Character tables */
|
||||
PCRE2_UNSET, /* Max pattern length */
|
||||
PCRE2_UNSET, /* Max pattern compiled length */
|
||||
BSR_DEFAULT, /* Backslash R default */
|
||||
NEWLINE_DEFAULT, /* Newline convention */
|
||||
PARENS_NEST_LIMIT, /* As it says */
|
||||
0, /* Extra options */
|
||||
MAX_VARLOOKBEHIND, /* As it says */
|
||||
PCRE2_OPTIMIZATION_ALL /* All optimizations enabled */
|
||||
};
|
||||
|
||||
/* The create function copies the default into the new memory, but must
|
||||
override the default memory handling functions if a gcontext was provided. */
|
||||
|
||||
/* Create a compile context: allocate via the general context (or the default
allocator when gcontext is NULL), fill it from the default compile context,
then restore the caller's memory control, which the default copy overwrote.
Returns NULL if the allocation fails. */

PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
pcre2_compile_context_create(pcre2_general_context *gcontext)
{
pcre2_memctl *memctl = (pcre2_memctl *)gcontext;
pcre2_compile_context *ccontext =
  PRIV(memctl_malloc)(sizeof(pcre2_real_compile_context), memctl);

if (ccontext != NULL)
  {
  *ccontext = PRIV(default_compile_context);
  if (gcontext != NULL)
    *((pcre2_memctl *)ccontext) = *((pcre2_memctl *)gcontext);
  }
return ccontext;
}
|
||||
|
||||
|
||||
/* A default match context is set up to save having to initialize at run time
|
||||
when no context is supplied to a match function. */
|
||||
|
||||
pcre2_match_context PRIV(default_match_context) = {
|
||||
{ default_malloc, default_free, NULL },
|
||||
#ifdef SUPPORT_JIT
|
||||
NULL, /* JIT callback */
|
||||
NULL, /* JIT callback data */
|
||||
#endif
|
||||
NULL, /* Callout function */
|
||||
NULL, /* Callout data */
|
||||
NULL, /* Substitute callout function */
|
||||
NULL, /* Substitute callout data */
|
||||
NULL, /* Substitute case callout function */
|
||||
NULL, /* Substitute case callout data */
|
||||
PCRE2_UNSET, /* Offset limit */
|
||||
HEAP_LIMIT,
|
||||
MATCH_LIMIT,
|
||||
MATCH_LIMIT_DEPTH };
|
||||
|
||||
/* The create function copies the default into the new memory, but must
|
||||
override the default memory handling functions if a gcontext was provided. */
|
||||
|
||||
/* Create a match context: allocate via the general context (or the default
allocator when gcontext is NULL), fill it from the default match context,
then restore the caller's memory control, which the default copy overwrote.
Returns NULL if the allocation fails. */

PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
pcre2_match_context_create(pcre2_general_context *gcontext)
{
pcre2_memctl *memctl = (pcre2_memctl *)gcontext;
pcre2_match_context *mcontext =
  PRIV(memctl_malloc)(sizeof(pcre2_real_match_context), memctl);

if (mcontext != NULL)
  {
  *mcontext = PRIV(default_match_context);
  if (gcontext != NULL)
    *((pcre2_memctl *)mcontext) = *((pcre2_memctl *)gcontext);
  }
return mcontext;
}
|
||||
|
||||
|
||||
/* A default convert context is set up to save having to initialize at run time
|
||||
when no context is supplied to the convert function. */
|
||||
|
||||
pcre2_convert_context PRIV(default_convert_context) = {
|
||||
{ default_malloc, default_free, NULL }, /* Default memory handling */
|
||||
#ifdef _WIN32
|
||||
CHAR_BACKSLASH, /* Default path separator */
|
||||
CHAR_GRAVE_ACCENT /* Default escape character */
|
||||
#else /* Not Windows */
|
||||
CHAR_SLASH, /* Default path separator */
|
||||
CHAR_BACKSLASH /* Default escape character */
|
||||
#endif
|
||||
};
|
||||
|
||||
/* The create function copies the default into the new memory, but must
|
||||
override the default memory handling functions if a gcontext was provided. */
|
||||
|
||||
/* Create a convert context: allocate via the general context (or the default
allocator when gcontext is NULL), fill it from the default convert context,
then restore the caller's memory control, which the default copy overwrote.
Returns NULL if the allocation fails. */

PCRE2_EXP_DEFN pcre2_convert_context * PCRE2_CALL_CONVENTION
pcre2_convert_context_create(pcre2_general_context *gcontext)
{
pcre2_memctl *memctl = (pcre2_memctl *)gcontext;
pcre2_convert_context *ccontext =
  PRIV(memctl_malloc)(sizeof(pcre2_real_convert_context), memctl);

if (ccontext != NULL)
  {
  *ccontext = PRIV(default_convert_context);
  if (gcontext != NULL)
    *((pcre2_memctl *)ccontext) = *((pcre2_memctl *)gcontext);
  }
return ccontext;
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Context copy functions *
|
||||
*************************************************/
|
||||
|
||||
/* Duplicate a general context using the allocator stored in the context
itself. Returns the new context, or NULL if the allocation fails. The
argument is dereferenced without a NULL check. */

PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
pcre2_general_context_copy(pcre2_general_context *gcontext)
{
const size_t nbytes = sizeof(pcre2_real_general_context);
pcre2_general_context *newcontext =
  gcontext->memctl.malloc(nbytes, gcontext->memctl.memory_data);

if (newcontext != NULL) memcpy(newcontext, gcontext, nbytes);
return newcontext;
}
|
||||
|
||||
|
||||
/* Duplicate a compile context using the allocator stored in the context
itself. Returns the new context, or NULL if the allocation fails. The
argument is dereferenced without a NULL check. */

PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
pcre2_compile_context_copy(pcre2_compile_context *ccontext)
{
const size_t nbytes = sizeof(pcre2_real_compile_context);
pcre2_compile_context *newcontext =
  ccontext->memctl.malloc(nbytes, ccontext->memctl.memory_data);

if (newcontext != NULL) memcpy(newcontext, ccontext, nbytes);
return newcontext;
}
|
||||
|
||||
|
||||
/* Duplicate a match context using the allocator stored in the context
itself. Returns the new context, or NULL if the allocation fails. The
argument is dereferenced without a NULL check. */

PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
pcre2_match_context_copy(pcre2_match_context *mcontext)
{
const size_t nbytes = sizeof(pcre2_real_match_context);
pcre2_match_context *newcontext =
  mcontext->memctl.malloc(nbytes, mcontext->memctl.memory_data);

if (newcontext != NULL) memcpy(newcontext, mcontext, nbytes);
return newcontext;
}
|
||||
|
||||
|
||||
/* Duplicate a convert context using the allocator stored in the context
itself. Returns the new context, or NULL if the allocation fails. The
argument is dereferenced without a NULL check. */

PCRE2_EXP_DEFN pcre2_convert_context * PCRE2_CALL_CONVENTION
pcre2_convert_context_copy(pcre2_convert_context *ccontext)
{
const size_t nbytes = sizeof(pcre2_real_convert_context);
pcre2_convert_context *newcontext =
  ccontext->memctl.malloc(nbytes, ccontext->memctl.memory_data);

if (newcontext != NULL) memcpy(newcontext, ccontext, nbytes);
return newcontext;
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Context free functions *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_general_context_free(pcre2_general_context *gcontext)
|
||||
{
|
||||
if (gcontext != NULL)
|
||||
gcontext->memctl.free(gcontext, gcontext->memctl.memory_data);
|
||||
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_compile_context_free(pcre2_compile_context *ccontext)
|
||||
{
|
||||
if (ccontext != NULL)
|
||||
ccontext->memctl.free(ccontext, ccontext->memctl.memory_data);
|
||||
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_match_context_free(pcre2_match_context *mcontext)
|
||||
{
|
||||
if (mcontext != NULL)
|
||||
mcontext->memctl.free(mcontext, mcontext->memctl.memory_data);
|
||||
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_convert_context_free(pcre2_convert_context *ccontext)
|
||||
{
|
||||
if (ccontext != NULL)
|
||||
ccontext->memctl.free(ccontext, ccontext->memctl.memory_data);
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Set values in contexts *
|
||||
*************************************************/
|
||||
|
||||
/* All these functions return 0 for success or PCRE2_ERROR_BADDATA if invalid
|
||||
data is given. Only some of the functions are able to test the validity of the
|
||||
data. */
|
||||
|
||||
|
||||
/* ------------ Compile context ------------ */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_character_tables(pcre2_compile_context *ccontext,
|
||||
const uint8_t *tables)
|
||||
{
|
||||
ccontext->tables = tables;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_bsr(pcre2_compile_context *ccontext, uint32_t value)
|
||||
{
|
||||
switch(value)
|
||||
{
|
||||
case PCRE2_BSR_ANYCRLF:
|
||||
case PCRE2_BSR_UNICODE:
|
||||
ccontext->bsr_convention = value;
|
||||
return 0;
|
||||
|
||||
default:
|
||||
return PCRE2_ERROR_BADDATA;
|
||||
}
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, PCRE2_SIZE length)
|
||||
{
|
||||
ccontext->max_pattern_length = length;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_max_pattern_compiled_length(pcre2_compile_context *ccontext, PCRE2_SIZE length)
|
||||
{
|
||||
ccontext->max_pattern_compiled_length = length;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline)
|
||||
{
|
||||
switch(newline)
|
||||
{
|
||||
case PCRE2_NEWLINE_CR:
|
||||
case PCRE2_NEWLINE_LF:
|
||||
case PCRE2_NEWLINE_CRLF:
|
||||
case PCRE2_NEWLINE_ANY:
|
||||
case PCRE2_NEWLINE_ANYCRLF:
|
||||
case PCRE2_NEWLINE_NUL:
|
||||
ccontext->newline_convention = newline;
|
||||
return 0;
|
||||
|
||||
default:
|
||||
return PCRE2_ERROR_BADDATA;
|
||||
}
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_max_varlookbehind(pcre2_compile_context *ccontext, uint32_t limit)
|
||||
{
|
||||
ccontext->max_varlookbehind = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, uint32_t limit)
|
||||
{
|
||||
ccontext->parens_nest_limit = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_compile_extra_options(pcre2_compile_context *ccontext, uint32_t options)
|
||||
{
|
||||
ccontext->extra_options = options;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
|
||||
int (*guard)(uint32_t, void *), void *user_data)
|
||||
{
|
||||
ccontext->stack_guard = guard;
|
||||
ccontext->stack_guard_data = user_data;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_optimize(pcre2_compile_context *ccontext, uint32_t directive)
|
||||
{
|
||||
if (ccontext == NULL)
|
||||
return PCRE2_ERROR_NULL;
|
||||
|
||||
switch (directive)
|
||||
{
|
||||
case PCRE2_OPTIMIZATION_NONE:
|
||||
ccontext->optimization_flags = 0;
|
||||
break;
|
||||
|
||||
case PCRE2_OPTIMIZATION_FULL:
|
||||
ccontext->optimization_flags = PCRE2_OPTIMIZATION_ALL;
|
||||
break;
|
||||
|
||||
default:
|
||||
if (directive >= PCRE2_AUTO_POSSESS && directive <= PCRE2_START_OPTIMIZE_OFF)
|
||||
{
|
||||
/* Even directive numbers starting from 64 switch a bit on;
|
||||
* Odd directive numbers starting from 65 switch a bit off */
|
||||
if ((directive & 1) != 0)
|
||||
ccontext->optimization_flags &= ~(1u << ((directive >> 1) - 32));
|
||||
else
|
||||
ccontext->optimization_flags |= 1u << ((directive >> 1) - 32);
|
||||
return 0;
|
||||
}
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ------------ Match context ------------ */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_callout(pcre2_match_context *mcontext,
|
||||
int (*callout)(pcre2_callout_block *, void *), void *callout_data)
|
||||
{
|
||||
mcontext->callout = callout;
|
||||
mcontext->callout_data = callout_data;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_substitute_callout(pcre2_match_context *mcontext,
|
||||
int (*substitute_callout)(pcre2_substitute_callout_block *, void *),
|
||||
void *substitute_callout_data)
|
||||
{
|
||||
mcontext->substitute_callout = substitute_callout;
|
||||
mcontext->substitute_callout_data = substitute_callout_data;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_substitute_case_callout(pcre2_match_context *mcontext,
|
||||
PCRE2_SIZE (*substitute_case_callout)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *,
|
||||
PCRE2_SIZE, int, void *),
|
||||
void *substitute_case_callout_data)
|
||||
{
|
||||
mcontext->substitute_case_callout = substitute_case_callout;
|
||||
mcontext->substitute_case_callout_data = substitute_case_callout_data;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_heap_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||
{
|
||||
mcontext->heap_limit = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||
{
|
||||
mcontext->match_limit = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_depth_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||
{
|
||||
mcontext->depth_limit = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_offset_limit(pcre2_match_context *mcontext, PCRE2_SIZE limit)
|
||||
{
|
||||
mcontext->offset_limit = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* These functions became obsolete at release 10.30. The first is kept as a
|
||||
synonym for backwards compatibility. The second now does nothing. Exclude both
|
||||
from coverage reports. */
|
||||
|
||||
/* LCOV_EXCL_START */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||
{
|
||||
return pcre2_set_depth_limit(mcontext, limit);
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_recursion_memory_management(pcre2_match_context *mcontext,
|
||||
void *(*mymalloc)(size_t, void *), void (*myfree)(void *, void *),
|
||||
void *mydata)
|
||||
{
|
||||
(void)mcontext;
|
||||
(void)mymalloc;
|
||||
(void)myfree;
|
||||
(void)mydata;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* LCOV_EXCL_STOP */
|
||||
|
||||
|
||||
/* ------------ Convert context ------------ */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_glob_separator(pcre2_convert_context *ccontext, uint32_t separator)
|
||||
{
|
||||
if (separator != CHAR_SLASH && separator != CHAR_BACKSLASH &&
|
||||
separator != CHAR_DOT) return PCRE2_ERROR_BADDATA;
|
||||
ccontext->glob_separator = separator;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_glob_escape(pcre2_convert_context *ccontext, uint32_t escape)
|
||||
{
|
||||
if (escape > 255 || (escape != 0 && !ispunct(escape)))
|
||||
return PCRE2_ERROR_BADDATA;
|
||||
ccontext->glob_escape = escape;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre2_context.c */
|
||||
|
||||
@@ -1,1191 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
#define TYPE_OPTIONS (PCRE2_CONVERT_GLOB| \
|
||||
PCRE2_CONVERT_POSIX_BASIC|PCRE2_CONVERT_POSIX_EXTENDED)
|
||||
|
||||
#define ALL_OPTIONS (PCRE2_CONVERT_UTF|PCRE2_CONVERT_NO_UTF_CHECK| \
|
||||
PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR| \
|
||||
PCRE2_CONVERT_GLOB_NO_STARSTAR| \
|
||||
TYPE_OPTIONS)
|
||||
|
||||
#define DUMMY_BUFFER_SIZE 100
|
||||
|
||||
/* Generated pattern fragments */
|
||||
|
||||
#define STR_BACKSLASH_A STR_BACKSLASH STR_A
|
||||
#define STR_BACKSLASH_z STR_BACKSLASH STR_z
|
||||
#define STR_COLON_RIGHT_SQUARE_BRACKET STR_COLON STR_RIGHT_SQUARE_BRACKET
|
||||
#define STR_DOT_STAR_LOOKBEHIND STR_DOT STR_ASTERISK STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_LESS_THAN_SIGN STR_EQUALS_SIGN
|
||||
#define STR_LOOKAHEAD_NOT_DOT STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_EXCLAMATION_MARK STR_BACKSLASH STR_DOT STR_RIGHT_PARENTHESIS
|
||||
#define STR_QUERY_s STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_s STR_RIGHT_PARENTHESIS
|
||||
#define STR_STAR_NUL STR_LEFT_PARENTHESIS STR_ASTERISK STR_N STR_U STR_L STR_RIGHT_PARENTHESIS
|
||||
|
||||
/* States for POSIX processing */
|
||||
|
||||
enum { POSIX_START_REGEX, POSIX_ANCHORED, POSIX_NOT_BRACKET,
|
||||
POSIX_CLASS_NOT_STARTED, POSIX_CLASS_STARTING, POSIX_CLASS_STARTED };
|
||||
|
||||
/* Macro to add a character string to the output buffer, checking for overflow. */
|
||||
|
||||
#define PUTCHARS(string) \
|
||||
{ \
|
||||
for (const char *s = string; *s != 0; s++) \
|
||||
{ \
|
||||
if (p >= endp) return PCRE2_ERROR_NOMEMORY; \
|
||||
*p++ = *s; \
|
||||
} \
|
||||
}
|
||||
|
||||
/* Literals that must be escaped: \ ? * + | . ^ $ { } [ ] ( ) */
|
||||
|
||||
static const char *pcre2_escaped_literals =
|
||||
STR_BACKSLASH STR_QUESTION_MARK STR_ASTERISK STR_PLUS
|
||||
STR_VERTICAL_LINE STR_DOT STR_CIRCUMFLEX_ACCENT STR_DOLLAR_SIGN
|
||||
STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET
|
||||
STR_LEFT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
|
||||
STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS;
|
||||
|
||||
/* Recognized escaped metacharacters in POSIX basic patterns. */
|
||||
|
||||
static const char *posix_meta_escapes =
|
||||
STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS
|
||||
STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET
|
||||
STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9;
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Convert a POSIX pattern *
|
||||
*************************************************/
|
||||
|
||||
/* This function handles both basic and extended POSIX patterns.
|
||||
|
||||
Arguments:
|
||||
pattype the pattern type
|
||||
pattern the pattern
|
||||
plength length in code units
|
||||
utf TRUE if UTF
|
||||
use_buffer where to put the output
|
||||
use_length length of use_buffer
|
||||
bufflenptr where to put the used length
|
||||
dummyrun TRUE if a dummy run
|
||||
ccontext the convert context
|
||||
|
||||
Returns: 0 => success
|
||||
!0 => error code
|
||||
*/
|
||||
|
||||
static int
|
||||
convert_posix(uint32_t pattype, PCRE2_SPTR pattern, PCRE2_SIZE plength,
|
||||
BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,
|
||||
PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)
|
||||
{
|
||||
PCRE2_SPTR posix = pattern;
|
||||
PCRE2_UCHAR *p = use_buffer;
|
||||
PCRE2_UCHAR *pp = p;
|
||||
PCRE2_UCHAR *endp = p + use_length - 1; /* Allow for trailing zero */
|
||||
PCRE2_SIZE convlength = 0;
|
||||
|
||||
uint32_t bracount = 0;
|
||||
uint32_t posix_state = POSIX_START_REGEX;
|
||||
uint32_t lastspecial = 0;
|
||||
BOOL extended = (pattype & PCRE2_CONVERT_POSIX_EXTENDED) != 0;
|
||||
BOOL nextisliteral = FALSE;
|
||||
|
||||
(void)utf; /* Not used when Unicode not supported */
|
||||
(void)ccontext; /* Not currently used */
|
||||
|
||||
/* Initialize default for error offset as end of input. */
|
||||
|
||||
*bufflenptr = plength;
|
||||
PUTCHARS(STR_STAR_NUL);
|
||||
|
||||
/* Now scan the input. */
|
||||
|
||||
while (plength > 0)
|
||||
{
|
||||
uint32_t c, sc;
|
||||
int clength = 1;
|
||||
|
||||
/* Add in the length of the last item, then, if in the dummy run, pull the
|
||||
pointer back to the start of the (temporary) buffer and then remember the
|
||||
start of the next item. */
|
||||
|
||||
convlength += p - pp;
|
||||
if (dummyrun) p = use_buffer;
|
||||
pp = p;
|
||||
|
||||
/* Pick up the next character */
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
c = *posix;
|
||||
#else
|
||||
GETCHARLENTEST(c, posix, clength);
|
||||
#endif
|
||||
posix += clength;
|
||||
plength -= clength;
|
||||
|
||||
sc = nextisliteral? 0 : c;
|
||||
nextisliteral = FALSE;
|
||||
|
||||
/* Handle a character within a class. */
|
||||
|
||||
if (posix_state >= POSIX_CLASS_NOT_STARTED)
|
||||
{
|
||||
if (c == CHAR_RIGHT_SQUARE_BRACKET)
|
||||
{
|
||||
PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
|
||||
posix_state = POSIX_NOT_BRACKET;
|
||||
}
|
||||
|
||||
/* Not the end of the class */
|
||||
|
||||
else
|
||||
{
|
||||
switch (posix_state)
|
||||
{
|
||||
case POSIX_CLASS_STARTED:
|
||||
if (c <= 127 && islower(c)) break; /* Remain in started state */
|
||||
posix_state = POSIX_CLASS_NOT_STARTED;
|
||||
if (c == CHAR_COLON && plength > 0 &&
|
||||
*posix == CHAR_RIGHT_SQUARE_BRACKET)
|
||||
{
|
||||
PUTCHARS(STR_COLON_RIGHT_SQUARE_BRACKET);
|
||||
plength--;
|
||||
posix++;
|
||||
continue; /* With next character after :] */
|
||||
}
|
||||
/* Fall through */
|
||||
|
||||
case POSIX_CLASS_NOT_STARTED:
|
||||
if (c == CHAR_LEFT_SQUARE_BRACKET)
|
||||
posix_state = POSIX_CLASS_STARTING;
|
||||
break;
|
||||
|
||||
case POSIX_CLASS_STARTING:
|
||||
if (c == CHAR_COLON) posix_state = POSIX_CLASS_STARTED;
|
||||
break;
|
||||
}
|
||||
|
||||
if (c == CHAR_BACKSLASH) PUTCHARS(STR_BACKSLASH);
|
||||
if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
|
||||
memcpy(p, posix - clength, CU2BYTES(clength));
|
||||
p += clength;
|
||||
}
|
||||
}
|
||||
|
||||
/* Handle a character not within a class. */
|
||||
|
||||
else switch(sc)
|
||||
{
|
||||
case CHAR_LEFT_SQUARE_BRACKET:
|
||||
PUTCHARS(STR_LEFT_SQUARE_BRACKET);
|
||||
|
||||
#ifdef NEVER
|
||||
/* We could handle special cases [[:<:]] and [[:>:]] (which PCRE does
|
||||
support) but they are not part of POSIX 1003.1. */
|
||||
|
||||
if (plength >= 6)
|
||||
{
|
||||
if (posix[0] == CHAR_LEFT_SQUARE_BRACKET &&
|
||||
posix[1] == CHAR_COLON &&
|
||||
(posix[2] == CHAR_LESS_THAN_SIGN ||
|
||||
posix[2] == CHAR_GREATER_THAN_SIGN) &&
|
||||
posix[3] == CHAR_COLON &&
|
||||
posix[4] == CHAR_RIGHT_SQUARE_BRACKET &&
|
||||
posix[5] == CHAR_RIGHT_SQUARE_BRACKET)
|
||||
{
|
||||
if (p + 6 > endp) return PCRE2_ERROR_NOMEMORY;
|
||||
memcpy(p, posix, CU2BYTES(6));
|
||||
p += 6;
|
||||
posix += 6;
|
||||
plength -= 6;
|
||||
continue; /* With next character */
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Handle start of "normal" character classes */
|
||||
|
||||
posix_state = POSIX_CLASS_NOT_STARTED;
|
||||
|
||||
/* Handle ^ and ] as first characters */
|
||||
|
||||
if (plength > 0)
|
||||
{
|
||||
if (*posix == CHAR_CIRCUMFLEX_ACCENT)
|
||||
{
|
||||
posix++;
|
||||
plength--;
|
||||
PUTCHARS(STR_CIRCUMFLEX_ACCENT);
|
||||
}
|
||||
if (plength > 0 && *posix == CHAR_RIGHT_SQUARE_BRACKET)
|
||||
{
|
||||
posix++;
|
||||
plength--;
|
||||
PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case CHAR_BACKSLASH:
|
||||
if (plength == 0) return PCRE2_ERROR_END_BACKSLASH;
|
||||
if (extended) nextisliteral = TRUE; else
|
||||
{
|
||||
if (*posix < 127 && strchr(posix_meta_escapes, *posix) != NULL)
|
||||
{
|
||||
if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH);
|
||||
if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
|
||||
lastspecial = *p++ = *posix++;
|
||||
plength--;
|
||||
}
|
||||
else nextisliteral = TRUE;
|
||||
}
|
||||
break;
|
||||
|
||||
case CHAR_RIGHT_PARENTHESIS:
|
||||
if (!extended || bracount == 0) goto ESCAPE_LITERAL;
|
||||
bracount--;
|
||||
goto COPY_SPECIAL;
|
||||
|
||||
case CHAR_LEFT_PARENTHESIS:
|
||||
bracount++;
|
||||
/* Fall through */
|
||||
|
||||
case CHAR_QUESTION_MARK:
|
||||
case CHAR_PLUS:
|
||||
case CHAR_LEFT_CURLY_BRACKET:
|
||||
case CHAR_RIGHT_CURLY_BRACKET:
|
||||
case CHAR_VERTICAL_LINE:
|
||||
if (!extended) goto ESCAPE_LITERAL;
|
||||
/* Fall through */
|
||||
|
||||
case CHAR_DOT:
|
||||
case CHAR_DOLLAR_SIGN:
|
||||
posix_state = POSIX_NOT_BRACKET;
|
||||
COPY_SPECIAL:
|
||||
lastspecial = c;
|
||||
if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
|
||||
*p++ = c;
|
||||
break;
|
||||
|
||||
case CHAR_ASTERISK:
|
||||
if (lastspecial != CHAR_ASTERISK)
|
||||
{
|
||||
if (!extended && (posix_state < POSIX_NOT_BRACKET ||
|
||||
lastspecial == CHAR_LEFT_PARENTHESIS))
|
||||
goto ESCAPE_LITERAL;
|
||||
goto COPY_SPECIAL;
|
||||
}
|
||||
break; /* Ignore second and subsequent asterisks */
|
||||
|
||||
case CHAR_CIRCUMFLEX_ACCENT:
|
||||
if (extended) goto COPY_SPECIAL;
|
||||
if (posix_state == POSIX_START_REGEX ||
|
||||
lastspecial == CHAR_LEFT_PARENTHESIS)
|
||||
{
|
||||
posix_state = POSIX_ANCHORED;
|
||||
goto COPY_SPECIAL;
|
||||
}
|
||||
/* Fall through */
|
||||
|
||||
default:
|
||||
if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL)
|
||||
{
|
||||
ESCAPE_LITERAL:
|
||||
PUTCHARS(STR_BACKSLASH);
|
||||
}
|
||||
lastspecial = 0xff; /* Indicates nothing special */
|
||||
if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
|
||||
memcpy(p, posix - clength, CU2BYTES(clength));
|
||||
p += clength;
|
||||
posix_state = POSIX_NOT_BRACKET;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (posix_state >= POSIX_CLASS_NOT_STARTED)
|
||||
return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
|
||||
convlength += p - pp; /* Final segment */
|
||||
*bufflenptr = convlength;
|
||||
*p++ = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Convert a glob pattern *
|
||||
*************************************************/
|
||||
|
||||
/* Context for writing the output into a buffer. */
|
||||
|
||||
typedef struct pcre2_output_context {
|
||||
PCRE2_UCHAR *output; /* current output position */
|
||||
PCRE2_SPTR output_end; /* output end */
|
||||
PCRE2_SIZE output_size; /* size of the output */
|
||||
uint8_t out_str[8]; /* string copied to the output */
|
||||
} pcre2_output_context;
|
||||
|
||||
|
||||
/* Write a character into the output.
|
||||
|
||||
Arguments:
|
||||
out output context
|
||||
chr the next character
|
||||
*/
|
||||
|
||||
static void
|
||||
convert_glob_write(pcre2_output_context *out, PCRE2_UCHAR chr)
|
||||
{
|
||||
out->output_size++;
|
||||
|
||||
if (out->output < out->output_end)
|
||||
*out->output++ = chr;
|
||||
}
|
||||
|
||||
|
||||
/* Write a string into the output.
|
||||
|
||||
Arguments:
|
||||
out output context
|
||||
length length of out->out_str
|
||||
*/
|
||||
|
||||
static void
|
||||
convert_glob_write_str(pcre2_output_context *out, PCRE2_SIZE length)
|
||||
{
|
||||
uint8_t *out_str = out->out_str;
|
||||
PCRE2_UCHAR *output = out->output;
|
||||
PCRE2_SPTR output_end = out->output_end;
|
||||
PCRE2_SIZE output_size = out->output_size;
|
||||
|
||||
do
|
||||
{
|
||||
output_size++;
|
||||
|
||||
if (output < output_end)
|
||||
*output++ = *out_str++;
|
||||
}
|
||||
while (--length != 0);
|
||||
|
||||
out->output = output;
|
||||
out->output_size = output_size;
|
||||
}
|
||||
|
||||
|
||||
/* Prints the separator into the output.
|
||||
|
||||
Arguments:
|
||||
out output context
|
||||
separator glob separator
|
||||
with_escape backslash is needed before separator
|
||||
*/
|
||||
|
||||
static void
|
||||
convert_glob_print_separator(pcre2_output_context *out,
|
||||
PCRE2_UCHAR separator, BOOL with_escape)
|
||||
{
|
||||
if (with_escape)
|
||||
convert_glob_write(out, CHAR_BACKSLASH);
|
||||
|
||||
convert_glob_write(out, separator);
|
||||
}
|
||||
|
||||
|
||||
/* Prints a wildcard into the output.
|
||||
|
||||
Arguments:
|
||||
out output context
|
||||
separator glob separator
|
||||
with_escape backslash is needed before separator
|
||||
*/
|
||||
|
||||
static void
|
||||
convert_glob_print_wildcard(pcre2_output_context *out,
|
||||
PCRE2_UCHAR separator, BOOL with_escape)
|
||||
{
|
||||
out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;
|
||||
out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT;
|
||||
convert_glob_write_str(out, 2);
|
||||
|
||||
convert_glob_print_separator(out, separator, with_escape);
|
||||
|
||||
convert_glob_write(out, CHAR_RIGHT_SQUARE_BRACKET);
|
||||
}
|
||||
|
||||
|
||||
/* Parse a posix class.
|
||||
|
||||
Arguments:
|
||||
from starting point of scanning the range
|
||||
pattern_end end of pattern
|
||||
out output context
|
||||
|
||||
Returns: >0 => class index
|
||||
0 => malformed class
|
||||
*/
|
||||
|
||||
static int
|
||||
convert_glob_parse_class(PCRE2_SPTR *from, PCRE2_SPTR pattern_end,
|
||||
pcre2_output_context *out)
|
||||
{
|
||||
static const char *posix_classes = "alnum:alpha:ascii:blank:cntrl:digit:"
|
||||
"graph:lower:print:punct:space:upper:word:xdigit:";
|
||||
PCRE2_SPTR start = *from + 1;
|
||||
PCRE2_SPTR pattern = start;
|
||||
const char *class_ptr;
|
||||
PCRE2_UCHAR c;
|
||||
int class_index;
|
||||
|
||||
while (TRUE)
|
||||
{
|
||||
if (pattern >= pattern_end) return 0;
|
||||
|
||||
c = *pattern++;
|
||||
|
||||
if (c < CHAR_a || c > CHAR_z) break;
|
||||
}
|
||||
|
||||
if (c != CHAR_COLON || pattern >= pattern_end ||
|
||||
*pattern != CHAR_RIGHT_SQUARE_BRACKET)
|
||||
return 0;
|
||||
|
||||
class_ptr = posix_classes;
|
||||
class_index = 1;
|
||||
|
||||
while (TRUE)
|
||||
{
|
||||
if (*class_ptr == CHAR_NUL) return 0;
|
||||
|
||||
pattern = start;
|
||||
|
||||
while (*pattern == (PCRE2_UCHAR) *class_ptr)
|
||||
{
|
||||
if (*pattern == CHAR_COLON)
|
||||
{
|
||||
pattern += 2;
|
||||
start -= 2;
|
||||
|
||||
do convert_glob_write(out, *start++); while (start < pattern);
|
||||
|
||||
*from = pattern;
|
||||
return class_index;
|
||||
}
|
||||
pattern++;
|
||||
class_ptr++;
|
||||
}
|
||||
|
||||
while (*class_ptr != CHAR_COLON) class_ptr++;
|
||||
class_ptr++;
|
||||
class_index++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Checks whether the character is in the class.
|
||||
|
||||
Arguments:
|
||||
class_index class index
|
||||
c character
|
||||
|
||||
Returns: !0 => character is found in the class
|
||||
0 => otherwise
|
||||
*/
|
||||
|
||||
static BOOL
|
||||
convert_glob_char_in_class(int class_index, PCRE2_UCHAR c)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (c > 0xff)
|
||||
{
|
||||
/* ctype functions are not sane for c > 0xff */
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
switch (class_index)
|
||||
{
|
||||
case 1: return isalnum(c);
|
||||
case 2: return isalpha(c);
|
||||
case 3: return 1;
|
||||
case 4: return c == CHAR_HT || c == CHAR_SPACE;
|
||||
case 5: return iscntrl(c);
|
||||
case 6: return isdigit(c);
|
||||
case 7: return isgraph(c);
|
||||
case 8: return islower(c);
|
||||
case 9: return isprint(c);
|
||||
case 10: return ispunct(c);
|
||||
case 11: return isspace(c);
|
||||
case 12: return isupper(c);
|
||||
case 13: return isalnum(c) || c == CHAR_UNDERSCORE;
|
||||
default: return isxdigit(c);
|
||||
}
|
||||
}
|
||||
|
||||
/* Parse a range of characters.
|
||||
|
||||
Arguments:
|
||||
from starting point of scanning the range
|
||||
pattern_end end of pattern
|
||||
out output context
|
||||
separator glob separator
|
||||
with_escape backslash is needed before separator
|
||||
|
||||
Returns: 0 => success
|
||||
!0 => error code
|
||||
*/
|
||||
|
||||
static int
|
||||
convert_glob_parse_range(PCRE2_SPTR *from, PCRE2_SPTR pattern_end,
|
||||
pcre2_output_context *out, BOOL utf, PCRE2_UCHAR separator,
|
||||
BOOL with_escape, PCRE2_UCHAR escape, BOOL no_wildsep)
|
||||
{
|
||||
BOOL is_negative = FALSE;
|
||||
BOOL separator_seen = FALSE;
|
||||
BOOL has_prev_c;
|
||||
PCRE2_SPTR pattern = *from;
|
||||
PCRE2_SPTR char_start = NULL;
|
||||
uint32_t c, prev_c;
|
||||
int len, class_index;
|
||||
|
||||
(void)utf; /* Avoid compiler warning. */
|
||||
|
||||
if (pattern >= pattern_end)
|
||||
{
|
||||
*from = pattern;
|
||||
return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
|
||||
}
|
||||
|
||||
if (*pattern == CHAR_EXCLAMATION_MARK
|
||||
|| *pattern == CHAR_CIRCUMFLEX_ACCENT)
|
||||
{
|
||||
pattern++;
|
||||
|
||||
if (pattern >= pattern_end)
|
||||
{
|
||||
*from = pattern;
|
||||
return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
|
||||
}
|
||||
|
||||
is_negative = TRUE;
|
||||
|
||||
out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;
|
||||
out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT;
|
||||
len = 2;
|
||||
|
||||
if (!no_wildsep)
|
||||
{
|
||||
if (with_escape)
|
||||
{
|
||||
out->out_str[len] = CHAR_BACKSLASH;
|
||||
len++;
|
||||
}
|
||||
out->out_str[len] = (uint8_t) separator;
|
||||
}
|
||||
|
||||
convert_glob_write_str(out, len + 1);
|
||||
}
|
||||
else
|
||||
convert_glob_write(out, CHAR_LEFT_SQUARE_BRACKET);
|
||||
|
||||
has_prev_c = FALSE;
|
||||
prev_c = 0;
|
||||
|
||||
if (*pattern == CHAR_RIGHT_SQUARE_BRACKET)
|
||||
{
|
||||
out->out_str[0] = CHAR_BACKSLASH;
|
||||
out->out_str[1] = CHAR_RIGHT_SQUARE_BRACKET;
|
||||
convert_glob_write_str(out, 2);
|
||||
has_prev_c = TRUE;
|
||||
prev_c = CHAR_RIGHT_SQUARE_BRACKET;
|
||||
pattern++;
|
||||
}
|
||||
|
||||
while (pattern < pattern_end)
|
||||
{
|
||||
char_start = pattern;
|
||||
GETCHARINCTEST(c, pattern);
|
||||
|
||||
if (c == CHAR_RIGHT_SQUARE_BRACKET)
|
||||
{
|
||||
convert_glob_write(out, c);
|
||||
|
||||
if (!is_negative && !no_wildsep && separator_seen)
|
||||
{
|
||||
out->out_str[0] = CHAR_LEFT_PARENTHESIS;
|
||||
out->out_str[1] = CHAR_QUESTION_MARK;
|
||||
out->out_str[2] = CHAR_LESS_THAN_SIGN;
|
||||
out->out_str[3] = CHAR_EXCLAMATION_MARK;
|
||||
convert_glob_write_str(out, 4);
|
||||
|
||||
convert_glob_print_separator(out, separator, with_escape);
|
||||
convert_glob_write(out, CHAR_RIGHT_PARENTHESIS);
|
||||
}
|
||||
|
||||
*from = pattern;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (pattern >= pattern_end) break;
|
||||
|
||||
if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
|
||||
{
|
||||
*from = pattern;
|
||||
class_index = convert_glob_parse_class(from, pattern_end, out);
|
||||
|
||||
if (class_index != 0)
|
||||
{
|
||||
pattern = *from;
|
||||
|
||||
has_prev_c = FALSE;
|
||||
prev_c = 0;
|
||||
|
||||
if (!is_negative &&
|
||||
convert_glob_char_in_class (class_index, separator))
|
||||
separator_seen = TRUE;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else if (c == CHAR_MINUS && has_prev_c &&
|
||||
*pattern != CHAR_RIGHT_SQUARE_BRACKET)
|
||||
{
|
||||
convert_glob_write(out, CHAR_MINUS);
|
||||
|
||||
char_start = pattern;
|
||||
GETCHARINCTEST(c, pattern);
|
||||
|
||||
if (pattern >= pattern_end) break;
|
||||
|
||||
if (escape != 0 && c == escape)
|
||||
{
|
||||
char_start = pattern;
|
||||
GETCHARINCTEST(c, pattern);
|
||||
}
|
||||
else if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
|
||||
{
|
||||
*from = pattern;
|
||||
return PCRE2_ERROR_CONVERT_SYNTAX;
|
||||
}
|
||||
|
||||
if (prev_c > c)
|
||||
{
|
||||
*from = pattern;
|
||||
return PCRE2_ERROR_CONVERT_SYNTAX;
|
||||
}
|
||||
|
||||
if (prev_c < separator && separator < c) separator_seen = TRUE;
|
||||
|
||||
has_prev_c = FALSE;
|
||||
prev_c = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (escape != 0 && c == escape)
|
||||
{
|
||||
char_start = pattern;
|
||||
GETCHARINCTEST(c, pattern);
|
||||
|
||||
if (pattern >= pattern_end) break;
|
||||
}
|
||||
|
||||
has_prev_c = TRUE;
|
||||
prev_c = c;
|
||||
}
|
||||
|
||||
if (c == CHAR_LEFT_SQUARE_BRACKET || c == CHAR_RIGHT_SQUARE_BRACKET ||
|
||||
c == CHAR_BACKSLASH || c == CHAR_MINUS)
|
||||
convert_glob_write(out, CHAR_BACKSLASH);
|
||||
|
||||
if (c == separator) separator_seen = TRUE;
|
||||
|
||||
do convert_glob_write(out, *char_start++); while (char_start < pattern);
|
||||
}
|
||||
|
||||
*from = pattern;
|
||||
return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
|
||||
}
|
||||
|
||||
|
||||
/* Prints a (*COMMIT) into the output.
|
||||
|
||||
Arguments:
|
||||
out output context
|
||||
*/
|
||||
|
||||
static void
|
||||
convert_glob_print_commit(pcre2_output_context *out)
|
||||
{
|
||||
out->out_str[0] = CHAR_LEFT_PARENTHESIS;
|
||||
out->out_str[1] = CHAR_ASTERISK;
|
||||
out->out_str[2] = CHAR_C;
|
||||
out->out_str[3] = CHAR_O;
|
||||
out->out_str[4] = CHAR_M;
|
||||
out->out_str[5] = CHAR_M;
|
||||
out->out_str[6] = CHAR_I;
|
||||
out->out_str[7] = CHAR_T;
|
||||
convert_glob_write_str(out, 8);
|
||||
convert_glob_write(out, CHAR_RIGHT_PARENTHESIS);
|
||||
}
|
||||
|
||||
|
||||
/* Bash glob converter.
|
||||
|
||||
Arguments:
|
||||
pattype the pattern type
|
||||
pattern the pattern
|
||||
plength length in code units
|
||||
utf TRUE if UTF
|
||||
use_buffer where to put the output
|
||||
use_length length of use_buffer
|
||||
bufflenptr where to put the used length
|
||||
dummyrun TRUE if a dummy run
|
||||
ccontext the convert context
|
||||
|
||||
Returns: 0 => success
|
||||
!0 => error code
|
||||
*/
|
||||
|
||||
static int
|
||||
convert_glob(uint32_t options, PCRE2_SPTR pattern, PCRE2_SIZE plength,
|
||||
BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,
|
||||
PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)
|
||||
{
|
||||
pcre2_output_context out;
|
||||
PCRE2_SPTR pattern_start = pattern;
|
||||
PCRE2_SPTR pattern_end = pattern + plength;
|
||||
PCRE2_UCHAR separator = ccontext->glob_separator;
|
||||
PCRE2_UCHAR escape = ccontext->glob_escape;
|
||||
PCRE2_UCHAR c;
|
||||
BOOL no_wildsep = (options & PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR) != 0;
|
||||
BOOL no_starstar = (options & PCRE2_CONVERT_GLOB_NO_STARSTAR) != 0;
|
||||
BOOL in_atomic = FALSE;
|
||||
BOOL after_starstar = FALSE;
|
||||
BOOL no_slash_z = FALSE;
|
||||
BOOL with_escape, is_start, after_separator;
|
||||
int result = 0;
|
||||
|
||||
(void)utf; /* Avoid compiler warning. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && (separator >= 128 || escape >= 128))
|
||||
{
|
||||
/* Currently only ASCII characters are supported. */
|
||||
*bufflenptr = 0;
|
||||
return PCRE2_ERROR_CONVERT_SYNTAX;
|
||||
}
|
||||
#endif
|
||||
|
||||
with_escape = strchr(pcre2_escaped_literals, separator) != NULL;
|
||||
|
||||
/* Initialize default for error offset as end of input. */
|
||||
out.output = use_buffer;
|
||||
out.output_end = use_buffer + use_length;
|
||||
out.output_size = 0;
|
||||
|
||||
out.out_str[0] = CHAR_LEFT_PARENTHESIS;
|
||||
out.out_str[1] = CHAR_QUESTION_MARK;
|
||||
out.out_str[2] = CHAR_s;
|
||||
out.out_str[3] = CHAR_RIGHT_PARENTHESIS;
|
||||
convert_glob_write_str(&out, 4);
|
||||
|
||||
is_start = TRUE;
|
||||
|
||||
if (pattern < pattern_end && pattern[0] == CHAR_ASTERISK)
|
||||
{
|
||||
if (no_wildsep)
|
||||
is_start = FALSE;
|
||||
else if (!no_starstar && pattern + 1 < pattern_end &&
|
||||
pattern[1] == CHAR_ASTERISK)
|
||||
is_start = FALSE;
|
||||
}
|
||||
|
||||
if (is_start)
|
||||
{
|
||||
out.out_str[0] = CHAR_BACKSLASH;
|
||||
out.out_str[1] = CHAR_A;
|
||||
convert_glob_write_str(&out, 2);
|
||||
}
|
||||
|
||||
while (pattern < pattern_end)
|
||||
{
|
||||
c = *pattern++;
|
||||
|
||||
if (c == CHAR_ASTERISK)
|
||||
{
|
||||
is_start = pattern == pattern_start + 1;
|
||||
|
||||
if (in_atomic)
|
||||
{
|
||||
convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
|
||||
in_atomic = FALSE;
|
||||
}
|
||||
|
||||
if (!no_starstar && pattern < pattern_end && *pattern == CHAR_ASTERISK)
|
||||
{
|
||||
after_separator = is_start || (pattern[-2] == separator);
|
||||
|
||||
do pattern++; while (pattern < pattern_end &&
|
||||
*pattern == CHAR_ASTERISK);
|
||||
|
||||
if (pattern >= pattern_end)
|
||||
{
|
||||
no_slash_z = TRUE;
|
||||
break;
|
||||
}
|
||||
|
||||
after_starstar = TRUE;
|
||||
|
||||
if (after_separator && escape != 0 && *pattern == escape &&
|
||||
pattern + 1 < pattern_end && pattern[1] == separator)
|
||||
pattern++;
|
||||
|
||||
if (is_start)
|
||||
{
|
||||
if (*pattern != separator) continue;
|
||||
|
||||
out.out_str[0] = CHAR_LEFT_PARENTHESIS;
|
||||
out.out_str[1] = CHAR_QUESTION_MARK;
|
||||
out.out_str[2] = CHAR_COLON;
|
||||
out.out_str[3] = CHAR_BACKSLASH;
|
||||
out.out_str[4] = CHAR_A;
|
||||
out.out_str[5] = CHAR_VERTICAL_LINE;
|
||||
convert_glob_write_str(&out, 6);
|
||||
|
||||
convert_glob_print_separator(&out, separator, with_escape);
|
||||
convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
|
||||
|
||||
pattern++;
|
||||
continue;
|
||||
}
|
||||
|
||||
convert_glob_print_commit(&out);
|
||||
|
||||
if (!after_separator || *pattern != separator)
|
||||
{
|
||||
out.out_str[0] = CHAR_DOT;
|
||||
out.out_str[1] = CHAR_ASTERISK;
|
||||
out.out_str[2] = CHAR_QUESTION_MARK;
|
||||
convert_glob_write_str(&out, 3);
|
||||
continue;
|
||||
}
|
||||
|
||||
out.out_str[0] = CHAR_LEFT_PARENTHESIS;
|
||||
out.out_str[1] = CHAR_QUESTION_MARK;
|
||||
out.out_str[2] = CHAR_COLON;
|
||||
out.out_str[3] = CHAR_DOT;
|
||||
out.out_str[4] = CHAR_ASTERISK;
|
||||
out.out_str[5] = CHAR_QUESTION_MARK;
|
||||
|
||||
convert_glob_write_str(&out, 6);
|
||||
|
||||
convert_glob_print_separator(&out, separator, with_escape);
|
||||
|
||||
out.out_str[0] = CHAR_RIGHT_PARENTHESIS;
|
||||
out.out_str[1] = CHAR_QUESTION_MARK;
|
||||
out.out_str[2] = CHAR_QUESTION_MARK;
|
||||
convert_glob_write_str(&out, 3);
|
||||
|
||||
pattern++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (pattern < pattern_end && *pattern == CHAR_ASTERISK)
|
||||
{
|
||||
do pattern++; while (pattern < pattern_end &&
|
||||
*pattern == CHAR_ASTERISK);
|
||||
}
|
||||
|
||||
if (no_wildsep)
|
||||
{
|
||||
if (pattern >= pattern_end)
|
||||
{
|
||||
no_slash_z = TRUE;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Start check must be after the end check. */
|
||||
if (is_start) continue;
|
||||
}
|
||||
|
||||
if (!is_start)
|
||||
{
|
||||
if (after_starstar)
|
||||
{
|
||||
out.out_str[0] = CHAR_LEFT_PARENTHESIS;
|
||||
out.out_str[1] = CHAR_QUESTION_MARK;
|
||||
out.out_str[2] = CHAR_GREATER_THAN_SIGN;
|
||||
convert_glob_write_str(&out, 3);
|
||||
in_atomic = TRUE;
|
||||
}
|
||||
else
|
||||
convert_glob_print_commit(&out);
|
||||
}
|
||||
|
||||
if (no_wildsep)
|
||||
convert_glob_write(&out, CHAR_DOT);
|
||||
else
|
||||
convert_glob_print_wildcard(&out, separator, with_escape);
|
||||
|
||||
out.out_str[0] = CHAR_ASTERISK;
|
||||
out.out_str[1] = CHAR_QUESTION_MARK;
|
||||
if (pattern >= pattern_end)
|
||||
out.out_str[1] = CHAR_PLUS;
|
||||
convert_glob_write_str(&out, 2);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c == CHAR_QUESTION_MARK)
|
||||
{
|
||||
if (no_wildsep)
|
||||
convert_glob_write(&out, CHAR_DOT);
|
||||
else
|
||||
convert_glob_print_wildcard(&out, separator, with_escape);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c == CHAR_LEFT_SQUARE_BRACKET)
|
||||
{
|
||||
result = convert_glob_parse_range(&pattern, pattern_end,
|
||||
&out, utf, separator, with_escape, escape, no_wildsep);
|
||||
if (result != 0) break;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (escape != 0 && c == escape)
|
||||
{
|
||||
if (pattern >= pattern_end)
|
||||
{
|
||||
result = PCRE2_ERROR_CONVERT_SYNTAX;
|
||||
break;
|
||||
}
|
||||
c = *pattern++;
|
||||
}
|
||||
|
||||
if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL)
|
||||
convert_glob_write(&out, CHAR_BACKSLASH);
|
||||
|
||||
convert_glob_write(&out, c);
|
||||
}
|
||||
|
||||
if (result == 0)
|
||||
{
|
||||
if (!no_slash_z)
|
||||
{
|
||||
out.out_str[0] = CHAR_BACKSLASH;
|
||||
out.out_str[1] = CHAR_z;
|
||||
convert_glob_write_str(&out, 2);
|
||||
}
|
||||
|
||||
if (in_atomic)
|
||||
convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
|
||||
|
||||
convert_glob_write(&out, CHAR_NUL);
|
||||
|
||||
if (!dummyrun && out.output_size != (PCRE2_SIZE) (out.output - use_buffer))
|
||||
result = PCRE2_ERROR_NOMEMORY;
|
||||
}
|
||||
|
||||
if (result != 0)
|
||||
{
|
||||
*bufflenptr = pattern - pattern_start;
|
||||
return result;
|
||||
}
|
||||
|
||||
*bufflenptr = out.output_size - 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Convert pattern *
|
||||
*************************************************/
|
||||
|
||||
/* This is the external-facing function for converting other forms of pattern
|
||||
into PCRE2 regular expression patterns. On error, the bufflenptr argument is
|
||||
used to return an offset in the original pattern.
|
||||
|
||||
Arguments:
|
||||
pattern the input pattern
|
||||
plength length of input, or PCRE2_ZERO_TERMINATED
|
||||
options options bits
|
||||
buffptr pointer to pointer to output buffer
|
||||
bufflenptr pointer to length of output buffer
|
||||
ccontext convert context or NULL
|
||||
|
||||
Returns: 0 for success, else an error code (+ve or -ve)
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE plength, uint32_t options,
|
||||
PCRE2_UCHAR **buffptr, PCRE2_SIZE *bufflenptr,
|
||||
pcre2_convert_context *ccontext)
|
||||
{
|
||||
int rc;
|
||||
PCRE2_UCHAR dummy_buffer[DUMMY_BUFFER_SIZE];
|
||||
PCRE2_UCHAR *use_buffer = dummy_buffer;
|
||||
PCRE2_SIZE use_length = DUMMY_BUFFER_SIZE;
|
||||
BOOL utf = (options & PCRE2_CONVERT_UTF) != 0;
|
||||
uint32_t pattype = options & TYPE_OPTIONS;
|
||||
|
||||
if (pattern == NULL || bufflenptr == NULL) return PCRE2_ERROR_NULL;
|
||||
|
||||
if ((options & ~ALL_OPTIONS) != 0 || /* Undefined bit set */
|
||||
(pattype & (~pattype+1)) != pattype || /* More than one type set */
|
||||
pattype == 0) /* No type set */
|
||||
{
|
||||
*bufflenptr = 0; /* Error offset */
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
}
|
||||
|
||||
if (plength == PCRE2_ZERO_TERMINATED) plength = PRIV(strlen)(pattern);
|
||||
if (ccontext == NULL) ccontext =
|
||||
(pcre2_convert_context *)(&PRIV(default_convert_context));
|
||||
|
||||
/* Check UTF if required. */
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
*bufflenptr = 0; /* Error offset */
|
||||
return PCRE2_ERROR_UNICODE_NOT_SUPPORTED;
|
||||
}
|
||||
#else
|
||||
if (utf && (options & PCRE2_CONVERT_NO_UTF_CHECK) == 0)
|
||||
{
|
||||
PCRE2_SIZE erroroffset;
|
||||
rc = PRIV(valid_utf)(pattern, plength, &erroroffset);
|
||||
if (rc != 0)
|
||||
{
|
||||
*bufflenptr = erroroffset;
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* If buffptr is not NULL, and what it points to is not NULL, we are being
|
||||
provided with a buffer and a length, so set them as the buffer to use. */
|
||||
|
||||
if (buffptr != NULL && *buffptr != NULL)
|
||||
{
|
||||
use_buffer = *buffptr;
|
||||
use_length = *bufflenptr;
|
||||
}
|
||||
|
||||
/* Call an individual converter, either just once (if a buffer was provided or
|
||||
just the length is needed), or twice (if a memory allocation is required). */
|
||||
|
||||
for (int i = 0; i < 2; i++)
|
||||
{
|
||||
PCRE2_UCHAR *allocated;
|
||||
BOOL dummyrun = buffptr == NULL || *buffptr == NULL;
|
||||
|
||||
switch(pattype)
|
||||
{
|
||||
case PCRE2_CONVERT_GLOB:
|
||||
rc = convert_glob(options & ~PCRE2_CONVERT_GLOB, pattern, plength, utf,
|
||||
use_buffer, use_length, bufflenptr, dummyrun, ccontext);
|
||||
break;
|
||||
|
||||
case PCRE2_CONVERT_POSIX_BASIC:
|
||||
case PCRE2_CONVERT_POSIX_EXTENDED:
|
||||
rc = convert_posix(pattype, pattern, plength, utf, use_buffer, use_length,
|
||||
bufflenptr, dummyrun, ccontext);
|
||||
break;
|
||||
|
||||
default:
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
if (rc != 0 || /* Error */
|
||||
buffptr == NULL || /* Just the length is required */
|
||||
*buffptr != NULL) /* Buffer was provided or allocated */
|
||||
return rc;
|
||||
|
||||
/* Allocate memory for the buffer, with hidden space for an allocator at
|
||||
the start. The next time round the loop runs the conversion for real. */
|
||||
|
||||
allocated = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
|
||||
(*bufflenptr + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)ccontext);
|
||||
if (allocated == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
*buffptr = (PCRE2_UCHAR *)(((char *)allocated) + sizeof(pcre2_memctl));
|
||||
|
||||
use_buffer = *buffptr;
|
||||
use_length = *bufflenptr + 1;
|
||||
}
|
||||
|
||||
/* Something went terribly wrong. Trigger an assert and return an error */
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
|
||||
EXIT:
|
||||
|
||||
*bufflenptr = 0; /* Error offset */
|
||||
return PCRE2_ERROR_INTERNAL;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free converted pattern *
|
||||
*************************************************/
|
||||
|
||||
/* This frees a converted pattern that was put in newly-allocated memory.
|
||||
|
||||
Argument: the converted pattern
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_converted_pattern_free(PCRE2_UCHAR *converted)
|
||||
{
|
||||
if (converted != NULL)
|
||||
{
|
||||
pcre2_memctl *memctl =
|
||||
(pcre2_memctl *)((char *)converted - sizeof(pcre2_memctl));
|
||||
memctl->free(memctl, memctl->memory_data);
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_convert.c */
|
||||
@@ -1,4110 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre2_dfa_match(), which is an
|
||||
alternative matching function that uses a sort of DFA algorithm (not a true
|
||||
FSM). This is NOT Perl-compatible, but it has advantages in certain
|
||||
applications. */
|
||||
|
||||
|
||||
/* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved
|
||||
the performance of his patterns greatly. I could not use it as it stood, as it
|
||||
was not thread safe, and made assumptions about pattern sizes. Also, it caused
|
||||
test 7 to loop, and test 9 to crash with a segfault.
|
||||
|
||||
The issue is the check for duplicate states, which is done by a simple linear
|
||||
search up the state list. (Grep for "duplicate" below to find the code.) For
|
||||
many patterns, there will never be many states active at one time, so a simple
|
||||
linear search is fine. In patterns that have many active states, it might be a
|
||||
bottleneck. The suggested code used an indexing scheme to remember which states
|
||||
had previously been used for each character, and avoided the linear search when
|
||||
it knew there was no chance of a duplicate. This was implemented when adding
|
||||
states to the state lists.
|
||||
|
||||
I wrote some thread-safe, not-limited code to try something similar at the time
|
||||
of checking for duplicates (instead of when adding states), using index vectors
|
||||
on the stack. It did give a 13% improvement with one specially constructed
|
||||
pattern for certain subject strings, but on other strings and on many of the
|
||||
simpler patterns in the test suite it did worse. The major problem, I think,
|
||||
was the extra time to initialize the index. This had to be done for each call
|
||||
of internal_dfa_match(). (The supplied patch used a static vector, initialized
|
||||
only once - I suspect this was the cause of the problems with the tests.)
|
||||
|
||||
Overall, I concluded that the gains in some cases did not outweigh the losses
|
||||
in others, so I abandoned this code. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#define NLBLOCK mb /* Block containing newline information */
|
||||
#define PSSTART start_subject /* Field containing processed string start */
|
||||
#define PSEND end_subject /* Field containing processed string end */
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
/* The set of option bits that are accepted in the options argument of the
DFA matching function. */

#define PUBLIC_DFA_MATCH_OPTIONS \
  ( PCRE2_ANCHORED \
  | PCRE2_ENDANCHORED \
  | PCRE2_NOTBOL \
  | PCRE2_NOTEOL \
  | PCRE2_NOTEMPTY \
  | PCRE2_NOTEMPTY_ATSTART \
  | PCRE2_NO_UTF_CHECK \
  | PCRE2_PARTIAL_HARD \
  | PCRE2_PARTIAL_SOFT \
  | PCRE2_DFA_SHORTEST \
  | PCRE2_DFA_RESTART \
  | PCRE2_COPY_MATCHED_SUBJECT \
  )
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Code parameters and static tables *
|
||||
*************************************************/
|
||||
|
||||
/* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
|
||||
into others, under special conditions. A gap of 20 between the blocks should be
|
||||
enough. The resulting opcodes don't have to be less than 256 because they are
|
||||
never stored, so we push them well clear of the normal opcodes. */
|
||||
|
||||
/* Each block of synthetic opcodes starts 20 above the previous one; the first
block starts at 300. */

#define OP_PROP_EXTRA       300
#define OP_EXTUNI_EXTRA     (OP_PROP_EXTRA + 20)
#define OP_ANYNL_EXTRA      (OP_PROP_EXTRA + 40)
#define OP_HSPACE_EXTRA     (OP_PROP_EXTRA + 60)
#define OP_VSPACE_EXTRA     (OP_PROP_EXTRA + 80)
|
||||
|
||||
|
||||
/* This table identifies those opcodes that are followed immediately by a
|
||||
character that is to be tested in some way. This makes it possible to
|
||||
centralize the loading of these characters. In the case of Type * etc, the
|
||||
"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
|
||||
small value. Non-zero values in the table are the offsets from the opcode where
|
||||
the character is to be found. ***NOTE*** If the start of this table is
|
||||
modified, the three tables that follow must also be modified. */
|
||||
|
||||
static const uint8_t coptable[] = {
|
||||
0, /* End */
|
||||
0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */
|
||||
0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */
|
||||
0, 0, 0, /* Any, AllAny, Anybyte */
|
||||
0, 0, /* \P, \p */
|
||||
0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */
|
||||
0, /* \X */
|
||||
0, 0, 0, 0, 0, 0, /* \Z, \z, $, $M, ^, ^M */
|
||||
1, /* Char */
|
||||
1, /* Chari */
|
||||
1, /* not */
|
||||
1, /* noti */
|
||||
/* Positive single-char repeats */
|
||||
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
|
||||
1+IMM2_SIZE, 1+IMM2_SIZE, /* upto, minupto */
|
||||
1+IMM2_SIZE, /* exact */
|
||||
1, 1, 1, 1+IMM2_SIZE, /* *+, ++, ?+, upto+ */
|
||||
1, 1, 1, 1, 1, 1, /* *I, *?I, +I, +?I, ?I, ??I */
|
||||
1+IMM2_SIZE, 1+IMM2_SIZE, /* upto I, minupto I */
|
||||
1+IMM2_SIZE, /* exact I */
|
||||
1, 1, 1, 1+IMM2_SIZE, /* *+I, ++I, ?+I, upto+I */
|
||||
/* Negative single-char repeats - only for chars < 256 */
|
||||
1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */
|
||||
1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto, minupto */
|
||||
1+IMM2_SIZE, /* NOT exact */
|
||||
1, 1, 1, 1+IMM2_SIZE, /* NOT *+, ++, ?+, upto+ */
|
||||
1, 1, 1, 1, 1, 1, /* NOT *I, *?I, +I, +?I, ?I, ??I */
|
||||
1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto I, minupto I */
|
||||
1+IMM2_SIZE, /* NOT exact I */
|
||||
1, 1, 1, 1+IMM2_SIZE, /* NOT *+I, ++I, ?+I, upto+I */
|
||||
/* Positive type repeats */
|
||||
1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */
|
||||
1+IMM2_SIZE, 1+IMM2_SIZE, /* Type upto, minupto */
|
||||
1+IMM2_SIZE, /* Type exact */
|
||||
1, 1, 1, 1+IMM2_SIZE, /* Type *+, ++, ?+, upto+ */
|
||||
/* Character class & ref repeats */
|
||||
0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */
|
||||
0, 0, /* CRRANGE, CRMINRANGE */
|
||||
0, 0, 0, 0, /* Possessive *+, ++, ?+, CRPOSRANGE */
|
||||
0, /* CLASS */
|
||||
0, /* NCLASS */
|
||||
0, /* XCLASS - variable length */
|
||||
0, /* ECLASS - variable length */
|
||||
0, /* REF */
|
||||
0, /* REFI */
|
||||
0, /* DNREF */
|
||||
0, /* DNREFI */
|
||||
0, /* RECURSE */
|
||||
0, /* CALLOUT */
|
||||
0, /* CALLOUT_STR */
|
||||
0, /* Alt */
|
||||
0, /* Ket */
|
||||
0, /* KetRmax */
|
||||
0, /* KetRmin */
|
||||
0, /* KetRpos */
|
||||
0, 0, /* Reverse, Vreverse */
|
||||
0, /* Assert */
|
||||
0, /* Assert not */
|
||||
0, /* Assert behind */
|
||||
0, /* Assert behind not */
|
||||
0, /* NA assert */
|
||||
0, /* NA assert behind */
|
||||
0, /* Assert scan substring */
|
||||
0, /* ONCE */
|
||||
0, /* SCRIPT_RUN */
|
||||
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
|
||||
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
|
||||
0, 0, /* CREF, DNCREF */
|
||||
0, 0, /* RREF, DNRREF */
|
||||
0, 0, /* FALSE, TRUE */
|
||||
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
|
||||
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
|
||||
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
|
||||
0, 0, /* COMMIT, COMMIT_ARG */
|
||||
0, 0, 0, /* FAIL, ACCEPT, ASSERT_ACCEPT */
|
||||
0, 0, 0, /* CLOSE, SKIPZERO, DEFINE */
|
||||
0, 0, /* \B and \b in UCP mode */
|
||||
};
|
||||
|
||||
/* This table identifies those opcodes that inspect a character. It is used to
|
||||
remember the fact that a character could have been inspected when the end of
|
||||
the subject is reached. ***NOTE*** If the start of this table is modified, the
|
||||
two tables that follow must also be modified. */
|
||||
|
||||
static const uint8_t poptable[] = {
  /* Simple items */
  0,                             /* End */
  0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b */
  1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w */
  1, 1, 1,                       /* Any, AllAny, Anybyte */
  1, 1,                          /* \P, \p */
  1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v */
  1,                             /* \X */
  0, 0, 0, 0, 0, 0,              /* \Z, \z, $, $M, ^, ^M */

  /* Single characters */
  1, 1, 1, 1,                    /* Char, Chari, not, noti */

  /* Positive single-char repeats */
  1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ?? */
  1, 1, 1,                       /* upto, minupto, exact */
  1, 1, 1, 1,                    /* *+, ++, ?+, upto+ */
  1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I */
  1, 1, 1,                       /* upto I, minupto I, exact I */
  1, 1, 1, 1,                    /* *+I, ++I, ?+I, upto+I */

  /* Negative single-char repeats - only for chars < 256 */
  1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ?? */
  1, 1, 1,                       /* NOT upto, minupto, exact */
  1, 1, 1, 1,                    /* NOT *+, ++, ?+, upto+ */
  1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I */
  1, 1, 1,                       /* NOT upto I, minupto I, exact I */
  1, 1, 1, 1,                    /* NOT *+I, ++I, ?+I, upto+I */

  /* Positive type repeats */
  1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ?? */
  1, 1, 1,                       /* Type upto, minupto, exact */
  1, 1, 1, 1,                    /* Type *+, ++, ?+, upto+ */

  /* Character class & ref repeats */
  1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ?? */
  1, 1,                          /* CRRANGE, CRMINRANGE */
  1, 1, 1, 1,                    /* Possessive *+, ++, ?+, CRPOSRANGE */

  /* Classes (XCLASS and ECLASS are variable length) and references */
  1, 1, 1, 1,                    /* CLASS, NCLASS, XCLASS, ECLASS */
  0, 0, 0, 0,                    /* REF, REFI, DNREF, DNREFI */
  0, 0, 0,                       /* RECURSE, CALLOUT, CALLOUT_STR */

  /* Group structure */
  0, 0, 0, 0, 0,                 /* Alt, Ket, KetRmax, KetRmin, KetRpos */
  0, 0,                          /* Reverse, Vreverse */
  0, 0, 0, 0,                    /* Assert, Assert not, Assert behind, Assert behind not */
  0, 0, 0,                       /* NA assert, NA assert behind, Assert scan substring */
  0, 0,                          /* ONCE, SCRIPT_RUN */
  0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
  0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
  0, 0,                          /* CREF, DNCREF */
  0, 0,                          /* RREF, DNRREF */
  0, 0,                          /* FALSE, TRUE */
  0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO */

  /* Verbs and miscellaneous */
  0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG */
  0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG */
  0, 0,                          /* COMMIT, COMMIT_ARG */
  0, 0, 0,                       /* FAIL, ACCEPT, ASSERT_ACCEPT */
  0, 0, 0,                       /* CLOSE, SKIPZERO, DEFINE */
  1, 1,                          /* \B and \b in UCP mode */
};
|
||||
|
||||
/* Compile-time check that these tables have the correct size. */
|
||||
STATIC_ASSERT(sizeof(coptable) == OP_TABLE_LENGTH, coptable);
|
||||
STATIC_ASSERT(sizeof(poptable) == OP_TABLE_LENGTH, poptable);
|
||||
|
||||
/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
|
||||
and \w */
|
||||
|
||||
static const uint8_t toptable1[] = {
|
||||
0, 0, 0, 0, 0, 0,
|
||||
ctype_digit, ctype_digit,
|
||||
ctype_space, ctype_space,
|
||||
ctype_word, ctype_word,
|
||||
0, 0 /* OP_ANY, OP_ALLANY */
|
||||
};
|
||||
|
||||
static const uint8_t toptable2[] = {
|
||||
0, 0, 0, 0, 0, 0,
|
||||
ctype_digit, 0,
|
||||
ctype_space, 0,
|
||||
ctype_word, 0,
|
||||
1, 1 /* OP_ANY, OP_ALLANY */
|
||||
};
|
||||
|
||||
|
||||
/* Structure for holding data about a particular state, which is in effect the
|
||||
current data for an active path through the match tree. It must consist
|
||||
entirely of ints because the working vector we are passed, and which we put
|
||||
these structures in, is a vector of ints. */
|
||||
|
||||
typedef struct stateblock {
  /* Offset to the opcode for this state; negative values have a special
  meaning. */
  int offset;
  /* Repeat counter. */
  int count;
  /* Extra data, used by some states only. */
  int data;
} stateblock;

/* How many ints one stateblock occupies in the workspace vector. */

#define INTS_PER_STATEBLOCK  (int)(sizeof(stateblock)/sizeof(int))
|
||||
|
||||
|
||||
/* Before version 10.32 the recursive calls of internal_dfa_match() were passed
|
||||
local working space and output vectors that were created on the stack. This has
|
||||
caused issues for some patterns, especially in small-stack environments such as
|
||||
Windows. A new scheme is now in use which sets up a vector on the stack, but if
|
||||
this is too small, heap memory is used, up to the heap_limit. The main
|
||||
parameters are all numbers of ints because the workspace is a vector of ints.
|
||||
|
||||
The size of the starting stack vector, DFA_START_RWS_SIZE, is in bytes, and is
|
||||
defined in pcre2_internal.h so as to be available to pcre2test when it is
|
||||
finding the minimum heap requirement for a match. */
|
||||
|
||||
/* Number of ints taken up by one PCRE2_SIZE value. */
#define OVEC_UNIT       (sizeof(PCRE2_SIZE)/sizeof(int))

/* Size of the stack vector, converted from bytes to ints. */
#define RWS_BASE_SIZE   (DFA_START_RWS_SIZE/sizeof(int))

/* Work size for recursion. */
#define RWS_RSIZE       1000

/* Ovector for recursion. */
#define RWS_OVEC_RSIZE  (1000*OVEC_UNIT)

/* Ovector in other cases. */
#define RWS_OVEC_OSIZE  (2*OVEC_UNIT)
|
||||
|
||||
/* This structure is at the start of each workspace block. */
|
||||
|
||||
typedef struct RWS_anchor {
  /* Following block in the chain, or NULL when there is none. */
  struct RWS_anchor *next;
  /* Total size of this block, as a number of ints. */
  uint32_t size;
  /* Free space in this block, as a number of ints. */
  uint32_t free;
} RWS_anchor;

/* Size of the anchor itself, as a number of ints. */

#define RWS_ANCHOR_SIZE  (sizeof(RWS_anchor)/sizeof(int))
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Process a callout *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called to perform a callout.
|
||||
|
||||
Arguments:
|
||||
code current code pointer
|
||||
offsets points to current capture offsets
|
||||
current_subject start of current subject match
|
||||
ptr current position in subject
|
||||
mb the match block
|
||||
extracode extra code offset when called from condition
|
||||
lengthptr where to return the callout length
|
||||
|
||||
Returns: the return from the callout
|
||||
*/
|
||||
|
||||
static int
|
||||
do_callout_dfa(PCRE2_SPTR code, PCRE2_SIZE *offsets, PCRE2_SPTR current_subject,
|
||||
PCRE2_SPTR ptr, dfa_match_block *mb, PCRE2_SIZE extracode,
|
||||
PCRE2_SIZE *lengthptr)
|
||||
{
|
||||
pcre2_callout_block *cb = mb->cb;
|
||||
|
||||
*lengthptr = (code[extracode] == OP_CALLOUT)?
|
||||
(PCRE2_SIZE)PRIV(OP_lengths)[OP_CALLOUT] :
|
||||
(PCRE2_SIZE)GET(code, 1 + 2*LINK_SIZE + extracode);
|
||||
|
||||
if (mb->callout == NULL) return 0; /* No callout provided */
|
||||
|
||||
/* Fixed fields in the callout block are set once and for all at the start of
|
||||
matching. */
|
||||
|
||||
cb->offset_vector = offsets;
|
||||
cb->start_match = (PCRE2_SIZE)(current_subject - mb->start_subject);
|
||||
cb->current_position = (PCRE2_SIZE)(ptr - mb->start_subject);
|
||||
cb->pattern_position = GET(code, 1 + extracode);
|
||||
cb->next_item_length = GET(code, 1 + LINK_SIZE + extracode);
|
||||
|
||||
if (code[extracode] == OP_CALLOUT)
|
||||
{
|
||||
cb->callout_number = code[1 + 2*LINK_SIZE + extracode];
|
||||
cb->callout_string_offset = 0;
|
||||
cb->callout_string = NULL;
|
||||
cb->callout_string_length = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
cb->callout_number = 0;
|
||||
cb->callout_string_offset = GET(code, 1 + 3*LINK_SIZE + extracode);
|
||||
cb->callout_string = code + (1 + 4*LINK_SIZE + extracode) + 1;
|
||||
cb->callout_string_length = *lengthptr - (1 + 4*LINK_SIZE) - 2;
|
||||
}
|
||||
|
||||
return (mb->callout)(cb, mb->callout_data);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Expand local workspace memory *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called when internal_dfa_match() is about to be called
|
||||
recursively and there is insufficient working space left in the current
|
||||
workspace block. If there's an existing next block, use it; otherwise get a new
|
||||
block unless the heap limit is reached.
|
||||
|
||||
Arguments:
|
||||
rwsptr pointer to block pointer (updated)
|
||||
ovecsize space needed for an ovector
|
||||
mb the match block
|
||||
|
||||
Returns: 0 rwsptr has been updated
|
||||
!0 an error code
|
||||
*/
|
||||
|
||||
static int
|
||||
more_workspace(RWS_anchor **rwsptr, unsigned int ovecsize, dfa_match_block *mb)
|
||||
{
|
||||
RWS_anchor *rws = *rwsptr;
|
||||
RWS_anchor *new;
|
||||
|
||||
if (rws->next != NULL)
|
||||
{
|
||||
new = rws->next;
|
||||
}
|
||||
|
||||
/* Sizes in the RWS_anchor blocks are in units of sizeof(int), but
|
||||
mb->heap_limit and mb->heap_used are in kibibytes. Play carefully, to avoid
|
||||
overflow. */
|
||||
|
||||
else
|
||||
{
|
||||
uint32_t newsize = (rws->size >= UINT32_MAX/(sizeof(int)*2))? UINT32_MAX/sizeof(int) : rws->size * 2;
|
||||
uint32_t newsizeK = newsize/(1024/sizeof(int));
|
||||
|
||||
if (newsizeK + mb->heap_used > mb->heap_limit)
|
||||
newsizeK = (uint32_t)(mb->heap_limit - mb->heap_used);
|
||||
newsize = newsizeK*(1024/sizeof(int));
|
||||
|
||||
if (newsize < RWS_RSIZE + ovecsize + RWS_ANCHOR_SIZE)
|
||||
return PCRE2_ERROR_HEAPLIMIT;
|
||||
new = mb->memctl.malloc(newsize*sizeof(int), mb->memctl.memory_data);
|
||||
if (new == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
mb->heap_used += newsizeK;
|
||||
new->next = NULL;
|
||||
new->size = newsize;
|
||||
rws->next = new;
|
||||
}
|
||||
|
||||
new->free = new->size - RWS_ANCHOR_SIZE;
|
||||
*rwsptr = new;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Match a Regular Expression - DFA engine *
|
||||
*************************************************/
|
||||
|
||||
/* This internal function applies a compiled pattern to a subject string,
|
||||
starting at a given point, using a DFA engine. This function is called from the
|
||||
external one, possibly multiple times if the pattern is not anchored. The
|
||||
function calls itself recursively for some kinds of subpattern.
|
||||
|
||||
Arguments:
|
||||
mb the match_data block with fixed information
|
||||
this_start_code the opening bracket of this subexpression's code
|
||||
current_subject where we currently are in the subject string
|
||||
start_offset start offset in the subject string
|
||||
offsets vector to contain the matching string offsets
|
||||
offsetcount size of same
|
||||
workspace vector of workspace
|
||||
wscount size of same
|
||||
rlevel function call recursion level
|
||||
|
||||
Returns: > 0 => number of match offset pairs placed in offsets
|
||||
= 0 => offsets overflowed; longest matches are present
|
||||
-1 => failed to match
|
||||
< -1 => some kind of unexpected problem
|
||||
|
||||
The following macros are used for adding states to the two state vectors (one
|
||||
for the current character, one for the following character). */
|
||||
|
||||
/* Each macro appends one state to a list if there is still room, and
otherwise makes the enclosing function return PCRE2_ERROR_DFA_WSSIZE. They
are wrapped in do ... while (0) so that each use, followed by a semicolon,
forms exactly one statement. */

#define ADD_ACTIVE(x,y) \
  do \
    { \
    if (active_count++ < wscount) \
      { \
      next_active_state->offset = (x); \
      next_active_state->count  = (y); \
      next_active_state++; \
      } \
    else return PCRE2_ERROR_DFA_WSSIZE; \
    } \
  while (0)

#define ADD_ACTIVE_DATA(x,y,z) \
  do \
    { \
    if (active_count++ < wscount) \
      { \
      next_active_state->offset = (x); \
      next_active_state->count  = (y); \
      next_active_state->data   = (z); \
      next_active_state++; \
      } \
    else return PCRE2_ERROR_DFA_WSSIZE; \
    } \
  while (0)

#define ADD_NEW(x,y) \
  do \
    { \
    if (new_count++ < wscount) \
      { \
      next_new_state->offset = (x); \
      next_new_state->count  = (y); \
      next_new_state++; \
      } \
    else return PCRE2_ERROR_DFA_WSSIZE; \
    } \
  while (0)

#define ADD_NEW_DATA(x,y,z) \
  do \
    { \
    if (new_count++ < wscount) \
      { \
      next_new_state->offset = (x); \
      next_new_state->count  = (y); \
      next_new_state->data   = (z); \
      next_new_state++; \
      } \
    else return PCRE2_ERROR_DFA_WSSIZE; \
    } \
  while (0)
|
||||
|
||||
/* And now, here is the code */
|
||||
|
||||
static int
|
||||
internal_dfa_match(
|
||||
dfa_match_block *mb,
|
||||
PCRE2_SPTR this_start_code,
|
||||
PCRE2_SPTR current_subject,
|
||||
PCRE2_SIZE start_offset,
|
||||
PCRE2_SIZE *offsets,
|
||||
uint32_t offsetcount,
|
||||
int *workspace,
|
||||
int wscount,
|
||||
uint32_t rlevel,
|
||||
int *RWS)
|
||||
{
|
||||
stateblock *active_states, *new_states, *temp_states;
|
||||
stateblock *next_active_state, *next_new_state;
|
||||
const uint8_t *ctypes, *lcc, *fcc;
|
||||
PCRE2_SPTR ptr;
|
||||
PCRE2_SPTR end_code;
|
||||
dfa_recursion_info new_recursive;
|
||||
int active_count, new_count, match_count;
|
||||
|
||||
/* Some fields in the mb block are frequently referenced, so we load them into
|
||||
independent variables in the hope that this will perform better. */
|
||||
|
||||
PCRE2_SPTR start_subject = mb->start_subject;
|
||||
PCRE2_SPTR end_subject = mb->end_subject;
|
||||
PCRE2_SPTR start_code = mb->start_code;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
|
||||
BOOL utf_or_ucp = utf || (mb->poptions & PCRE2_UCP) != 0;
|
||||
#else
|
||||
BOOL utf = FALSE;
|
||||
#endif
|
||||
|
||||
BOOL reset_could_continue = FALSE;
|
||||
|
||||
if (mb->match_call_count++ >= mb->match_limit) return PCRE2_ERROR_MATCHLIMIT;
|
||||
if (rlevel++ > mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT;
|
||||
offsetcount &= (uint32_t)(-2); /* Round down */
|
||||
|
||||
wscount -= 2;
|
||||
wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) /
|
||||
(2 * INTS_PER_STATEBLOCK);
|
||||
|
||||
ctypes = mb->tables + ctypes_offset;
|
||||
lcc = mb->tables + lcc_offset;
|
||||
fcc = mb->tables + fcc_offset;
|
||||
|
||||
match_count = PCRE2_ERROR_NOMATCH; /* A negative number */
|
||||
|
||||
active_states = (stateblock *)(workspace + 2);
|
||||
next_new_state = new_states = active_states + wscount;
|
||||
new_count = 0;
|
||||
|
||||
/* The first thing in any (sub) pattern is a bracket of some sort. Push all
|
||||
the alternative states onto the list, and find out where the end is. This
|
||||
makes it possible to use this function recursively, when we want to stop at a
|
||||
matching internal ket rather than at the end.
|
||||
|
||||
If we are dealing with a backward assertion we have to find out the maximum
|
||||
amount to move back, and set up each alternative appropriately. */
|
||||
|
||||
if (*this_start_code == OP_ASSERTBACK || *this_start_code == OP_ASSERTBACK_NOT)
|
||||
{
|
||||
size_t max_back = 0;
|
||||
size_t gone_back;
|
||||
|
||||
end_code = this_start_code;
|
||||
do
|
||||
{
|
||||
size_t back = (size_t)GET2(end_code, 2+LINK_SIZE);
|
||||
if (back > max_back) max_back = back;
|
||||
end_code += GET(end_code, 1);
|
||||
}
|
||||
while (*end_code == OP_ALT);
|
||||
|
||||
/* If we can't go back the amount required for the longest lookbehind
|
||||
pattern, go back as far as we can; some alternatives may still be viable. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
/* In character mode we have to step back character by character */
|
||||
|
||||
if (utf)
|
||||
{
|
||||
for (gone_back = 0; gone_back < max_back; gone_back++)
|
||||
{
|
||||
if (current_subject <= start_subject) break;
|
||||
current_subject--;
|
||||
ACROSSCHAR(current_subject > start_subject, current_subject,
|
||||
current_subject--);
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
|
||||
/* In byte-mode we can do this quickly. */
|
||||
|
||||
{
|
||||
size_t current_offset = (size_t)(current_subject - start_subject);
|
||||
gone_back = (current_offset < max_back)? current_offset : max_back;
|
||||
current_subject -= gone_back;
|
||||
}
|
||||
|
||||
/* Save the earliest consulted character */
|
||||
|
||||
if (current_subject < mb->start_used_ptr)
|
||||
mb->start_used_ptr = current_subject;
|
||||
|
||||
/* Now we can process the individual branches. There will be an OP_REVERSE at
|
||||
the start of each branch, except when the length of the branch is zero. */
|
||||
|
||||
end_code = this_start_code;
|
||||
do
|
||||
{
|
||||
uint32_t revlen = (end_code[1+LINK_SIZE] == OP_REVERSE)? 1 + IMM2_SIZE : 0;
|
||||
size_t back = (revlen == 0)? 0 : (size_t)GET2(end_code, 2+LINK_SIZE);
|
||||
if (back <= gone_back)
|
||||
{
|
||||
int bstate = (int)(end_code - start_code + 1 + LINK_SIZE + revlen);
|
||||
ADD_NEW_DATA(-bstate, 0, (int)(gone_back - back));
|
||||
}
|
||||
end_code += GET(end_code, 1);
|
||||
}
|
||||
while (*end_code == OP_ALT);
|
||||
}
|
||||
|
||||
/* This is the code for a "normal" subpattern (not a backward assertion). The
|
||||
start of a whole pattern is always one of these. If we are at the top level,
|
||||
we may be asked to restart matching from the same point that we reached for a
|
||||
previous partial match. We still have to scan through the top-level branches to
|
||||
find the end state. */
|
||||
|
||||
else
|
||||
{
|
||||
end_code = this_start_code;
|
||||
|
||||
/* Restarting */
|
||||
|
||||
if (rlevel == 1 && (mb->moptions & PCRE2_DFA_RESTART) != 0)
|
||||
{
|
||||
do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT);
|
||||
new_count = workspace[1];
|
||||
if (!workspace[0])
|
||||
memcpy(new_states, active_states, (size_t)new_count * sizeof(stateblock));
|
||||
}
|
||||
|
||||
/* Not restarting */
|
||||
|
||||
else
|
||||
{
|
||||
int length = 1 + LINK_SIZE +
|
||||
((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
|
||||
*this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
|
||||
? IMM2_SIZE:0);
|
||||
do
|
||||
{
|
||||
ADD_NEW((int)(end_code - start_code + length), 0);
|
||||
end_code += GET(end_code, 1);
|
||||
length = 1 + LINK_SIZE;
|
||||
}
|
||||
while (*end_code == OP_ALT);
|
||||
}
|
||||
}
|
||||
|
||||
workspace[0] = 0; /* Bit indicating which vector is current */
|
||||
|
||||
/* Loop for scanning the subject */
|
||||
|
||||
ptr = current_subject;
|
||||
for (;;)
|
||||
{
|
||||
int i, j;
|
||||
int clen, dlen;
|
||||
uint32_t c, d;
|
||||
BOOL partial_newline = FALSE;
|
||||
BOOL could_continue = reset_could_continue;
|
||||
reset_could_continue = FALSE;
|
||||
|
||||
if (ptr > mb->last_used_ptr) mb->last_used_ptr = ptr;
|
||||
|
||||
/* Make the new state list into the active state list and empty the
|
||||
new state list. */
|
||||
|
||||
temp_states = active_states;
|
||||
active_states = new_states;
|
||||
new_states = temp_states;
|
||||
active_count = new_count;
|
||||
new_count = 0;
|
||||
|
||||
workspace[0] ^= 1; /* Remember for the restarting feature */
|
||||
workspace[1] = active_count;
|
||||
|
||||
/* Set the pointers for adding new states */
|
||||
|
||||
next_active_state = active_states + active_count;
|
||||
next_new_state = new_states;
|
||||
|
||||
/* Load the current character from the subject outside the loop, as many
|
||||
different states may want to look at it, and we assume that at least one
|
||||
will. */
|
||||
|
||||
if (ptr < end_subject)
|
||||
{
|
||||
clen = 1; /* Number of data items in the character */
|
||||
#ifdef SUPPORT_UNICODE
|
||||
GETCHARLENTEST(c, ptr, clen);
|
||||
#else
|
||||
c = *ptr;
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
else
|
||||
{
|
||||
clen = 0; /* This indicates the end of the subject */
|
||||
c = NOTACHAR; /* This value should never actually be used */
|
||||
}
|
||||
|
||||
/* Scan up the active states and act on each one. The result of an action
|
||||
may be to add more states to the currently active list (e.g. on hitting a
|
||||
parenthesis) or it may be to put states on the new list, for considering
|
||||
when we move the character pointer on. */
|
||||
|
||||
for (i = 0; i < active_count; i++)
|
||||
{
|
||||
stateblock *current_state = active_states + i;
|
||||
BOOL caseless = FALSE;
|
||||
PCRE2_SPTR code;
|
||||
uint32_t codevalue;
|
||||
int state_offset = current_state->offset;
|
||||
int rrc;
|
||||
int count;
|
||||
|
||||
/* A negative offset is a special case meaning "hold off going to this
|
||||
(negated) state until the number of characters in the data field have
|
||||
been skipped". If the could_continue flag was passed over from a previous
|
||||
state, arrange for it to be passed on. */
|
||||
|
||||
if (state_offset < 0)
|
||||
{
|
||||
if (current_state->data > 0)
|
||||
{
|
||||
ADD_NEW_DATA(state_offset, current_state->count,
|
||||
current_state->data - 1);
|
||||
if (could_continue) reset_could_continue = TRUE;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
current_state->offset = state_offset = -state_offset;
|
||||
}
|
||||
}
|
||||
|
||||
/* Check for a duplicate state with the same count, and skip if found.
|
||||
See the note at the head of this module about the possibility of improving
|
||||
performance here. */
|
||||
|
||||
for (j = 0; j < i; j++)
|
||||
{
|
||||
if (active_states[j].offset == state_offset &&
|
||||
active_states[j].count == current_state->count)
|
||||
goto NEXT_ACTIVE_STATE;
|
||||
}
|
||||
|
||||
/* The state offset is the offset to the opcode */
|
||||
|
||||
code = start_code + state_offset;
|
||||
codevalue = *code;
|
||||
|
||||
/* If this opcode inspects a character, but we are at the end of the
|
||||
subject, remember the fact for use when testing for a partial match. */
|
||||
|
||||
if (clen == 0 && poptable[codevalue] != 0)
|
||||
could_continue = TRUE;
|
||||
|
||||
/* If this opcode is followed by an inline character, load it. It is
|
||||
tempting to test for the presence of a subject character here, but that
|
||||
is wrong, because sometimes zero repetitions of the subject are
|
||||
permitted.
|
||||
|
||||
We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
|
||||
argument that is not a data character - but is always one byte long because
|
||||
the values are small. We have to take special action to deal with \P, \p,
|
||||
\H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
|
||||
these ones to new opcodes. */
|
||||
|
||||
if (coptable[codevalue] > 0)
|
||||
{
|
||||
dlen = 1;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
d = code[coptable[codevalue]];
|
||||
if (codevalue >= OP_TYPESTAR)
|
||||
{
|
||||
switch(d)
|
||||
{
|
||||
case OP_ANYBYTE: return PCRE2_ERROR_DFA_UITEM;
|
||||
case OP_NOTPROP:
|
||||
case OP_PROP: codevalue += OP_PROP_EXTRA; break;
|
||||
case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
|
||||
case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
|
||||
case OP_NOT_HSPACE:
|
||||
case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
|
||||
case OP_NOT_VSPACE:
|
||||
case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
dlen = 0; /* Not strictly necessary, but compilers moan */
|
||||
d = NOTACHAR; /* if these variables are not set. */
|
||||
}
|
||||
|
||||
|
||||
/* Now process the individual opcodes */
|
||||
|
||||
switch (codevalue)
|
||||
{
|
||||
/* ========================================================================== */
|
||||
/* Reached a closing bracket. If not at the end of the pattern, carry
|
||||
on with the next opcode. For repeating opcodes, also add the repeat
|
||||
state. Note that KETRPOS will always be encountered at the end of the
|
||||
subpattern, because the possessive subpattern repeats are always handled
|
||||
using recursive calls. Thus, it never adds any new states.
|
||||
|
||||
At the end of the (sub)pattern, unless we have an empty string and
|
||||
PCRE2_NOTEMPTY is set, or PCRE2_NOTEMPTY_ATSTART is set and we are at the
|
||||
start of the subject, save the match data, shifting up all previous
|
||||
matches so we always have the longest first. */
|
||||
|
||||
case OP_KET:
|
||||
case OP_KETRMIN:
|
||||
case OP_KETRMAX:
|
||||
case OP_KETRPOS:
|
||||
if (code != end_code)
|
||||
{
|
||||
ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
|
||||
if (codevalue != OP_KET)
|
||||
{
|
||||
ADD_ACTIVE(state_offset - (int)GET(code, 1), 0);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ptr > current_subject ||
|
||||
((mb->moptions & PCRE2_NOTEMPTY) == 0 &&
|
||||
((mb->moptions & PCRE2_NOTEMPTY_ATSTART) == 0 ||
|
||||
current_subject > start_subject + mb->start_offset)))
|
||||
{
|
||||
if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
|
||||
else if (match_count > 0 && ++match_count * 2 > (int)offsetcount)
|
||||
match_count = 0;
|
||||
count = ((match_count == 0)? (int)offsetcount : match_count * 2) - 2;
|
||||
if (count > 0) (void)memmove(offsets + 2, offsets,
|
||||
(size_t)count * sizeof(PCRE2_SIZE));
|
||||
if (offsetcount >= 2)
|
||||
{
|
||||
offsets[0] = (PCRE2_SIZE)(current_subject - start_subject);
|
||||
offsets[1] = (PCRE2_SIZE)(ptr - start_subject);
|
||||
}
|
||||
if ((mb->moptions & PCRE2_DFA_SHORTEST) != 0) return match_count;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/* ========================================================================== */
|
||||
/* These opcodes add to the current list of states without looking
|
||||
at the current character. */
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_ALT:
|
||||
do { code += GET(code, 1); } while (*code == OP_ALT);
|
||||
ADD_ACTIVE((int)(code - start_code), 0);
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_BRA:
|
||||
case OP_SBRA:
|
||||
do
|
||||
{
|
||||
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
|
||||
code += GET(code, 1);
|
||||
}
|
||||
while (*code == OP_ALT);
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_CBRA:
|
||||
case OP_SCBRA:
|
||||
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE), 0);
|
||||
code += GET(code, 1);
|
||||
while (*code == OP_ALT)
|
||||
{
|
||||
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
|
||||
code += GET(code, 1);
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_BRAZERO:
|
||||
case OP_BRAMINZERO:
|
||||
ADD_ACTIVE(state_offset + 1, 0);
|
||||
code += 1 + GET(code, 2);
|
||||
while (*code == OP_ALT) code += GET(code, 1);
|
||||
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_SKIPZERO:
|
||||
code += 1 + GET(code, 2);
|
||||
while (*code == OP_ALT) code += GET(code, 1);
|
||||
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_CIRC:
|
||||
if (ptr == start_subject && (mb->moptions & PCRE2_NOTBOL) == 0)
|
||||
{ ADD_ACTIVE(state_offset + 1, 0); }
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_CIRCM:
|
||||
if ((ptr == start_subject && (mb->moptions & PCRE2_NOTBOL) == 0) ||
|
||||
((ptr != end_subject || (mb->poptions & PCRE2_ALT_CIRCUMFLEX) != 0 )
|
||||
&& WAS_NEWLINE(ptr)))
|
||||
{ ADD_ACTIVE(state_offset + 1, 0); }
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_EOD:
|
||||
if (ptr >= end_subject)
|
||||
{
|
||||
if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||
return PCRE2_ERROR_PARTIAL;
|
||||
else { ADD_ACTIVE(state_offset + 1, 0); }
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_SOD:
|
||||
if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); }
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_SOM:
|
||||
if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); }
|
||||
break;
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* These opcodes inspect the next subject character, and sometimes
|
||||
the previous one as well, but do not have an argument. The variable
|
||||
clen contains the length of the current character and is zero if we are
|
||||
at the end of the subject. */
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_ANY:
|
||||
if (clen > 0 && !IS_NEWLINE(ptr))
|
||||
{
|
||||
if (ptr + 1 >= mb->end_subject &&
|
||||
(mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
|
||||
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||
NLBLOCK->nllen == 2 &&
|
||||
c == NLBLOCK->nl[0])
|
||||
{
|
||||
could_continue = partial_newline = TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
ADD_NEW(state_offset + 1, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_ALLANY:
|
||||
if (clen > 0)
|
||||
{ ADD_NEW(state_offset + 1, 0); }
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_EODN:
|
||||
if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - mb->nllen))
|
||||
{
|
||||
if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||
return PCRE2_ERROR_PARTIAL;
|
||||
ADD_ACTIVE(state_offset + 1, 0);
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_DOLL:
|
||||
if ((mb->moptions & PCRE2_NOTEOL) == 0)
|
||||
{
|
||||
if (clen == 0 && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||
could_continue = TRUE;
|
||||
else if (clen == 0 ||
|
||||
((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
|
||||
(ptr == end_subject - mb->nllen)
|
||||
))
|
||||
{ ADD_ACTIVE(state_offset + 1, 0); }
|
||||
else if (ptr + 1 >= mb->end_subject &&
|
||||
(mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0 &&
|
||||
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||
NLBLOCK->nllen == 2 &&
|
||||
c == NLBLOCK->nl[0])
|
||||
{
|
||||
if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||
{
|
||||
reset_could_continue = TRUE;
|
||||
ADD_NEW_DATA(-(state_offset + 1), 0, 1);
|
||||
}
|
||||
else could_continue = partial_newline = TRUE;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_DOLLM:
|
||||
if ((mb->moptions & PCRE2_NOTEOL) == 0)
|
||||
{
|
||||
if (clen == 0 && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||
could_continue = TRUE;
|
||||
else if (clen == 0 ||
|
||||
((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
|
||||
{ ADD_ACTIVE(state_offset + 1, 0); }
|
||||
else if (ptr + 1 >= mb->end_subject &&
|
||||
(mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0 &&
|
||||
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||
NLBLOCK->nllen == 2 &&
|
||||
c == NLBLOCK->nl[0])
|
||||
{
|
||||
if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||
{
|
||||
reset_could_continue = TRUE;
|
||||
ADD_NEW_DATA(-(state_offset + 1), 0, 1);
|
||||
}
|
||||
else could_continue = partial_newline = TRUE;
|
||||
}
|
||||
}
|
||||
else if (IS_NEWLINE(ptr))
|
||||
{ ADD_ACTIVE(state_offset + 1, 0); }
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
|
||||
case OP_DIGIT:
|
||||
case OP_WHITESPACE:
|
||||
case OP_WORDCHAR:
|
||||
if (clen > 0 && c < 256 &&
|
||||
((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)
|
||||
{ ADD_NEW(state_offset + 1, 0); }
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_NOT_DIGIT:
|
||||
case OP_NOT_WHITESPACE:
|
||||
case OP_NOT_WORDCHAR:
|
||||
if (clen > 0 && (c >= 256 ||
|
||||
((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0))
|
||||
{ ADD_NEW(state_offset + 1, 0); }
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_WORD_BOUNDARY:
|
||||
case OP_NOT_WORD_BOUNDARY:
|
||||
case OP_NOT_UCP_WORD_BOUNDARY:
|
||||
case OP_UCP_WORD_BOUNDARY:
|
||||
{
|
||||
int left_word, right_word;
|
||||
|
||||
if (ptr > start_subject)
|
||||
{
|
||||
PCRE2_SPTR temp = ptr - 1;
|
||||
if (temp < mb->start_used_ptr) mb->start_used_ptr = temp;
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (utf) { BACKCHAR(temp); }
|
||||
#endif
|
||||
GETCHARTEST(d, temp);
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (codevalue == OP_UCP_WORD_BOUNDARY ||
|
||||
codevalue == OP_NOT_UCP_WORD_BOUNDARY)
|
||||
{
|
||||
int chartype = UCD_CHARTYPE(d);
|
||||
int category = PRIV(ucp_gentype)[chartype];
|
||||
left_word = (category == ucp_L || category == ucp_N ||
|
||||
chartype == ucp_Mn || chartype == ucp_Pc);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
|
||||
}
|
||||
else left_word = FALSE;
|
||||
|
||||
if (clen > 0)
|
||||
{
|
||||
if (ptr >= mb->last_used_ptr)
|
||||
{
|
||||
PCRE2_SPTR temp = ptr + 1;
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (utf) { FORWARDCHARTEST(temp, mb->end_subject); }
|
||||
#endif
|
||||
mb->last_used_ptr = temp;
|
||||
}
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (codevalue == OP_UCP_WORD_BOUNDARY ||
|
||||
codevalue == OP_NOT_UCP_WORD_BOUNDARY)
|
||||
{
|
||||
int chartype = UCD_CHARTYPE(c);
|
||||
int category = PRIV(ucp_gentype)[chartype];
|
||||
right_word = (category == ucp_L || category == ucp_N ||
|
||||
chartype == ucp_Mn || chartype == ucp_Pc);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
|
||||
}
|
||||
else right_word = FALSE;
|
||||
|
||||
if ((left_word == right_word) ==
|
||||
(codevalue == OP_NOT_WORD_BOUNDARY ||
|
||||
codevalue == OP_NOT_UCP_WORD_BOUNDARY))
|
||||
{ ADD_ACTIVE(state_offset + 1, 0); }
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
/* Check the next character by Unicode property. We will get here only
|
||||
if the support is in the binary; otherwise a compile-time error occurs.
|
||||
*/
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
case OP_PROP:
|
||||
case OP_NOTPROP:
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
int chartype;
|
||||
const uint32_t *cp;
|
||||
const ucd_record * prop = GET_UCD(c);
|
||||
switch(code[1])
|
||||
{
|
||||
case PT_LAMP:
|
||||
chartype = prop->chartype;
|
||||
OK = chartype == ucp_Lu || chartype == ucp_Ll ||
|
||||
chartype == ucp_Lt;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
OK = prop->chartype == code[2];
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
OK = prop->script == code[2];
|
||||
break;
|
||||
|
||||
case PT_SCX:
|
||||
OK = (prop->script == code[2] ||
|
||||
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[2]) != 0);
|
||||
break;
|
||||
|
||||
/* These are specials for combination cases. */
|
||||
|
||||
case PT_ALNUM:
|
||||
chartype = prop->chartype;
|
||||
OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[chartype] == ucp_N;
|
||||
break;
|
||||
|
||||
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||
which means that Perl space and POSIX space are now identical. PCRE
|
||||
was changed at release 8.34. */
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
switch(c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
VSPACE_CASES:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
chartype = prop->chartype;
|
||||
OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[chartype] == ucp_N ||
|
||||
chartype == ucp_Mn || chartype == ucp_Pc;
|
||||
break;
|
||||
|
||||
case PT_CLIST:
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
if (c > MAX_UTF_CODE_POINT)
|
||||
{
|
||||
OK = FALSE;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
cp = PRIV(ucd_caseless_sets) + code[2];
|
||||
for (;;)
|
||||
{
|
||||
if (c < *cp) { OK = FALSE; break; }
|
||||
if (c == *cp++) { OK = TRUE; break; }
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_UCNC:
|
||||
OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
|
||||
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
|
||||
c >= 0xe000;
|
||||
break;
|
||||
|
||||
case PT_BIDICL:
|
||||
OK = UCD_BIDICLASS(c) == code[2];
|
||||
break;
|
||||
|
||||
case PT_BOOL:
|
||||
OK = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||
UCD_BPROPS_PROP(prop), code[2]) != 0;
|
||||
break;
|
||||
|
||||
/* Should never occur, but keep compilers from grumbling. */
|
||||
|
||||
default:
|
||||
OK = codevalue != OP_PROP;
|
||||
break;
|
||||
}
|
||||
|
||||
if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* These opcodes likewise inspect the subject character, but have an
|
||||
argument that is not a data character. It is one of these opcodes:
|
||||
OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_WHITESPACE,
|
||||
OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
|
||||
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEPOSPLUS:
|
||||
count = current_state->count; /* Already matched */
|
||||
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
||||
if (clen > 0)
|
||||
{
|
||||
if (d == OP_ANY && ptr + 1 >= mb->end_subject &&
|
||||
(mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
|
||||
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||
NLBLOCK->nllen == 2 &&
|
||||
c == NLBLOCK->nl[0])
|
||||
{
|
||||
could_continue = partial_newline = TRUE;
|
||||
}
|
||||
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||
(c < 256 &&
|
||||
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
||||
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||
{
|
||||
if (count > 0 && codevalue == OP_TYPEPOSPLUS)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
count++;
|
||||
ADD_NEW(state_offset, count);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSQUERY:
|
||||
ADD_ACTIVE(state_offset + 2, 0);
|
||||
if (clen > 0)
|
||||
{
|
||||
if (d == OP_ANY && ptr + 1 >= mb->end_subject &&
|
||||
(mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
|
||||
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||
NLBLOCK->nllen == 2 &&
|
||||
c == NLBLOCK->nl[0])
|
||||
{
|
||||
could_continue = partial_newline = TRUE;
|
||||
}
|
||||
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||
(c < 256 &&
|
||||
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
||||
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||
{
|
||||
if (codevalue == OP_TYPEPOSQUERY)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
ADD_NEW(state_offset + 2, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPOSSTAR:
|
||||
ADD_ACTIVE(state_offset + 2, 0);
|
||||
if (clen > 0)
|
||||
{
|
||||
if (d == OP_ANY && ptr + 1 >= mb->end_subject &&
|
||||
(mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
|
||||
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||
NLBLOCK->nllen == 2 &&
|
||||
c == NLBLOCK->nl[0])
|
||||
{
|
||||
could_continue = partial_newline = TRUE;
|
||||
}
|
||||
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||
(c < 256 &&
|
||||
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
||||
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||
{
|
||||
if (codevalue == OP_TYPEPOSSTAR)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
ADD_NEW(state_offset, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_TYPEEXACT:
|
||||
count = current_state->count; /* Number already matched */
|
||||
if (clen > 0)
|
||||
{
|
||||
if (d == OP_ANY && ptr + 1 >= mb->end_subject &&
|
||||
(mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
|
||||
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||
NLBLOCK->nllen == 2 &&
|
||||
c == NLBLOCK->nl[0])
|
||||
{
|
||||
could_continue = partial_newline = TRUE;
|
||||
}
|
||||
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||
(c < 256 &&
|
||||
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
||||
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||
{
|
||||
if (++count >= (int)GET2(code, 1))
|
||||
{ ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
|
||||
else
|
||||
{ ADD_NEW(state_offset, count); }
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEPOSUPTO:
|
||||
ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0);
|
||||
count = current_state->count; /* Number already matched */
|
||||
if (clen > 0)
|
||||
{
|
||||
if (d == OP_ANY && ptr + 1 >= mb->end_subject &&
|
||||
(mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
|
||||
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||
NLBLOCK->nllen == 2 &&
|
||||
c == NLBLOCK->nl[0])
|
||||
{
|
||||
could_continue = partial_newline = TRUE;
|
||||
}
|
||||
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||
(c < 256 &&
|
||||
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
||||
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||
{
|
||||
if (codevalue == OP_TYPEPOSUPTO)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
if (++count >= (int)GET2(code, 1))
|
||||
{ ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
|
||||
else
|
||||
{ ADD_NEW(state_offset, count); }
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/* ========================================================================== */
|
||||
/* These are virtual opcodes that are used when something like
|
||||
OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, OP_EXTUNI, OP_HSPACE, OP_NOT_HSPACE, OP_VSPACE, or OP_NOT_VSPACE as its
|
||||
argument. It keeps the code above fast for the other cases. The argument
|
||||
is in the d variable. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
case OP_PROP_EXTRA + OP_TYPEPLUS:
|
||||
case OP_PROP_EXTRA + OP_TYPEMINPLUS:
|
||||
case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
|
||||
count = current_state->count; /* Already matched */
|
||||
if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
int chartype;
|
||||
const uint32_t *cp;
|
||||
const ucd_record * prop = GET_UCD(c);
|
||||
switch(code[2])
|
||||
{
|
||||
case PT_LAMP:
|
||||
chartype = prop->chartype;
|
||||
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
OK = prop->chartype == code[3];
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
OK = prop->script == code[3];
|
||||
break;
|
||||
|
||||
case PT_SCX:
|
||||
OK = (prop->script == code[3] ||
|
||||
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[3]) != 0);
|
||||
break;
|
||||
|
||||
/* These are specials for combination cases. */
|
||||
|
||||
case PT_ALNUM:
|
||||
chartype = prop->chartype;
|
||||
OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[chartype] == ucp_N;
|
||||
break;
|
||||
|
||||
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||
which means that Perl space and POSIX space are now identical. PCRE
|
||||
was changed at release 8.34. */
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
switch(c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
VSPACE_CASES:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
chartype = prop->chartype;
|
||||
OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[chartype] == ucp_N ||
|
||||
chartype == ucp_Mn || chartype == ucp_Pc;
|
||||
break;
|
||||
|
||||
case PT_CLIST:
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
if (c > MAX_UTF_CODE_POINT)
|
||||
{
|
||||
OK = FALSE;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
cp = PRIV(ucd_caseless_sets) + code[3];
|
||||
for (;;)
|
||||
{
|
||||
if (c < *cp) { OK = FALSE; break; }
|
||||
if (c == *cp++) { OK = TRUE; break; }
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_UCNC:
|
||||
OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
|
||||
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
|
||||
c >= 0xe000;
|
||||
break;
|
||||
|
||||
case PT_BIDICL:
|
||||
OK = UCD_BIDICLASS(c) == code[3];
|
||||
break;
|
||||
|
||||
case PT_BOOL:
|
||||
OK = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||
UCD_BPROPS_PROP(prop), code[3]) != 0;
|
||||
break;
|
||||
|
||||
/* Should never occur, but keep compilers from grumbling. */
|
||||
|
||||
default:
|
||||
OK = codevalue != OP_PROP;
|
||||
break;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_PROP))
|
||||
{
|
||||
if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
count++;
|
||||
ADD_NEW(state_offset, count);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
|
||||
case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
|
||||
case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
|
||||
count = current_state->count; /* Already matched */
|
||||
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
||||
if (clen > 0)
|
||||
{
|
||||
int ncount = 0;
|
||||
if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
(void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
|
||||
&ncount);
|
||||
count++;
|
||||
ADD_NEW_DATA(-state_offset, count, ncount);
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_ANYNL_EXTRA + OP_TYPEPLUS:
|
||||
case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
|
||||
case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
|
||||
count = current_state->count; /* Already matched */
|
||||
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
||||
if (clen > 0)
|
||||
{
|
||||
int ncount = 0;
|
||||
switch (c)
|
||||
{
|
||||
case CHAR_VT:
|
||||
case CHAR_FF:
|
||||
case CHAR_NEL:
|
||||
#ifndef EBCDIC
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
#endif /* Not EBCDIC */
|
||||
if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
|
||||
goto ANYNL01;
|
||||
|
||||
case CHAR_CR:
|
||||
if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
|
||||
/* Fall through */
|
||||
|
||||
ANYNL01:
|
||||
case CHAR_LF:
|
||||
if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
count++;
|
||||
ADD_NEW_DATA(-state_offset, count, ncount);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_VSPACE_EXTRA + OP_TYPEPLUS:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
|
||||
count = current_state->count; /* Already matched */
|
||||
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
switch (c)
|
||||
{
|
||||
VSPACE_CASES:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = FALSE;
|
||||
break;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_VSPACE))
|
||||
{
|
||||
if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
count++;
|
||||
ADD_NEW_DATA(-state_offset, count, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_HSPACE_EXTRA + OP_TYPEPLUS:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
|
||||
count = current_state->count; /* Already matched */
|
||||
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
switch (c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = FALSE;
|
||||
break;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_HSPACE))
|
||||
{
|
||||
if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
count++;
|
||||
ADD_NEW_DATA(-state_offset, count, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
#ifdef SUPPORT_UNICODE
|
||||
case OP_PROP_EXTRA + OP_TYPEQUERY:
|
||||
case OP_PROP_EXTRA + OP_TYPEMINQUERY:
|
||||
case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
|
||||
count = 4;
|
||||
goto QS1;
|
||||
|
||||
case OP_PROP_EXTRA + OP_TYPESTAR:
|
||||
case OP_PROP_EXTRA + OP_TYPEMINSTAR:
|
||||
case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
|
||||
count = 0;
|
||||
|
||||
QS1:
|
||||
|
||||
ADD_ACTIVE(state_offset + 4, 0);
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
int chartype;
|
||||
const uint32_t *cp;
|
||||
const ucd_record * prop = GET_UCD(c);
|
||||
switch(code[2])
|
||||
{
|
||||
case PT_LAMP:
|
||||
chartype = prop->chartype;
|
||||
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
OK = prop->chartype == code[3];
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
OK = prop->script == code[3];
|
||||
break;
|
||||
|
||||
case PT_SCX:
|
||||
OK = (prop->script == code[3] ||
|
||||
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[3]) != 0);
|
||||
break;
|
||||
|
||||
/* These are specials for combination cases. */
|
||||
|
||||
case PT_ALNUM:
|
||||
chartype = prop->chartype;
|
||||
OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[chartype] == ucp_N;
|
||||
break;
|
||||
|
||||
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||
which means that Perl space and POSIX space are now identical. PCRE
|
||||
was changed at release 8.34. */
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
switch(c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
VSPACE_CASES:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
chartype = prop->chartype;
|
||||
OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[chartype] == ucp_N ||
|
||||
chartype == ucp_Mn || chartype == ucp_Pc;
|
||||
break;
|
||||
|
||||
case PT_CLIST:
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
if (c > MAX_UTF_CODE_POINT)
|
||||
{
|
||||
OK = FALSE;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
cp = PRIV(ucd_caseless_sets) + code[3];
|
||||
for (;;)
|
||||
{
|
||||
if (c < *cp) { OK = FALSE; break; }
|
||||
if (c == *cp++) { OK = TRUE; break; }
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_UCNC:
|
||||
OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
|
||||
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
|
||||
c >= 0xe000;
|
||||
break;
|
||||
|
||||
case PT_BIDICL:
|
||||
OK = UCD_BIDICLASS(c) == code[3];
|
||||
break;
|
||||
|
||||
case PT_BOOL:
|
||||
OK = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||
UCD_BPROPS_PROP(prop), code[3]) != 0;
|
||||
break;
|
||||
|
||||
/* Should never occur, but keep compilers from grumbling. */
|
||||
|
||||
default:
|
||||
OK = codevalue != OP_PROP;
|
||||
break;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_PROP))
|
||||
{
|
||||
if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
|
||||
codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
ADD_NEW(state_offset + count, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
|
||||
case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
|
||||
case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
|
||||
count = 2;
|
||||
goto QS2;
|
||||
|
||||
case OP_EXTUNI_EXTRA + OP_TYPESTAR:
|
||||
case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
|
||||
case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
|
||||
count = 0;
|
||||
|
||||
QS2:
|
||||
|
||||
ADD_ACTIVE(state_offset + 2, 0);
|
||||
if (clen > 0)
|
||||
{
|
||||
int ncount = 0;
|
||||
if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
|
||||
codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
(void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
|
||||
&ncount);
|
||||
ADD_NEW_DATA(-(state_offset + count), 0, ncount);
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_ANYNL_EXTRA + OP_TYPEQUERY:
|
||||
case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
|
||||
case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
|
||||
count = 2;
|
||||
goto QS3;
|
||||
|
||||
case OP_ANYNL_EXTRA + OP_TYPESTAR:
|
||||
case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
|
||||
case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
|
||||
count = 0;
|
||||
|
||||
QS3:
|
||||
ADD_ACTIVE(state_offset + 2, 0);
|
||||
if (clen > 0)
|
||||
{
|
||||
int ncount = 0;
|
||||
switch (c)
|
||||
{
|
||||
case CHAR_VT:
|
||||
case CHAR_FF:
|
||||
case CHAR_NEL:
|
||||
#ifndef EBCDIC
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
#endif /* Not EBCDIC */
|
||||
if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
|
||||
goto ANYNL02;
|
||||
|
||||
case CHAR_CR:
|
||||
if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
|
||||
/* Fall through */
|
||||
|
||||
ANYNL02:
|
||||
case CHAR_LF:
|
||||
if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
|
||||
codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
ADD_NEW_DATA(-(state_offset + (int)count), 0, ncount);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_VSPACE_EXTRA + OP_TYPEQUERY:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
|
||||
count = 2;
|
||||
goto QS4;
|
||||
|
||||
case OP_VSPACE_EXTRA + OP_TYPESTAR:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
|
||||
count = 0;
|
||||
|
||||
QS4:
|
||||
ADD_ACTIVE(state_offset + 2, 0);
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
switch (c)
|
||||
{
|
||||
VSPACE_CASES:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = FALSE;
|
||||
break;
|
||||
}
|
||||
if (OK == (d == OP_VSPACE))
|
||||
{
|
||||
if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
|
||||
codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_HSPACE_EXTRA + OP_TYPEQUERY:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
|
||||
count = 2;
|
||||
goto QS5;
|
||||
|
||||
case OP_HSPACE_EXTRA + OP_TYPESTAR:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
|
||||
count = 0;
|
||||
|
||||
QS5:
|
||||
ADD_ACTIVE(state_offset + 2, 0);
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
switch (c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = FALSE;
|
||||
break;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_HSPACE))
|
||||
{
|
||||
if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
|
||||
codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
#ifdef SUPPORT_UNICODE
|
||||
case OP_PROP_EXTRA + OP_TYPEEXACT:
|
||||
case OP_PROP_EXTRA + OP_TYPEUPTO:
|
||||
case OP_PROP_EXTRA + OP_TYPEMINUPTO:
|
||||
case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
|
||||
if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
|
||||
{ ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
|
||||
count = current_state->count; /* Number already matched */
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
int chartype;
|
||||
const uint32_t *cp;
|
||||
const ucd_record * prop = GET_UCD(c);
|
||||
switch(code[1 + IMM2_SIZE + 1])
|
||||
{
|
||||
case PT_LAMP:
|
||||
chartype = prop->chartype;
|
||||
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
OK = prop->chartype == code[1 + IMM2_SIZE + 2];
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
OK = prop->script == code[1 + IMM2_SIZE + 2];
|
||||
break;
|
||||
|
||||
case PT_SCX:
|
||||
OK = (prop->script == code[1 + IMM2_SIZE + 2] ||
|
||||
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop),
|
||||
code[1 + IMM2_SIZE + 2]) != 0);
|
||||
break;
|
||||
|
||||
/* These are specials for combination cases. */
|
||||
|
||||
case PT_ALNUM:
|
||||
chartype = prop->chartype;
|
||||
OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[chartype] == ucp_N;
|
||||
break;
|
||||
|
||||
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||
which means that Perl space and POSIX space are now identical. PCRE
|
||||
was changed at release 8.34. */
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
switch(c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
VSPACE_CASES:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
chartype = prop->chartype;
|
||||
OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[chartype] == ucp_N ||
|
||||
chartype == ucp_Mn || chartype == ucp_Pc;
|
||||
break;
|
||||
|
||||
case PT_CLIST:
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
if (c > MAX_UTF_CODE_POINT)
|
||||
{
|
||||
OK = FALSE;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
|
||||
for (;;)
|
||||
{
|
||||
if (c < *cp) { OK = FALSE; break; }
|
||||
if (c == *cp++) { OK = TRUE; break; }
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_UCNC:
|
||||
OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
|
||||
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
|
||||
c >= 0xe000;
|
||||
break;
|
||||
|
||||
case PT_BIDICL:
|
||||
OK = UCD_BIDICLASS(c) == code[1 + IMM2_SIZE + 2];
|
||||
break;
|
||||
|
||||
case PT_BOOL:
|
||||
OK = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||
UCD_BPROPS_PROP(prop), code[1 + IMM2_SIZE + 2]) != 0;
|
||||
break;
|
||||
|
||||
/* Should never occur, but keep compilers from grumbling. */
|
||||
|
||||
default:
|
||||
OK = codevalue != OP_PROP;
|
||||
break;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_PROP))
|
||||
{
|
||||
if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
if (++count >= (int)GET2(code, 1))
|
||||
{ ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
|
||||
else
|
||||
{ ADD_NEW(state_offset, count); }
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
|
||||
case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
|
||||
case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
|
||||
case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
|
||||
if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
|
||||
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
|
||||
count = current_state->count; /* Number already matched */
|
||||
if (clen > 0)
|
||||
{
|
||||
PCRE2_SPTR nptr;
|
||||
int ncount = 0;
|
||||
if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
|
||||
&ncount);
|
||||
if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||
reset_could_continue = TRUE;
|
||||
if (++count >= (int)GET2(code, 1))
|
||||
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
|
||||
else
|
||||
{ ADD_NEW_DATA(-state_offset, count, ncount); }
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_ANYNL_EXTRA + OP_TYPEEXACT:
|
||||
case OP_ANYNL_EXTRA + OP_TYPEUPTO:
|
||||
case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
|
||||
case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
|
||||
if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
|
||||
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
|
||||
count = current_state->count; /* Number already matched */
|
||||
if (clen > 0)
|
||||
{
|
||||
int ncount = 0;
|
||||
switch (c)
|
||||
{
|
||||
case CHAR_VT:
|
||||
case CHAR_FF:
|
||||
case CHAR_NEL:
|
||||
#ifndef EBCDIC
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
#endif /* Not EBCDIC */
|
||||
if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
|
||||
goto ANYNL03;
|
||||
|
||||
case CHAR_CR:
|
||||
if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
|
||||
/* Fall through */
|
||||
|
||||
ANYNL03:
|
||||
case CHAR_LF:
|
||||
if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
if (++count >= (int)GET2(code, 1))
|
||||
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
|
||||
else
|
||||
{ ADD_NEW_DATA(-state_offset, count, ncount); }
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_VSPACE_EXTRA + OP_TYPEEXACT:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEUPTO:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
|
||||
if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
|
||||
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
|
||||
count = current_state->count; /* Number already matched */
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
switch (c)
|
||||
{
|
||||
VSPACE_CASES:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = FALSE;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_VSPACE))
|
||||
{
|
||||
if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
if (++count >= (int)GET2(code, 1))
|
||||
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
|
||||
else
|
||||
{ ADD_NEW_DATA(-state_offset, count, 0); }
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_HSPACE_EXTRA + OP_TYPEEXACT:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEUPTO:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
|
||||
if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
|
||||
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
|
||||
count = current_state->count; /* Number already matched */
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
switch (c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = FALSE;
|
||||
break;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_HSPACE))
|
||||
{
|
||||
if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
if (++count >= (int)GET2(code, 1))
|
||||
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
|
||||
else
|
||||
{ ADD_NEW_DATA(-state_offset, count, 0); }
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/* ========================================================================== */
|
||||
/* These opcodes are followed by a character that is usually compared
|
||||
to the current subject character; it is loaded into d. We still get
|
||||
here even if there is no subject character, because in some cases zero
|
||||
repetitions are permitted. */
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_CHAR:
|
||||
if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); }
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_CHARI:
|
||||
if (clen == 0) break;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf_or_ucp)
|
||||
{
|
||||
if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
|
||||
{
|
||||
unsigned int othercase;
|
||||
if (c < 128)
|
||||
othercase = fcc[c];
|
||||
else
|
||||
othercase = UCD_OTHERCASE(c);
|
||||
if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
/* Not UTF or UCP mode */
|
||||
{
|
||||
if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
|
||||
{ ADD_NEW(state_offset + 2, 0); }
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
/*-----------------------------------------------------------------*/
|
||||
/* This is a tricky one because it can match more than one character.
|
||||
Find out how many characters to skip, and then set up a negative state
|
||||
to wait for them to pass before continuing. */
|
||||
|
||||
case OP_EXTUNI:
|
||||
if (clen > 0)
|
||||
{
|
||||
int ncount = 0;
|
||||
PCRE2_SPTR nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject,
|
||||
end_subject, utf, &ncount);
|
||||
if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||
reset_could_continue = TRUE;
|
||||
ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
/* This is tricky like EXTUNI because it too can match more than one
|
||||
character (when CR is followed by LF). In this case, set up a negative
|
||||
state to wait for one character to pass before continuing. */
|
||||
|
||||
case OP_ANYNL:
|
||||
if (clen > 0) switch(c)
|
||||
{
|
||||
case CHAR_VT:
|
||||
case CHAR_FF:
|
||||
case CHAR_NEL:
|
||||
#ifndef EBCDIC
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
#endif /* Not EBCDIC */
|
||||
if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
|
||||
/* Fall through */
|
||||
|
||||
case CHAR_LF:
|
||||
ADD_NEW(state_offset + 1, 0);
|
||||
break;
|
||||
|
||||
case CHAR_CR:
|
||||
if (ptr + 1 >= end_subject)
|
||||
{
|
||||
ADD_NEW(state_offset + 1, 0);
|
||||
if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||
reset_could_continue = TRUE;
|
||||
}
|
||||
else if (UCHAR21TEST(ptr + 1) == CHAR_LF)
|
||||
{
|
||||
ADD_NEW_DATA(-(state_offset + 1), 0, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
ADD_NEW(state_offset + 1, 0);
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_NOT_VSPACE:
|
||||
if (clen > 0) switch(c)
|
||||
{
|
||||
VSPACE_CASES:
|
||||
break;
|
||||
|
||||
default:
|
||||
ADD_NEW(state_offset + 1, 0);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_VSPACE:
|
||||
if (clen > 0) switch(c)
|
||||
{
|
||||
VSPACE_CASES:
|
||||
ADD_NEW(state_offset + 1, 0);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_NOT_HSPACE:
|
||||
if (clen > 0) switch(c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
break;
|
||||
|
||||
default:
|
||||
ADD_NEW(state_offset + 1, 0);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_HSPACE:
|
||||
if (clen > 0) switch(c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
ADD_NEW(state_offset + 1, 0);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
/* Match a negated single character casefully. */
|
||||
|
||||
case OP_NOT:
|
||||
if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
/* Match a negated single character caselessly. */
|
||||
|
||||
case OP_NOTI:
|
||||
if (clen > 0)
|
||||
{
|
||||
uint32_t otherd;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf_or_ucp && d >= 128)
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
otherd = TABLE_GET(d, fcc, d);
|
||||
if (c != d && c != otherd)
|
||||
{ ADD_NEW(state_offset + dlen + 1, 0); }
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_PLUSI:
|
||||
case OP_MINPLUSI:
|
||||
case OP_POSPLUSI:
|
||||
case OP_NOTPLUSI:
|
||||
case OP_NOTMINPLUSI:
|
||||
case OP_NOTPOSPLUSI:
|
||||
caseless = TRUE;
|
||||
codevalue -= OP_STARI - OP_STAR;
|
||||
|
||||
/* Fall through */
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_POSPLUS:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTPOSPLUS:
|
||||
count = current_state->count; /* Already matched */
|
||||
if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
|
||||
if (clen > 0)
|
||||
{
|
||||
uint32_t otherd = NOTACHAR;
|
||||
if (caseless)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf_or_ucp && d >= 128)
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
otherd = TABLE_GET(d, fcc, d);
|
||||
}
|
||||
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
||||
{
|
||||
if (count > 0 &&
|
||||
(codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
count++;
|
||||
ADD_NEW(state_offset, count);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_QUERYI:
|
||||
case OP_MINQUERYI:
|
||||
case OP_POSQUERYI:
|
||||
case OP_NOTQUERYI:
|
||||
case OP_NOTMINQUERYI:
|
||||
case OP_NOTPOSQUERYI:
|
||||
caseless = TRUE;
|
||||
codevalue -= OP_STARI - OP_STAR;
|
||||
/* Fall through */
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_POSQUERY:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_NOTPOSQUERY:
|
||||
ADD_ACTIVE(state_offset + dlen + 1, 0);
|
||||
if (clen > 0)
|
||||
{
|
||||
uint32_t otherd = NOTACHAR;
|
||||
if (caseless)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf_or_ucp && d >= 128)
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
otherd = TABLE_GET(d, fcc, d);
|
||||
}
|
||||
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
||||
{
|
||||
if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
ADD_NEW(state_offset + dlen + 1, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_STARI:
|
||||
case OP_MINSTARI:
|
||||
case OP_POSSTARI:
|
||||
case OP_NOTSTARI:
|
||||
case OP_NOTMINSTARI:
|
||||
case OP_NOTPOSSTARI:
|
||||
caseless = TRUE;
|
||||
codevalue -= OP_STARI - OP_STAR;
|
||||
/* Fall through */
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_POSSTAR:
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTPOSSTAR:
|
||||
ADD_ACTIVE(state_offset + dlen + 1, 0);
|
||||
if (clen > 0)
|
||||
{
|
||||
uint32_t otherd = NOTACHAR;
|
||||
if (caseless)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf_or_ucp && d >= 128)
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
otherd = TABLE_GET(d, fcc, d);
|
||||
}
|
||||
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
||||
{
|
||||
if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
ADD_NEW(state_offset, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_EXACTI:
|
||||
case OP_NOTEXACTI:
|
||||
caseless = TRUE;
|
||||
codevalue -= OP_STARI - OP_STAR;
|
||||
/* Fall through */
|
||||
case OP_EXACT:
|
||||
case OP_NOTEXACT:
|
||||
count = current_state->count; /* Number already matched */
|
||||
if (clen > 0)
|
||||
{
|
||||
uint32_t otherd = NOTACHAR;
|
||||
if (caseless)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf_or_ucp && d >= 128)
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
otherd = TABLE_GET(d, fcc, d);
|
||||
}
|
||||
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
||||
{
|
||||
if (++count >= (int)GET2(code, 1))
|
||||
{ ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
|
||||
else
|
||||
{ ADD_NEW(state_offset, count); }
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_UPTOI:
|
||||
case OP_MINUPTOI:
|
||||
case OP_POSUPTOI:
|
||||
case OP_NOTUPTOI:
|
||||
case OP_NOTMINUPTOI:
|
||||
case OP_NOTPOSUPTOI:
|
||||
caseless = TRUE;
|
||||
codevalue -= OP_STARI - OP_STAR;
|
||||
/* Fall through */
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
case OP_POSUPTO:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_NOTPOSUPTO:
|
||||
ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
|
||||
count = current_state->count; /* Number already matched */
|
||||
if (clen > 0)
|
||||
{
|
||||
uint32_t otherd = NOTACHAR;
|
||||
if (caseless)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf_or_ucp && d >= 128)
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
otherd = TABLE_GET(d, fcc, d);
|
||||
}
|
||||
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
||||
{
|
||||
if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
if (++count >= (int)GET2(code, 1))
|
||||
{ ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
|
||||
else
|
||||
{ ADD_NEW(state_offset, count); }
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* These are the class-handling opcodes */
|
||||
|
||||
case OP_CLASS:
|
||||
case OP_NCLASS:
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
case OP_XCLASS:
|
||||
case OP_ECLASS:
|
||||
#endif
|
||||
{
|
||||
BOOL isinclass = FALSE;
|
||||
int next_state_offset;
|
||||
PCRE2_SPTR ecode;
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
/* An extended class may have a table or a list of single characters,
|
||||
ranges, or both, and it may be positive or negative. There's a
|
||||
function that sorts all this out. */
|
||||
|
||||
if (codevalue == OP_XCLASS)
|
||||
{
|
||||
ecode = code + GET(code, 1);
|
||||
if (clen > 0)
|
||||
isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE,
|
||||
(const uint8_t*)mb->start_code, utf);
|
||||
}
|
||||
|
||||
/* A nested set-based class has internal opcodes for performing
|
||||
set operations. */
|
||||
|
||||
else if (codevalue == OP_ECLASS)
|
||||
{
|
||||
ecode = code + GET(code, 1);
|
||||
if (clen > 0)
|
||||
isinclass = PRIV(eclass)(c, code + 1 + LINK_SIZE, ecode,
|
||||
(const uint8_t*)mb->start_code, utf);
|
||||
}
|
||||
|
||||
else
|
||||
#endif /* SUPPORT_WIDE_CHARS */
|
||||
|
||||
/* For a simple class, there is always just a 32-byte table, and we
|
||||
can set isinclass from it. */
|
||||
|
||||
{
|
||||
ecode = code + 1 + (32 / sizeof(PCRE2_UCHAR));
|
||||
if (clen > 0)
|
||||
{
|
||||
isinclass = (c > 255)? (codevalue == OP_NCLASS) :
|
||||
((((const uint8_t *)(code + 1))[c/8] & (1u << (c&7))) != 0);
|
||||
}
|
||||
}
|
||||
|
||||
/* At this point, isinclass is set for all kinds of class, and ecode
|
||||
points to the byte after the end of the class. If there is a
|
||||
quantifier, this is where it will be. */
|
||||
|
||||
next_state_offset = (int)(ecode - start_code);
|
||||
|
||||
switch (*ecode)
|
||||
{
|
||||
case OP_CRSTAR:
|
||||
case OP_CRMINSTAR:
|
||||
case OP_CRPOSSTAR:
|
||||
ADD_ACTIVE(next_state_offset + 1, 0);
|
||||
if (isinclass)
|
||||
{
|
||||
if (*ecode == OP_CRPOSSTAR)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
ADD_NEW(state_offset, 0);
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_CRPLUS:
|
||||
case OP_CRMINPLUS:
|
||||
case OP_CRPOSPLUS:
|
||||
count = current_state->count; /* Already matched */
|
||||
if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
|
||||
if (isinclass)
|
||||
{
|
||||
if (count > 0 && *ecode == OP_CRPOSPLUS)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
count++;
|
||||
ADD_NEW(state_offset, count);
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
case OP_CRPOSQUERY:
|
||||
ADD_ACTIVE(next_state_offset + 1, 0);
|
||||
if (isinclass)
|
||||
{
|
||||
if (*ecode == OP_CRPOSQUERY)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
ADD_NEW(next_state_offset + 1, 0);
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
case OP_CRPOSRANGE:
|
||||
count = current_state->count; /* Already matched */
|
||||
if (count >= (int)GET2(ecode, 1))
|
||||
{ ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
|
||||
if (isinclass)
|
||||
{
|
||||
int max = (int)GET2(ecode, 1 + IMM2_SIZE);
|
||||
|
||||
if (*ecode == OP_CRPOSRANGE && count >= (int)GET2(ecode, 1))
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
|
||||
if (++count >= max && max != 0) /* Max 0 => no limit */
|
||||
{ ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
|
||||
else
|
||||
{ ADD_NEW(state_offset, count); }
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
if (isinclass) { ADD_NEW(next_state_offset, 0); }
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/* ========================================================================== */
|
||||
/* These are the opcodes for fancy brackets of various kinds. We have
|
||||
to use recursion in order to handle them. The "always failing" assertion
|
||||
(?!) is optimised to OP_FAIL when compiling, so we have to support that,
|
||||
though the other "backtracking verbs" are not supported. */
|
||||
|
||||
case OP_FAIL:
|
||||
break;
|
||||
|
||||
case OP_ASSERT:
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
{
|
||||
int rc;
|
||||
int *local_workspace;
|
||||
PCRE2_SIZE *local_offsets;
|
||||
PCRE2_SPTR endasscode = code + GET(code, 1);
|
||||
RWS_anchor *rws = (RWS_anchor *)RWS;
|
||||
|
||||
if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
|
||||
{
|
||||
rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
|
||||
if (rc != 0) return rc;
|
||||
RWS = (int *)rws;
|
||||
}
|
||||
|
||||
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
|
||||
local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
|
||||
rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||
|
||||
while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
|
||||
|
||||
rc = internal_dfa_match(
|
||||
mb, /* static match data */
|
||||
code, /* this subexpression's code */
|
||||
ptr, /* where we currently are */
|
||||
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
|
||||
local_offsets, /* offset vector */
|
||||
RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
|
||||
local_workspace, /* workspace vector */
|
||||
RWS_RSIZE, /* size of same */
|
||||
rlevel, /* function recursion level */
|
||||
RWS); /* recursion workspace */
|
||||
|
||||
rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||
|
||||
if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc;
|
||||
if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
|
||||
{ ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_COND:
|
||||
case OP_SCOND:
|
||||
{
|
||||
int codelink = (int)GET(code, 1);
|
||||
PCRE2_UCHAR condcode;
|
||||
|
||||
/* Because of the way auto-callout works during compile, a callout item
|
||||
is inserted between OP_COND and an assertion condition. This does not
|
||||
happen for the other conditions. */
|
||||
|
||||
if (code[LINK_SIZE + 1] == OP_CALLOUT
|
||||
|| code[LINK_SIZE + 1] == OP_CALLOUT_STR)
|
||||
{
|
||||
PCRE2_SIZE callout_length;
|
||||
rrc = do_callout_dfa(code, offsets, current_subject, ptr, mb,
|
||||
1 + LINK_SIZE, &callout_length);
|
||||
if (rrc < 0) return rrc; /* Abandon */
|
||||
if (rrc > 0) break; /* Fail this thread */
|
||||
code += callout_length; /* Skip callout data */
|
||||
}
|
||||
|
||||
condcode = code[LINK_SIZE+1];
|
||||
|
||||
/* Back reference conditions and duplicate named recursion conditions
|
||||
are not supported */
|
||||
|
||||
if (condcode == OP_CREF || condcode == OP_DNCREF ||
|
||||
condcode == OP_DNRREF)
|
||||
return PCRE2_ERROR_DFA_UCOND;
|
||||
|
||||
/* The DEFINE condition is always false, and the assertion (?!) is
|
||||
converted to OP_FAIL. */
|
||||
|
||||
if (condcode == OP_FALSE || condcode == OP_FAIL)
|
||||
{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
|
||||
|
||||
/* There is also an always-true condition */
|
||||
|
||||
else if (condcode == OP_TRUE)
|
||||
{ ADD_ACTIVE(state_offset + LINK_SIZE + 2, 0); }
|
||||
|
||||
/* The only supported version of OP_RREF is for the value RREF_ANY,
|
||||
which means "test if in any recursion". We can't test for specifically
|
||||
recursed groups. */
|
||||
|
||||
else if (condcode == OP_RREF)
|
||||
{
|
||||
unsigned int value = GET2(code, LINK_SIZE + 2);
|
||||
if (value != RREF_ANY) return PCRE2_ERROR_DFA_UCOND;
|
||||
if (mb->recursive != NULL)
|
||||
{ ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
|
||||
else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
|
||||
}
|
||||
|
||||
/* Otherwise, the condition is an assertion */
|
||||
|
||||
else
|
||||
{
|
||||
int rc;
|
||||
int *local_workspace;
|
||||
PCRE2_SIZE *local_offsets;
|
||||
PCRE2_SPTR asscode = code + LINK_SIZE + 1;
|
||||
PCRE2_SPTR endasscode = asscode + GET(asscode, 1);
|
||||
RWS_anchor *rws = (RWS_anchor *)RWS;
|
||||
|
||||
if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
|
||||
{
|
||||
rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
|
||||
if (rc != 0) return rc;
|
||||
RWS = (int *)rws;
|
||||
}
|
||||
|
||||
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
|
||||
local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
|
||||
rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||
|
||||
while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
|
||||
|
||||
rc = internal_dfa_match(
|
||||
mb, /* fixed match data */
|
||||
asscode, /* this subexpression's code */
|
||||
ptr, /* where we currently are */
|
||||
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
|
||||
local_offsets, /* offset vector */
|
||||
RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
|
||||
local_workspace, /* workspace vector */
|
||||
RWS_RSIZE, /* size of same */
|
||||
rlevel, /* function recursion level */
|
||||
RWS); /* recursion workspace */
|
||||
|
||||
rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||
|
||||
if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc;
|
||||
if ((rc >= 0) ==
|
||||
(condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
|
||||
{ ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
|
||||
else
|
||||
{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_RECURSE:
|
||||
{
|
||||
int rc;
|
||||
int *local_workspace;
|
||||
PCRE2_SIZE *local_offsets;
|
||||
RWS_anchor *rws = (RWS_anchor *)RWS;
|
||||
PCRE2_SPTR callpat = start_code + GET(code, 1);
|
||||
uint32_t recno = (callpat == mb->start_code)? 0 :
|
||||
GET2(callpat, 1 + LINK_SIZE);
|
||||
|
||||
if (rws->free < RWS_RSIZE + RWS_OVEC_RSIZE)
|
||||
{
|
||||
rc = more_workspace(&rws, RWS_OVEC_RSIZE, mb);
|
||||
if (rc != 0) return rc;
|
||||
RWS = (int *)rws;
|
||||
}
|
||||
|
||||
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
|
||||
local_workspace = ((int *)local_offsets) + RWS_OVEC_RSIZE;
|
||||
rws->free -= RWS_RSIZE + RWS_OVEC_RSIZE;
|
||||
|
||||
/* Check for repeating a recursion without advancing the subject
|
||||
pointer or last used character. This should catch convoluted mutual
|
||||
recursions. (Some simple cases are caught at compile time.) */
|
||||
|
||||
for (dfa_recursion_info *ri = mb->recursive;
|
||||
ri != NULL;
|
||||
ri = ri->prevrec)
|
||||
{
|
||||
if (recno == ri->group_num && ptr == ri->subject_position &&
|
||||
mb->last_used_ptr == ri->last_used_ptr)
|
||||
return PCRE2_ERROR_RECURSELOOP;
|
||||
}
|
||||
|
||||
/* Remember this recursion and where we started it so as to
|
||||
catch infinite loops. */
|
||||
|
||||
new_recursive.group_num = recno;
|
||||
new_recursive.subject_position = ptr;
|
||||
new_recursive.last_used_ptr = mb->last_used_ptr;
|
||||
new_recursive.prevrec = mb->recursive;
|
||||
mb->recursive = &new_recursive;
|
||||
|
||||
rc = internal_dfa_match(
|
||||
mb, /* fixed match data */
|
||||
callpat, /* this subexpression's code */
|
||||
ptr, /* where we currently are */
|
||||
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
|
||||
local_offsets, /* offset vector */
|
||||
RWS_OVEC_RSIZE/OVEC_UNIT, /* size of same */
|
||||
local_workspace, /* workspace vector */
|
||||
RWS_RSIZE, /* size of same */
|
||||
rlevel, /* function recursion level */
|
||||
RWS); /* recursion workspace */
|
||||
|
||||
rws->free += RWS_RSIZE + RWS_OVEC_RSIZE;
|
||||
mb->recursive = new_recursive.prevrec; /* Done this recursion */
|
||||
|
||||
/* Ran out of internal offsets */
|
||||
|
||||
if (rc == 0) return PCRE2_ERROR_DFA_RECURSE;
|
||||
|
||||
/* For each successful matched substring, set up the next state with a
|
||||
count of characters to skip before trying it. Note that the count is in
|
||||
characters, not bytes. */
|
||||
|
||||
if (rc > 0)
|
||||
{
|
||||
for (rc = rc*2 - 2; rc >= 0; rc -= 2)
|
||||
{
|
||||
PCRE2_SIZE charcount = local_offsets[rc+1] - local_offsets[rc];
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (utf)
|
||||
{
|
||||
PCRE2_SPTR p = start_subject + local_offsets[rc];
|
||||
PCRE2_SPTR pp = start_subject + local_offsets[rc+1];
|
||||
while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--;
|
||||
}
|
||||
#endif
|
||||
if (charcount > 0)
|
||||
{
|
||||
ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0,
|
||||
(int)(charcount - 1));
|
||||
}
|
||||
else
|
||||
{
|
||||
ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (rc != PCRE2_ERROR_NOMATCH) return rc;
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_BRAPOS:
|
||||
case OP_SBRAPOS:
|
||||
case OP_CBRAPOS:
|
||||
case OP_SCBRAPOS:
|
||||
case OP_BRAPOSZERO:
|
||||
{
|
||||
int rc;
|
||||
int *local_workspace;
|
||||
PCRE2_SIZE *local_offsets;
|
||||
PCRE2_SIZE charcount, matched_count;
|
||||
PCRE2_SPTR local_ptr = ptr;
|
||||
RWS_anchor *rws = (RWS_anchor *)RWS;
|
||||
BOOL allow_zero;
|
||||
|
||||
if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
|
||||
{
|
||||
rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
|
||||
if (rc != 0) return rc;
|
||||
RWS = (int *)rws;
|
||||
}
|
||||
|
||||
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
|
||||
local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
|
||||
rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||
|
||||
if (codevalue == OP_BRAPOSZERO)
|
||||
{
|
||||
allow_zero = TRUE;
|
||||
++code; /* The following opcode will be one of the above BRAs */
|
||||
}
|
||||
else allow_zero = FALSE;
|
||||
|
||||
/* Loop to match the subpattern as many times as possible as if it were
|
||||
a complete pattern. */
|
||||
|
||||
for (matched_count = 0;; matched_count++)
|
||||
{
|
||||
rc = internal_dfa_match(
|
||||
mb, /* fixed match data */
|
||||
code, /* this subexpression's code */
|
||||
local_ptr, /* where we currently are */
|
||||
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
|
||||
local_offsets, /* offset vector */
|
||||
RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
|
||||
local_workspace, /* workspace vector */
|
||||
RWS_RSIZE, /* size of same */
|
||||
rlevel, /* function recursion level */
|
||||
RWS); /* recursion workspace */
|
||||
|
||||
/* Failed to match */
|
||||
|
||||
if (rc < 0)
|
||||
{
|
||||
if (rc != PCRE2_ERROR_NOMATCH) return rc;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Matched: break the loop if zero characters matched. */
|
||||
|
||||
charcount = local_offsets[1] - local_offsets[0];
|
||||
if (charcount == 0) break;
|
||||
local_ptr += charcount; /* Advance temporary position ptr */
|
||||
}
|
||||
|
||||
rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||
|
||||
/* At this point we have matched the subpattern matched_count
|
||||
times, and local_ptr is pointing to the character after the end of the
|
||||
last match. */
|
||||
|
||||
if (matched_count > 0 || allow_zero)
|
||||
{
|
||||
PCRE2_SPTR end_subpattern = code;
|
||||
int next_state_offset;
|
||||
|
||||
do { end_subpattern += GET(end_subpattern, 1); }
|
||||
while (*end_subpattern == OP_ALT);
|
||||
next_state_offset =
|
||||
(int)(end_subpattern - start_code + LINK_SIZE + 1);
|
||||
|
||||
/* Optimization: if there are no more active states, and there
|
||||
are no new states yet set up, then skip over the subject string
|
||||
right here, to save looping. Otherwise, set up the new state to swing
|
||||
into action when the end of the matched substring is reached. */
|
||||
|
||||
if (i + 1 >= active_count && new_count == 0)
|
||||
{
|
||||
ptr = local_ptr;
|
||||
clen = 0;
|
||||
ADD_NEW(next_state_offset, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
PCRE2_SPTR p = ptr;
|
||||
PCRE2_SPTR pp = local_ptr;
|
||||
charcount = (PCRE2_SIZE)(pp - p);
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (utf) while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--;
|
||||
#endif
|
||||
ADD_NEW_DATA(-next_state_offset, 0, (int)(charcount - 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_ONCE:
|
||||
{
|
||||
int rc;
|
||||
int *local_workspace;
|
||||
PCRE2_SIZE *local_offsets;
|
||||
RWS_anchor *rws = (RWS_anchor *)RWS;
|
||||
|
||||
if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
|
||||
{
|
||||
rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
|
||||
if (rc != 0) return rc;
|
||||
RWS = (int *)rws;
|
||||
}
|
||||
|
||||
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
|
||||
local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
|
||||
rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||
|
||||
rc = internal_dfa_match(
|
||||
mb, /* fixed match data */
|
||||
code, /* this subexpression's code */
|
||||
ptr, /* where we currently are */
|
||||
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
|
||||
local_offsets, /* offset vector */
|
||||
RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
|
||||
local_workspace, /* workspace vector */
|
||||
RWS_RSIZE, /* size of same */
|
||||
rlevel, /* function recursion level */
|
||||
RWS); /* recursion workspace */
|
||||
|
||||
rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||
|
||||
if (rc >= 0)
|
||||
{
|
||||
PCRE2_SPTR end_subpattern = code;
|
||||
PCRE2_SIZE charcount = local_offsets[1] - local_offsets[0];
|
||||
int next_state_offset, repeat_state_offset;
|
||||
|
||||
do { end_subpattern += GET(end_subpattern, 1); }
|
||||
while (*end_subpattern == OP_ALT);
|
||||
next_state_offset =
|
||||
(int)(end_subpattern - start_code + LINK_SIZE + 1);
|
||||
|
||||
/* If the end of this subpattern is KETRMAX or KETRMIN, we must
|
||||
arrange for the repeat state also to be added to the relevant list.
|
||||
Calculate the offset, or set -1 for no repeat. */
|
||||
|
||||
repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
|
||||
*end_subpattern == OP_KETRMIN)?
|
||||
(int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1;
|
||||
|
||||
/* If we have matched an empty string, add the next state at the
|
||||
current character pointer. This is important so that the duplicate
|
||||
checking kicks in, which is what breaks infinite loops that match an
|
||||
empty string. */
|
||||
|
||||
if (charcount == 0)
|
||||
{
|
||||
ADD_ACTIVE(next_state_offset, 0);
|
||||
}
|
||||
|
||||
/* Optimization: if there are no more active states, and there
|
||||
are no new states yet set up, then skip over the subject string
|
||||
right here, to save looping. Otherwise, set up the new state to swing
|
||||
into action when the end of the matched substring is reached. */
|
||||
|
||||
else if (i + 1 >= active_count && new_count == 0)
|
||||
{
|
||||
ptr += charcount;
|
||||
clen = 0;
|
||||
ADD_NEW(next_state_offset, 0);
|
||||
|
||||
/* If we are adding a repeat state at the new character position,
|
||||
we must fudge things so that it is the only current state.
|
||||
Otherwise, it might be a duplicate of one we processed before, and
|
||||
that would cause it to be skipped. */
|
||||
|
||||
if (repeat_state_offset >= 0)
|
||||
{
|
||||
next_active_state = active_states;
|
||||
active_count = 0;
|
||||
i = -1;
|
||||
ADD_ACTIVE(repeat_state_offset, 0);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (utf)
|
||||
{
|
||||
PCRE2_SPTR p = start_subject + local_offsets[0];
|
||||
PCRE2_SPTR pp = start_subject + local_offsets[1];
|
||||
while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--;
|
||||
}
|
||||
#endif
|
||||
ADD_NEW_DATA(-next_state_offset, 0, (int)(charcount - 1));
|
||||
if (repeat_state_offset >= 0)
|
||||
{ ADD_NEW_DATA(-repeat_state_offset, 0, (int)(charcount - 1)); }
|
||||
}
|
||||
}
|
||||
else if (rc != PCRE2_ERROR_NOMATCH) return rc;
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* Handle callouts */
|
||||
|
||||
case OP_CALLOUT:
|
||||
case OP_CALLOUT_STR:
|
||||
{
|
||||
PCRE2_SIZE callout_length;
|
||||
rrc = do_callout_dfa(code, offsets, current_subject, ptr, mb, 0,
|
||||
&callout_length);
|
||||
if (rrc < 0) return rrc; /* Abandon */
|
||||
if (rrc == 0)
|
||||
{ ADD_ACTIVE(state_offset + (int)callout_length, 0); }
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
default: /* Unsupported opcode */
|
||||
return PCRE2_ERROR_DFA_UITEM;
|
||||
}
|
||||
|
||||
NEXT_ACTIVE_STATE: continue;
|
||||
|
||||
} /* End of loop scanning active states */
|
||||
|
||||
/* We have finished the processing at the current subject character. If no
|
||||
new states have been set for the next character, we have found all the
|
||||
matches that we are going to find. If partial matching has been requested,
|
||||
check for appropriate conditions.
|
||||
|
||||
The "could_continue" variable is true if a state could have continued but
|
||||
for the fact that the end of the subject was reached. */
|
||||
|
||||
if (new_count <= 0)
|
||||
{
|
||||
if (could_continue && /* Some could go on, and */
|
||||
( /* either... */
|
||||
(mb->moptions & PCRE2_PARTIAL_HARD) != 0 /* Hard partial */
|
||||
|| /* or... */
|
||||
((mb->moptions & PCRE2_PARTIAL_SOFT) != 0 && /* Soft partial and */
|
||||
match_count < 0) /* no matches */
|
||||
) && /* And... */
|
||||
(
|
||||
partial_newline || /* Either partial NL */
|
||||
( /* or ... */
|
||||
ptr >= end_subject && /* End of subject and */
|
||||
( /* either */
|
||||
ptr > mb->start_used_ptr || /* Inspected non-empty string */
|
||||
mb->allowemptypartial /* or pattern has lookbehind */
|
||||
) /* or could match empty */
|
||||
)
|
||||
))
|
||||
match_count = PCRE2_ERROR_PARTIAL;
|
||||
break; /* Exit from loop along the subject string */
|
||||
}
|
||||
|
||||
/* One or more states are active for the next character. */
|
||||
|
||||
ptr += clen; /* Advance to next subject character */
|
||||
} /* Loop to move along the subject string */
|
||||
|
||||
/* Control gets here from "break" a few lines above. If we have a match and
|
||||
PCRE2_ENDANCHORED is set, the match fails. */
|
||||
|
||||
if (match_count >= 0 &&
|
||||
((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0 &&
|
||||
ptr < end_subject)
|
||||
match_count = PCRE2_ERROR_NOMATCH;
|
||||
|
||||
return match_count;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Match a pattern using the DFA algorithm *
|
||||
*************************************************/
|
||||
|
||||
/* This function matches a compiled pattern to a subject string, using the
|
||||
alternate matching algorithm that finds all matches at once.
|
||||
|
||||
Arguments:
|
||||
code points to the compiled pattern
|
||||
subject subject string
|
||||
length length of subject string
|
||||
start_offset where to start matching in the subject
|
||||
options option bits
|
||||
match_data points to a match data structure
|
||||
mcontext points to a match context
|
||||
workspace pointer to workspace
|
||||
wscount size of workspace
|
||||
|
||||
Returns: > 0 => number of match offset pairs placed in offsets
|
||||
= 0 => offsets overflowed; longest matches are present
|
||||
-1 => failed to match
|
||||
< -1 => some kind of unexpected problem
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
|
||||
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
|
||||
pcre2_match_context *mcontext, int *workspace, PCRE2_SIZE wscount)
|
||||
{
|
||||
int rc;
|
||||
int was_zero_terminated = 0;
|
||||
|
||||
const pcre2_real_code *re = (const pcre2_real_code *)code;
|
||||
|
||||
PCRE2_SPTR start_match;
|
||||
PCRE2_SPTR end_subject;
|
||||
PCRE2_SPTR bumpalong_limit;
|
||||
PCRE2_SPTR req_cu_ptr;
|
||||
|
||||
BOOL utf, anchored, startline, firstline;
|
||||
BOOL has_first_cu = FALSE;
|
||||
BOOL has_req_cu = FALSE;
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
PCRE2_SPTR memchr_found_first_cu = NULL;
|
||||
PCRE2_SPTR memchr_found_first_cu2 = NULL;
|
||||
#endif
|
||||
|
||||
PCRE2_UCHAR first_cu = 0;
|
||||
PCRE2_UCHAR first_cu2 = 0;
|
||||
PCRE2_UCHAR req_cu = 0;
|
||||
PCRE2_UCHAR req_cu2 = 0;
|
||||
|
||||
const uint8_t *start_bits = NULL;
|
||||
|
||||
/* We need to have mb pointing to a match block, because the IS_NEWLINE macro
|
||||
is used below, and it expects NLBLOCK to be defined as a pointer. */
|
||||
|
||||
pcre2_callout_block cb;
|
||||
dfa_match_block actual_match_block;
|
||||
dfa_match_block *mb = &actual_match_block;
|
||||
|
||||
/* Set up a starting block of memory for use during recursive calls to
|
||||
internal_dfa_match(). By putting this on the stack, it minimizes resource use
|
||||
in the case when it is not needed. If this is too small, more memory is
|
||||
obtained from the heap. At the start of each block is an anchor structure.*/
|
||||
|
||||
int base_recursion_workspace[RWS_BASE_SIZE];
|
||||
RWS_anchor *rws = (RWS_anchor *)base_recursion_workspace;
|
||||
rws->next = NULL;
|
||||
rws->size = RWS_BASE_SIZE;
|
||||
rws->free = RWS_BASE_SIZE - RWS_ANCHOR_SIZE;
|
||||
|
||||
/* Recognize NULL, length 0 as an empty string. */
|
||||
|
||||
if (subject == NULL && length == 0) subject = (PCRE2_SPTR)"";
|
||||
|
||||
/* Plausibility checks */
|
||||
|
||||
if ((options & ~PUBLIC_DFA_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
|
||||
if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL)
|
||||
return PCRE2_ERROR_NULL;
|
||||
|
||||
if (length == PCRE2_ZERO_TERMINATED)
|
||||
{
|
||||
length = PRIV(strlen)(subject);
|
||||
was_zero_terminated = 1;
|
||||
}
|
||||
|
||||
if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE;
|
||||
if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
|
||||
|
||||
/* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same
|
||||
time. */
|
||||
|
||||
if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0 &&
|
||||
((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
|
||||
/* Invalid UTF support is not available for DFA matching. */
|
||||
|
||||
if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
|
||||
return PCRE2_ERROR_DFA_UINVALID_UTF;
|
||||
|
||||
/* Check that the first field in the block is the magic number. If it is not,
|
||||
return with PCRE2_ERROR_BADMAGIC. */
|
||||
|
||||
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
|
||||
|
||||
/* Check the code unit width. */
|
||||
|
||||
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
|
||||
return PCRE2_ERROR_BADMODE;
|
||||
|
||||
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
|
||||
options variable for this function. Users of PCRE2 who are not calling the
|
||||
function directly would like to have a way of setting these flags, in the same
|
||||
way that they can set pcre2_compile() flags like PCRE2_NO_AUTO_POSSESS with
|
||||
constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
|
||||
(*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which can now be
|
||||
transferred to the options for this function. The bits are guaranteed to be
|
||||
adjacent, but do not have the same values. This bit of Boolean trickery assumes
|
||||
that the match-time bits are not more significant than the flag bits. If by
|
||||
accident this is not the case, a compile-time division by zero error will
|
||||
occur. */
|
||||
|
||||
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
|
||||
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
|
||||
options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
|
||||
#undef FF
|
||||
#undef OO
|
||||
|
||||
/* If restarting after a partial match, do some sanity checks on the contents
|
||||
of the workspace. */
|
||||
|
||||
if ((options & PCRE2_DFA_RESTART) != 0)
|
||||
{
|
||||
if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
|
||||
workspace[1] > (int)((wscount - 2)/INTS_PER_STATEBLOCK))
|
||||
return PCRE2_ERROR_DFA_BADRESTART;
|
||||
}
|
||||
|
||||
/* Set some local values */
|
||||
|
||||
utf = (re->overall_options & PCRE2_UTF) != 0;
|
||||
start_match = subject + start_offset;
|
||||
end_subject = subject + length;
|
||||
req_cu_ptr = start_match - 1;
|
||||
anchored = (options & (PCRE2_ANCHORED|PCRE2_DFA_RESTART)) != 0 ||
|
||||
(re->overall_options & PCRE2_ANCHORED) != 0;
|
||||
|
||||
/* The "must be at the start of a line" flags are used in a loop when finding
|
||||
where to start. */
|
||||
|
||||
startline = (re->flags & PCRE2_STARTLINE) != 0;
|
||||
firstline = !anchored && (re->overall_options & PCRE2_FIRSTLINE) != 0;
|
||||
bumpalong_limit = end_subject;
|
||||
|
||||
/* Initialize and set up the fixed fields in the callout block, with a pointer
|
||||
in the match block. */
|
||||
|
||||
mb->cb = &cb;
|
||||
cb.version = 2;
|
||||
cb.subject = subject;
|
||||
cb.subject_length = (PCRE2_SIZE)(end_subject - subject);
|
||||
cb.callout_flags = 0;
|
||||
cb.capture_top = 1; /* No capture support */
|
||||
cb.capture_last = 0;
|
||||
cb.mark = NULL; /* No (*MARK) support */
|
||||
|
||||
/* Get data from the match context, if present, and fill in the remaining
|
||||
fields in the match block. It is an error to set an offset limit without
|
||||
setting the flag at compile time. */
|
||||
|
||||
if (mcontext == NULL)
|
||||
{
|
||||
mb->callout = NULL;
|
||||
mb->memctl = re->memctl;
|
||||
mb->match_limit = PRIV(default_match_context).match_limit;
|
||||
mb->match_limit_depth = PRIV(default_match_context).depth_limit;
|
||||
mb->heap_limit = PRIV(default_match_context).heap_limit;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (mcontext->offset_limit != PCRE2_UNSET)
|
||||
{
|
||||
if ((re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
|
||||
return PCRE2_ERROR_BADOFFSETLIMIT;
|
||||
bumpalong_limit = subject + mcontext->offset_limit;
|
||||
}
|
||||
mb->callout = mcontext->callout;
|
||||
mb->callout_data = mcontext->callout_data;
|
||||
mb->memctl = mcontext->memctl;
|
||||
mb->match_limit = mcontext->match_limit;
|
||||
mb->match_limit_depth = mcontext->depth_limit;
|
||||
mb->heap_limit = mcontext->heap_limit;
|
||||
}
|
||||
|
||||
if (mb->match_limit > re->limit_match)
|
||||
mb->match_limit = re->limit_match;
|
||||
|
||||
if (mb->match_limit_depth > re->limit_depth)
|
||||
mb->match_limit_depth = re->limit_depth;
|
||||
|
||||
if (mb->heap_limit > re->limit_heap)
|
||||
mb->heap_limit = re->limit_heap;
|
||||
|
||||
mb->start_code = (PCRE2_SPTR)((const uint8_t *)re + re->code_start);
|
||||
mb->tables = re->tables;
|
||||
mb->start_subject = subject;
|
||||
mb->end_subject = end_subject;
|
||||
mb->start_offset = start_offset;
|
||||
mb->allowemptypartial = (re->max_lookbehind > 0) ||
|
||||
(re->flags & PCRE2_MATCH_EMPTY) != 0;
|
||||
mb->moptions = options;
|
||||
mb->poptions = re->overall_options;
|
||||
mb->match_call_count = 0;
|
||||
mb->heap_used = 0;
|
||||
|
||||
/* Process the \R and newline settings. */
|
||||
|
||||
mb->bsr_convention = re->bsr_convention;
|
||||
mb->nltype = NLTYPE_FIXED;
|
||||
switch(re->newline_convention)
|
||||
{
|
||||
case PCRE2_NEWLINE_CR:
|
||||
mb->nllen = 1;
|
||||
mb->nl[0] = CHAR_CR;
|
||||
break;
|
||||
|
||||
case PCRE2_NEWLINE_LF:
|
||||
mb->nllen = 1;
|
||||
mb->nl[0] = CHAR_NL;
|
||||
break;
|
||||
|
||||
case PCRE2_NEWLINE_NUL:
|
||||
mb->nllen = 1;
|
||||
mb->nl[0] = CHAR_NUL;
|
||||
break;
|
||||
|
||||
case PCRE2_NEWLINE_CRLF:
|
||||
mb->nllen = 2;
|
||||
mb->nl[0] = CHAR_CR;
|
||||
mb->nl[1] = CHAR_NL;
|
||||
break;
|
||||
|
||||
case PCRE2_NEWLINE_ANY:
|
||||
mb->nltype = NLTYPE_ANY;
|
||||
break;
|
||||
|
||||
case PCRE2_NEWLINE_ANYCRLF:
|
||||
mb->nltype = NLTYPE_ANYCRLF;
|
||||
break;
|
||||
|
||||
default:
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
return PCRE2_ERROR_INTERNAL;
|
||||
}
|
||||
|
||||
/* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
|
||||
we must also check that a starting offset does not point into the middle of a
|
||||
multiunit character. We check only the portion of the subject that is going to
|
||||
be inspected during matching - from the offset minus the maximum back reference
|
||||
to the given length. This saves time when a small part of a large subject is
|
||||
being matched by the use of a starting offset. Note that the maximum lookbehind
|
||||
is a number of characters, not code units. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
||||
{
|
||||
PCRE2_SPTR check_subject = start_match; /* start_match includes offset */
|
||||
|
||||
if (start_offset > 0)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
unsigned int i;
|
||||
if (start_match < end_subject && NOT_FIRSTCU(*start_match))
|
||||
return PCRE2_ERROR_BADUTFOFFSET;
|
||||
for (i = re->max_lookbehind; i > 0 && check_subject > subject; i--)
|
||||
{
|
||||
check_subject--;
|
||||
while (check_subject > subject &&
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
(*check_subject & 0xc0) == 0x80)
|
||||
#else /* 16-bit */
|
||||
(*check_subject & 0xfc00) == 0xdc00)
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||
check_subject--;
|
||||
}
|
||||
#else /* In the 32-bit library, one code unit equals one character. */
|
||||
check_subject -= re->max_lookbehind;
|
||||
if (check_subject < subject) check_subject = subject;
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||
}
|
||||
|
||||
/* Validate the relevant portion of the subject. After an error, adjust the
|
||||
offset to be an absolute offset in the whole string. */
|
||||
|
||||
match_data->rc = PRIV(valid_utf)(check_subject,
|
||||
length - (PCRE2_SIZE)(check_subject - subject), &(match_data->startchar));
|
||||
if (match_data->rc != 0)
|
||||
{
|
||||
match_data->startchar += (PCRE2_SIZE)(check_subject - subject);
|
||||
return match_data->rc;
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Set up the first code unit to match, if available. If there's no first code
|
||||
unit there may be a bitmap of possible first characters. */
|
||||
|
||||
if ((re->flags & PCRE2_FIRSTSET) != 0)
|
||||
{
|
||||
has_first_cu = TRUE;
|
||||
first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
|
||||
if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
|
||||
{
|
||||
first_cu2 = TABLE_GET(first_cu, mb->tables + fcc_offset, first_cu);
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (first_cu > 127 && !utf && (re->overall_options & PCRE2_UCP) != 0)
|
||||
first_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(first_cu);
|
||||
#else
|
||||
if (first_cu > 127 && (utf || (re->overall_options & PCRE2_UCP) != 0))
|
||||
first_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(first_cu);
|
||||
#endif
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
}
|
||||
else
|
||||
if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
|
||||
start_bits = re->start_bitmap;
|
||||
|
||||
/* There may be a "last known required code unit" set. */
|
||||
|
||||
if ((re->flags & PCRE2_LASTSET) != 0)
|
||||
{
|
||||
has_req_cu = TRUE;
|
||||
req_cu = req_cu2 = (PCRE2_UCHAR)(re->last_codeunit);
|
||||
if ((re->flags & PCRE2_LASTCASELESS) != 0)
|
||||
{
|
||||
req_cu2 = TABLE_GET(req_cu, mb->tables + fcc_offset, req_cu);
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (req_cu > 127 && !utf && (re->overall_options & PCRE2_UCP) != 0)
|
||||
req_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(req_cu);
|
||||
#else
|
||||
if (req_cu > 127 && (utf || (re->overall_options & PCRE2_UCP) != 0))
|
||||
req_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(req_cu);
|
||||
#endif
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
}
|
||||
|
||||
/* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT,
|
||||
free the memory that was obtained. */
|
||||
|
||||
if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
|
||||
{
|
||||
match_data->memctl.free((void *)match_data->subject,
|
||||
match_data->memctl.memory_data);
|
||||
match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT;
|
||||
}
|
||||
|
||||
/* Fill in fields that are always returned in the match data. */
|
||||
|
||||
match_data->code = re;
|
||||
match_data->subject = NULL; /* Default for no match */
|
||||
match_data->mark = NULL;
|
||||
match_data->matchedby = PCRE2_MATCHEDBY_DFA_INTERPRETER;
|
||||
|
||||
/* Call the main matching function, looping for a non-anchored regex after a
|
||||
failed match. If not restarting, perform certain optimizations at the start of
|
||||
a match. */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
/* ----------------- Start of match optimizations ---------------- */
|
||||
|
||||
/* There are some optimizations that avoid running the match if a known
|
||||
starting point is not found, or if a known later code unit is not present.
|
||||
However, there is an option (settable at compile time) that disables
|
||||
these, for testing and for ensuring that all callouts do actually occur.
|
||||
The optimizations must also be avoided when restarting a DFA match. */
|
||||
|
||||
if ((re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0 &&
|
||||
(options & PCRE2_DFA_RESTART) == 0)
|
||||
{
|
||||
/* If firstline is TRUE, the start of the match is constrained to the first
|
||||
line of a multiline string. That is, the match must be before or at the
|
||||
first newline following the start of matching. Temporarily adjust
|
||||
end_subject so that we stop the optimization scans for a first code unit
|
||||
immediately after the first character of a newline (the first code unit can
|
||||
legitimately be a newline). If the match fails at the newline, later code
|
||||
breaks this loop. */
|
||||
|
||||
if (firstline)
|
||||
{
|
||||
PCRE2_SPTR t = start_match;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
while (t < end_subject && !IS_NEWLINE(t))
|
||||
{
|
||||
t++;
|
||||
ACROSSCHAR(t < end_subject, t, t++);
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
while (t < end_subject && !IS_NEWLINE(t)) t++;
|
||||
end_subject = t;
|
||||
}
|
||||
|
||||
/* Anchored: check the first code unit if one is recorded. This may seem
|
||||
pointless but it can help in detecting a no match case without scanning for
|
||||
the required code unit. */
|
||||
|
||||
if (anchored)
|
||||
{
|
||||
if (has_first_cu || start_bits != NULL)
|
||||
{
|
||||
BOOL ok = start_match < end_subject;
|
||||
if (ok)
|
||||
{
|
||||
PCRE2_UCHAR c = UCHAR21TEST(start_match);
|
||||
ok = has_first_cu && (c == first_cu || c == first_cu2);
|
||||
if (!ok && start_bits != NULL)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (c > 255) c = 255;
|
||||
#endif
|
||||
ok = (start_bits[c/8] & (1u << (c&7))) != 0;
|
||||
}
|
||||
}
|
||||
if (!ok) break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Not anchored. Advance to a unique first code unit if there is one. */
|
||||
|
||||
else
|
||||
{
|
||||
if (has_first_cu)
|
||||
{
|
||||
if (first_cu != first_cu2) /* Caseless */
|
||||
{
|
||||
/* In 16-bit and 32_bit modes we have to do our own search, so can
|
||||
look for both cases at once. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
PCRE2_UCHAR smc;
|
||||
while (start_match < end_subject &&
|
||||
(smc = UCHAR21TEST(start_match)) != first_cu &&
|
||||
smc != first_cu2)
|
||||
start_match++;
|
||||
#else
|
||||
/* In 8-bit mode, the use of memchr() gives a big speed up, even
|
||||
though we have to call it twice in order to find the earliest
|
||||
occurrence of the code unit in either of its cases. Caching is used
|
||||
to remember the positions of previously found code units. This can
|
||||
make a huge difference when the strings are very long and only one
|
||||
case is actually present. */
|
||||
|
||||
PCRE2_SPTR pp1 = NULL;
|
||||
PCRE2_SPTR pp2 = NULL;
|
||||
PCRE2_SIZE searchlength = end_subject - start_match;
|
||||
|
||||
/* If we haven't got a previously found position for first_cu, or if
|
||||
the current starting position is later, we need to do a search. If
|
||||
the code unit is not found, set it to the end. */
|
||||
|
||||
if (memchr_found_first_cu == NULL ||
|
||||
start_match > memchr_found_first_cu)
|
||||
{
|
||||
pp1 = memchr(start_match, first_cu, searchlength);
|
||||
memchr_found_first_cu = (pp1 == NULL)? end_subject : pp1;
|
||||
}
|
||||
|
||||
/* If the start is before a previously found position, use the
|
||||
previous position, or NULL if a previous search failed. */
|
||||
|
||||
else pp1 = (memchr_found_first_cu == end_subject)? NULL :
|
||||
memchr_found_first_cu;
|
||||
|
||||
/* Do the same thing for the other case. */
|
||||
|
||||
if (memchr_found_first_cu2 == NULL ||
|
||||
start_match > memchr_found_first_cu2)
|
||||
{
|
||||
pp2 = memchr(start_match, first_cu2, searchlength);
|
||||
memchr_found_first_cu2 = (pp2 == NULL)? end_subject : pp2;
|
||||
}
|
||||
|
||||
else pp2 = (memchr_found_first_cu2 == end_subject)? NULL :
|
||||
memchr_found_first_cu2;
|
||||
|
||||
/* Set the start to the end of the subject if neither case was found.
|
||||
Otherwise, use the earlier found point. */
|
||||
|
||||
if (pp1 == NULL)
|
||||
start_match = (pp2 == NULL)? end_subject : pp2;
|
||||
else
|
||||
start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2;
|
||||
|
||||
#endif /* 8-bit handling */
|
||||
}
|
||||
|
||||
/* The caseful case is much simpler. */
|
||||
|
||||
else
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
while (start_match < end_subject && UCHAR21TEST(start_match) !=
|
||||
first_cu)
|
||||
start_match++;
|
||||
#else /* 8-bit code units */
|
||||
start_match = memchr(start_match, first_cu, end_subject - start_match);
|
||||
if (start_match == NULL) start_match = end_subject;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* If we can't find the required code unit, having reached the true end
|
||||
of the subject, break the bumpalong loop, to force a match failure,
|
||||
except when doing partial matching, when we let the next cycle run at
|
||||
the end of the subject. To see why, consider the pattern /(?<=abc)def/,
|
||||
which partially matches "abc", even though the string does not contain
|
||||
the starting character "d". If we have not reached the true end of the
|
||||
subject (PCRE2_FIRSTLINE caused end_subject to be temporarily modified)
|
||||
we also let the cycle run, because the matching string is legitimately
|
||||
allowed to start with the first code unit of a newline. */
|
||||
|
||||
if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0 &&
|
||||
start_match >= mb->end_subject)
|
||||
break;
|
||||
}
|
||||
|
||||
/* If there's no first code unit, advance to just after a linebreak for a
|
||||
multiline match if required. */
|
||||
|
||||
else if (startline)
|
||||
{
|
||||
if (start_match > mb->start_subject + start_offset)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
while (start_match < end_subject && !WAS_NEWLINE(start_match))
|
||||
{
|
||||
start_match++;
|
||||
ACROSSCHAR(start_match < end_subject, start_match, start_match++);
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
while (start_match < end_subject && !WAS_NEWLINE(start_match))
|
||||
start_match++;
|
||||
|
||||
/* If we have just passed a CR and the newline option is ANY or
|
||||
ANYCRLF, and we are now at a LF, advance the match position by one
|
||||
more code unit. */
|
||||
|
||||
if (start_match[-1] == CHAR_CR &&
|
||||
(mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
|
||||
start_match < end_subject &&
|
||||
UCHAR21TEST(start_match) == CHAR_NL)
|
||||
start_match++;
|
||||
}
|
||||
}
|
||||
|
||||
/* If there's no first code unit or a requirement for a multiline line
|
||||
start, advance to a non-unique first code unit if any have been
|
||||
identified. The bitmap contains only 256 bits. When code units are 16 or
|
||||
32 bits wide, all code units greater than 254 set the 255 bit. */
|
||||
|
||||
else if (start_bits != NULL)
|
||||
{
|
||||
while (start_match < end_subject)
|
||||
{
|
||||
uint32_t c = UCHAR21TEST(start_match);
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (c > 255) c = 255;
|
||||
#endif
|
||||
if ((start_bits[c/8] & (1u << (c&7))) != 0) break;
|
||||
start_match++;
|
||||
}
|
||||
|
||||
/* See comment above in first_cu checking about the next line. */
|
||||
|
||||
if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0 &&
|
||||
start_match >= mb->end_subject)
|
||||
break;
|
||||
}
|
||||
} /* End of first code unit handling */
|
||||
|
||||
/* Restore fudged end_subject */
|
||||
|
||||
end_subject = mb->end_subject;
|
||||
|
||||
/* The following two optimizations are disabled for partial matching. */
|
||||
|
||||
if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0)
|
||||
{
|
||||
PCRE2_SPTR p;
|
||||
|
||||
/* The minimum matching length is a lower bound; no actual string of that
|
||||
length may actually match the pattern. Although the value is, strictly,
|
||||
in characters, we treat it as code units to avoid spending too much time
|
||||
in this optimization. */
|
||||
|
||||
if (end_subject - start_match < re->minlength) goto NOMATCH_EXIT;
|
||||
|
||||
/* If req_cu is set, we know that that code unit must appear in the
|
||||
subject for the match to succeed. If the first code unit is set, req_cu
|
||||
must be later in the subject; otherwise the test starts at the match
|
||||
point. This optimization can save a huge amount of backtracking in
|
||||
patterns with nested unlimited repeats that aren't going to match.
|
||||
Writing separate code for cased/caseless versions makes it go faster, as
|
||||
does using an autoincrement and backing off on a match. As in the case of
|
||||
the first code unit, using memchr() in the 8-bit library gives a big
|
||||
speed up. Unlike the first_cu check above, we do not need to call
|
||||
memchr() twice in the caseless case because we only need to check for the
|
||||
presence of the character in either case, not find the first occurrence.
|
||||
|
||||
The search can be skipped if the code unit was found later than the
|
||||
current starting point in a previous iteration of the bumpalong loop.
|
||||
|
||||
HOWEVER: when the subject string is very, very long, searching to its end
|
||||
can take a long time, and give bad performance on quite ordinary
|
||||
patterns. This showed up when somebody was matching something like
|
||||
/^\d+C/ on a 32-megabyte string... so we don't do this when the string is
|
||||
sufficiently long, but it's worth searching a lot more for unanchored
|
||||
patterns. */
|
||||
|
||||
p = start_match + (has_first_cu? 1:0);
|
||||
if (has_req_cu && p > req_cu_ptr)
|
||||
{
|
||||
PCRE2_SIZE check_length = end_subject - start_match;
|
||||
|
||||
if (check_length < REQ_CU_MAX ||
|
||||
(!anchored && check_length < REQ_CU_MAX * 1000))
|
||||
{
|
||||
if (req_cu != req_cu2) /* Caseless */
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
while (p < end_subject)
|
||||
{
|
||||
uint32_t pp = UCHAR21INCTEST(p);
|
||||
if (pp == req_cu || pp == req_cu2) { p--; break; }
|
||||
}
|
||||
#else /* 8-bit code units */
|
||||
PCRE2_SPTR pp = p;
|
||||
p = memchr(pp, req_cu, end_subject - pp);
|
||||
if (p == NULL)
|
||||
{
|
||||
p = memchr(pp, req_cu2, end_subject - pp);
|
||||
if (p == NULL) p = end_subject;
|
||||
}
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
|
||||
}
|
||||
|
||||
/* The caseful case */
|
||||
|
||||
else
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
while (p < end_subject)
|
||||
{
|
||||
if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
|
||||
}
|
||||
|
||||
#else /* 8-bit code units */
|
||||
p = memchr(p, req_cu, end_subject - p);
|
||||
if (p == NULL) p = end_subject;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* If we can't find the required code unit, break the matching loop,
|
||||
forcing a match failure. */
|
||||
|
||||
if (p >= end_subject) break;
|
||||
|
||||
/* If we have found the required code unit, save the point where we
|
||||
found it, so that we don't search again next time round the loop if
|
||||
the start hasn't passed this code unit yet. */
|
||||
|
||||
req_cu_ptr = p;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ------------ End of start of match optimizations ------------ */
|
||||
|
||||
/* Give no match if we have passed the bumpalong limit. */
|
||||
|
||||
if (start_match > bumpalong_limit) break;
|
||||
|
||||
/* OK, now we can do the business */
|
||||
|
||||
mb->start_used_ptr = start_match;
|
||||
mb->last_used_ptr = start_match;
|
||||
mb->recursive = NULL;
|
||||
|
||||
rc = internal_dfa_match(
|
||||
mb, /* fixed match data */
|
||||
mb->start_code, /* this subexpression's code */
|
||||
start_match, /* where we currently are */
|
||||
start_offset, /* start offset in subject */
|
||||
match_data->ovector, /* offset vector */
|
||||
(uint32_t)match_data->oveccount * 2, /* actual size of same */
|
||||
workspace, /* workspace vector */
|
||||
(int)wscount, /* size of same */
|
||||
0, /* function recurse level */
|
||||
base_recursion_workspace); /* initial workspace for recursion */
|
||||
|
||||
/* Anything other than "no match" means we are done, always; otherwise, carry
|
||||
on only if not anchored. */
|
||||
|
||||
if (rc != PCRE2_ERROR_NOMATCH || anchored)
|
||||
{
|
||||
if (rc == PCRE2_ERROR_PARTIAL && match_data->oveccount > 0)
|
||||
{
|
||||
match_data->ovector[0] = (PCRE2_SIZE)(start_match - subject);
|
||||
match_data->ovector[1] = (PCRE2_SIZE)(end_subject - subject);
|
||||
}
|
||||
match_data->subject_length = length;
|
||||
match_data->leftchar = (PCRE2_SIZE)(mb->start_used_ptr - subject);
|
||||
match_data->rightchar = (PCRE2_SIZE)(mb->last_used_ptr - subject);
|
||||
match_data->startchar = (PCRE2_SIZE)(start_match - subject);
|
||||
match_data->rc = rc;
|
||||
|
||||
if (rc >= 0 &&(options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
|
||||
{
|
||||
length = CU2BYTES(length + was_zero_terminated);
|
||||
match_data->subject = match_data->memctl.malloc(length,
|
||||
match_data->memctl.memory_data);
|
||||
if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
memcpy((void *)match_data->subject, subject, length);
|
||||
match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (rc >= 0 || rc == PCRE2_ERROR_PARTIAL) match_data->subject = subject;
|
||||
}
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* Advance to the next subject character unless we are at the end of a line
|
||||
and firstline is set. */
|
||||
|
||||
if (firstline && IS_NEWLINE(start_match)) break;
|
||||
start_match++;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
ACROSSCHAR(start_match < end_subject, start_match, start_match++);
|
||||
}
|
||||
#endif
|
||||
if (start_match > end_subject) break;
|
||||
|
||||
/* If we have just passed a CR and we are now at a LF, and the pattern does
|
||||
not contain any explicit matches for \r or \n, and the newline option is CRLF
|
||||
or ANY or ANYCRLF, advance the match position by one more character. */
|
||||
|
||||
if (UCHAR21TEST(start_match - 1) == CHAR_CR &&
|
||||
start_match < end_subject &&
|
||||
UCHAR21TEST(start_match) == CHAR_NL &&
|
||||
(re->flags & PCRE2_HASCRORLF) == 0 &&
|
||||
(mb->nltype == NLTYPE_ANY ||
|
||||
mb->nltype == NLTYPE_ANYCRLF ||
|
||||
mb->nllen == 2))
|
||||
start_match++;
|
||||
|
||||
} /* "Bumpalong" loop */
|
||||
|
||||
NOMATCH_EXIT:
|
||||
rc = PCRE2_ERROR_NOMATCH;
|
||||
|
||||
EXIT:
|
||||
while (rws->next != NULL)
|
||||
{
|
||||
RWS_anchor *next = rws->next;
|
||||
rws->next = next->next;
|
||||
mb->memctl.free(next, mb->memctl.memory_data);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* These #undefs are here to enable unity builds with CMake. */
|
||||
|
||||
#undef NLBLOCK /* Block containing newline information */
|
||||
#undef PSSTART /* Field containing processed string start */
|
||||
#undef PSEND /* Field containing processed string end */
|
||||
|
||||
/* End of pcre2_dfa_match.c */
|
||||
@@ -1,297 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This is a freestanding support program to generate a file containing
|
||||
character tables for PCRE2. The tables are built using the pcre2_maketables()
|
||||
function, which is part of the PCRE2 API. By default, the system's "C" locale
|
||||
is used rather than what the building user happens to have set, but the -L
|
||||
option can be used to select the current locale from the LC_ALL environment
|
||||
variable. By default, the tables are written in source form, but if -b is
|
||||
given, they are written in binary. */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <locale.h>
|
||||
|
||||
#define PCRE2_DFTABLES /* for pcre2_internal.h, pcre2_maketables.c */
|
||||
|
||||
#define PCRE2_CODE_UNIT_WIDTH 0 /* Must be set, but not relevant here */
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
#include "pcre2_maketables.c"
|
||||
|
||||
|
||||
/* Names of the character classes that have their own bit map in the cbits
section of the tables. main() consumes these strictly in order (one name per
32-byte map) when it annotates the generated source, so the order of the
entries here must not be changed. */

static const char *classlist[] =
  {
  "space",
  "xdigit",
  "digit",
  "upper",
  "lower",
  "word",
  "graph",
  "print",
  "punct",
  "cntrl"
  };
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Usage *
|
||||
*************************************************/
|
||||
|
||||
/* Write the command-line summary to stderr. The text contains no conversion
specifications, so it is emitted verbatim. */

static void
usage(void)
{
static const char help_text[] =
  "Usage: pcre2_dftables [options] <output file>\n"
  " -b Write output in binary (default is source code)\n"
  " -L Use locale from LC_ALL (default is \"C\" locale)\n";

(void)fputs(help_text, stderr);
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Entry point *
|
||||
*************************************************/
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
FILE *f;
|
||||
int i;
|
||||
int nclass = 0;
|
||||
BOOL binary = FALSE;
|
||||
char *env = (char *)"C";
|
||||
const uint8_t *tables;
|
||||
const uint8_t *base_of_tables;
|
||||
|
||||
/* Process options */
|
||||
|
||||
for (i = 1; i < argc; i++)
|
||||
{
|
||||
char *arg = argv[i];
|
||||
if (*arg != '-') break;
|
||||
|
||||
if (strcmp(arg, "-help") == 0 || strcmp(arg, "--help") == 0)
|
||||
{
|
||||
usage();
|
||||
return 0;
|
||||
}
|
||||
|
||||
else if (strcmp(arg, "-L") == 0)
|
||||
{
|
||||
if (setlocale(LC_ALL, "") == NULL)
|
||||
{
|
||||
(void)fprintf(stderr, "pcre2_dftables: setlocale() failed\n");
|
||||
return 1;
|
||||
}
|
||||
env = getenv("LC_ALL");
|
||||
}
|
||||
|
||||
else if (strcmp(arg, "-b") == 0)
|
||||
binary = TRUE;
|
||||
|
||||
else
|
||||
{
|
||||
(void)fprintf(stderr, "pcre2_dftables: unrecognized option %s\n", arg);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (i != argc - 1)
|
||||
{
|
||||
(void)fprintf(stderr, "pcre2_dftables: one filename argument is required\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Make the tables */
|
||||
|
||||
tables = maketables();
|
||||
base_of_tables = tables;
|
||||
|
||||
f = fopen(argv[i], "wb");
|
||||
if (f == NULL)
|
||||
{
|
||||
fprintf(stderr, "pcre2_dftables: failed to open %s for writing\n", argv[1]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* If -b was specified, we write the tables in binary. */
|
||||
|
||||
if (binary)
|
||||
{
|
||||
int yield = 0;
|
||||
size_t len = fwrite(tables, 1, TABLES_LENGTH, f);
|
||||
if (len != TABLES_LENGTH)
|
||||
{
|
||||
(void)fprintf(stderr, "pcre2_dftables: fwrite() returned wrong length %d "
|
||||
"instead of %d\n", (int)len, TABLES_LENGTH);
|
||||
yield = 1;
|
||||
}
|
||||
fclose(f);
|
||||
free((void *)base_of_tables);
|
||||
return yield;
|
||||
}
|
||||
|
||||
/* Write the tables as source code for inclusion in the PCRE2 library. There
|
||||
are several fprintf() calls here, because gcc in pedantic mode complains about
|
||||
the very long string otherwise. */
|
||||
|
||||
(void)fprintf(f,
|
||||
"/*************************************************\n"
|
||||
"* Perl-Compatible Regular Expressions *\n"
|
||||
"*************************************************/\n\n"
|
||||
"/* This file was automatically written by the pcre2_dftables auxiliary\n"
|
||||
"program. It contains character tables that are used when no external\n"
|
||||
"tables are passed to PCRE2 by the application that calls it. The tables\n"
|
||||
"are used only for characters whose code values are less than 256, and\n"
|
||||
"only relevant if not in UCP mode. */\n\n");
|
||||
|
||||
(void)fprintf(f,
|
||||
"/* This set of tables was written in the %s locale. */\n\n", env);
|
||||
|
||||
(void)fprintf(f,
|
||||
"/* The pcre2_ftables program (which is distributed with PCRE2) can be used\n"
|
||||
"to build alternative versions of this file. This is necessary if you are\n"
|
||||
"running in an EBCDIC environment, or if you want to default to a different\n"
|
||||
"encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates\n"
|
||||
"these tables in the \"C\" locale by default. This happens automatically if\n"
|
||||
"PCRE2 is configured with --enable-rebuild-chartables. However, you can run\n"
|
||||
"pcre2_dftables manually with the -L option to build tables using the LC_ALL\n"
|
||||
"locale. */\n\n");
|
||||
|
||||
/* Force config.h in z/OS */
|
||||
|
||||
#if defined NATIVE_ZOS
|
||||
(void)fprintf(f,
|
||||
"/* For z/OS, config.h is forced */\n"
|
||||
"#ifndef HAVE_CONFIG_H\n"
|
||||
"#define HAVE_CONFIG_H 1\n"
|
||||
"#endif\n\n");
|
||||
#endif
|
||||
|
||||
(void)fprintf(f,
|
||||
"#ifdef HAVE_CONFIG_H\n"
|
||||
"#include \"config.h\"\n"
|
||||
"#endif\n\n"
|
||||
"#include \"pcre2_internal.h\"\n\n");
|
||||
|
||||
(void)fprintf(f,
|
||||
"const uint8_t PRIV(default_tables)[] = {\n\n"
|
||||
"/* This table is a lower casing table. */\n\n");
|
||||
|
||||
(void)fprintf(f, " ");
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
||||
fprintf(f, "%3d", *tables++);
|
||||
if (i != 255) fprintf(f, ",");
|
||||
}
|
||||
(void)fprintf(f, ",\n\n");
|
||||
|
||||
(void)fprintf(f, "/* This table is a case flipping table. */\n\n");
|
||||
|
||||
(void)fprintf(f, " ");
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
||||
fprintf(f, "%3d", *tables++);
|
||||
if (i != 255) fprintf(f, ",");
|
||||
}
|
||||
(void)fprintf(f, ",\n\n");
|
||||
|
||||
(void)fprintf(f,
|
||||
"/* This table contains bit maps for various character classes. Each map is 32\n"
|
||||
"bytes long and the bits run from the least significant end of each byte. The\n"
|
||||
"classes that have their own maps are: space, xdigit, digit, upper, lower, word,\n"
|
||||
"graph, print, punct, and cntrl. Other classes are built from combinations. */\n\n");
|
||||
|
||||
(void)fprintf(f, " ");
|
||||
for (i = 0; i < cbit_length; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0)
|
||||
{
|
||||
if ((i & 31) == 0) (void)fprintf(f, "\n");
|
||||
if ((i & 24) == 8) (void)fprintf(f, " /* %s */", classlist[nclass++]);
|
||||
(void)fprintf(f, "\n ");
|
||||
}
|
||||
(void)fprintf(f, "0x%02x", *tables++);
|
||||
if (i != cbit_length - 1) (void)fprintf(f, ",");
|
||||
}
|
||||
(void)fprintf(f, ",\n\n");
|
||||
|
||||
(void)fprintf(f,
|
||||
"/* This table identifies various classes of character by individual bits:\n"
|
||||
" 0x%02x white space character\n"
|
||||
" 0x%02x letter\n"
|
||||
" 0x%02x lower case letter\n"
|
||||
" 0x%02x decimal digit\n"
|
||||
" 0x%02x word (alphanumeric or '_')\n*/\n\n",
|
||||
ctype_space, ctype_letter, ctype_lcletter, ctype_digit, ctype_word);
|
||||
|
||||
(void)fprintf(f, " ");
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0)
|
||||
{
|
||||
(void)fprintf(f, " /* ");
|
||||
if (isprint(i-8)) (void)fprintf(f, " %c -", i-8);
|
||||
else (void)fprintf(f, "%3d-", i-8);
|
||||
if (isprint(i-1)) (void)fprintf(f, " %c ", i-1);
|
||||
else (void)fprintf(f, "%3d", i-1);
|
||||
(void)fprintf(f, " */\n ");
|
||||
}
|
||||
(void)fprintf(f, "0x%02x", *tables++);
|
||||
if (i != 255) (void)fprintf(f, ",");
|
||||
}
|
||||
|
||||
(void)fprintf(f, "};/* ");
|
||||
if (isprint(i-8)) (void)fprintf(f, " %c -", i-8);
|
||||
else (void)fprintf(f, "%3d-", i-8);
|
||||
if (isprint(i-1)) (void)fprintf(f, " %c ", i-1);
|
||||
else (void)fprintf(f, "%3d", i-1);
|
||||
(void)fprintf(f, " */\n\n/* End of pcre2_chartables.c */\n");
|
||||
|
||||
fclose(f);
|
||||
free((void *)base_of_tables);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre2_dftables.c */
|
||||
@@ -1,367 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
#define STRING(a) # a
|
||||
#define XSTRING(s) STRING(s)
|
||||
|
||||
/* The texts of compile-time error messages. Compile-time error numbers start
|
||||
at COMPILE_ERROR_BASE (100).
|
||||
|
||||
This used to be a table of strings, but in order to reduce the number of
|
||||
relocations needed when a shared library is loaded dynamically, it is now one
|
||||
long string. We cannot use a table of offsets, because the lengths of inserts
|
||||
such as XSTRING(MAX_NAME_SIZE) are not known. Instead,
|
||||
pcre2_get_error_message() counts through to the one it wants - this isn't a
|
||||
performance issue because these strings are used only when there is an error.
|
||||
|
||||
Each substring ends with \0 to insert a null character. This includes the final
|
||||
substring, so that the whole string ends with \0\0, which can be detected when
|
||||
counting through. */
|
||||
|
||||
static const unsigned char compile_error_texts[] =
|
||||
"no error\0"
|
||||
"\\ at end of pattern\0"
|
||||
"\\c at end of pattern\0"
|
||||
"unrecognized character follows \\\0"
|
||||
"numbers out of order in {} quantifier\0"
|
||||
/* 5 */
|
||||
"number too big in {} quantifier\0"
|
||||
"missing terminating ] for character class\0"
|
||||
"escape sequence is invalid in character class\0"
|
||||
"range out of order in character class\0"
|
||||
"quantifier does not follow a repeatable item\0"
|
||||
/* 10 */
|
||||
"internal error: unexpected repeat\0"
|
||||
"unrecognized character after (? or (?-\0"
|
||||
"POSIX named classes are supported only within a class\0"
|
||||
"POSIX collating elements are not supported\0"
|
||||
"missing closing parenthesis\0"
|
||||
/* 15 */
|
||||
"reference to non-existent subpattern\0"
|
||||
"pattern passed as NULL with non-zero length\0"
|
||||
"unrecognised compile-time option bit(s)\0"
|
||||
"missing ) after (?# comment\0"
|
||||
"parentheses are too deeply nested\0"
|
||||
/* 20 */
|
||||
"regular expression is too large\0"
|
||||
"failed to allocate heap memory\0"
|
||||
"unmatched closing parenthesis\0"
|
||||
"internal error: code overflow\0"
|
||||
"missing closing parenthesis for condition\0"
|
||||
/* 25 */
|
||||
"length of lookbehind assertion is not limited\0"
|
||||
"a relative value of zero is not allowed\0"
|
||||
"conditional subpattern contains more than two branches\0"
|
||||
"atomic assertion expected after (?( or (?(?C)\0"
|
||||
"digit expected after (?+ or (?-\0"
|
||||
/* 30 */
|
||||
"unknown POSIX class name\0"
|
||||
"internal error in pcre2_study(): should not occur\0"
|
||||
"this version of PCRE2 does not have Unicode support\0"
|
||||
"parentheses are too deeply nested (stack check)\0"
|
||||
"character code point value in \\x{} or \\o{} is too large\0"
|
||||
/* 35 */
|
||||
"lookbehind is too complicated\0"
|
||||
"\\C is not allowed in a lookbehind assertion in UTF-" XSTRING(PCRE2_CODE_UNIT_WIDTH) " mode\0"
|
||||
"PCRE2 does not support \\F, \\L, \\l, \\N{name}, \\U, or \\u\0"
|
||||
"number after (?C is greater than 255\0"
|
||||
"closing parenthesis for (?C expected\0"
|
||||
/* 40 */
|
||||
"invalid escape sequence in (*VERB) name\0"
|
||||
"unrecognized character after (?P\0"
|
||||
"syntax error in subpattern name (missing terminator?)\0"
|
||||
"two named subpatterns have the same name (PCRE2_DUPNAMES not set)\0"
|
||||
"subpattern name must start with a non-digit\0"
|
||||
/* 45 */
|
||||
"this version of PCRE2 does not have support for \\P, \\p, or \\X\0"
|
||||
"malformed \\P or \\p sequence\0"
|
||||
"unknown property after \\P or \\p\0"
|
||||
"subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " code units)\0"
|
||||
"too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"
|
||||
/* 50 */
|
||||
"invalid range in character class\0"
|
||||
"octal value is greater than \\377 in 8-bit non-UTF-8 mode\0"
|
||||
"internal error: overran compiling workspace\0"
|
||||
"internal error: previously-checked referenced subpattern not found\0"
|
||||
"DEFINE subpattern contains more than one branch\0"
|
||||
/* 55 */
|
||||
"missing opening brace after \\o\0"
|
||||
"internal error: unknown newline setting\0"
|
||||
"\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
|
||||
"(?R (recursive pattern call) must be followed by a closing parenthesis\0"
|
||||
/* "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0" */
|
||||
"obsolete error (should not occur)\0" /* Was the above */
|
||||
/* 60 */
|
||||
"(*VERB) not recognized or malformed\0"
|
||||
"subpattern number is too big\0"
|
||||
"subpattern name expected\0"
|
||||
"internal error: parsed pattern overflow\0"
|
||||
"non-octal character in \\o{} (closing brace missing?)\0"
|
||||
/* 65 */
|
||||
"different names for subpatterns of the same number are not allowed\0"
|
||||
"(*MARK) must have an argument\0"
|
||||
"non-hex character in \\x{} (closing brace missing?)\0"
|
||||
#ifndef EBCDIC
|
||||
"\\c must be followed by a printable ASCII character\0"
|
||||
#else
|
||||
"\\c must be followed by a letter or one of [\\]^_?\0"
|
||||
#endif
|
||||
"\\k is not followed by a braced, angle-bracketed, or quoted name\0"
|
||||
/* 70 */
|
||||
"internal error: unknown meta code in check_lookbehinds()\0"
|
||||
"\\N is not supported in a class\0"
|
||||
"callout string is too long\0"
|
||||
"disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
|
||||
"using UTF is disabled by the application\0"
|
||||
/* 75 */
|
||||
"using UCP is disabled by the application\0"
|
||||
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
|
||||
"character code point value in \\u.... sequence is too large\0"
|
||||
"digits missing after \\x or in \\x{} or \\o{} or \\N{U+}\0"
|
||||
"syntax error or number too big in (?(VERSION condition\0"
|
||||
/* 80 */
|
||||
"internal error: unknown opcode in auto_possessify()\0"
|
||||
"missing terminating delimiter for callout with string argument\0"
|
||||
"unrecognized string delimiter follows (?C\0"
|
||||
"using \\C is disabled by the application\0"
|
||||
"(?| and/or (?J: or (?x: parentheses are too deeply nested\0"
|
||||
/* 85 */
|
||||
"using \\C is disabled in this PCRE2 library\0"
|
||||
"regular expression is too complicated\0"
|
||||
"lookbehind assertion is too long\0"
|
||||
"pattern string is longer than the limit set by the application\0"
|
||||
"internal error: unknown code in parsed pattern\0"
|
||||
/* 90 */
|
||||
"internal error: bad code value in parsed_skip()\0"
|
||||
"PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode\0"
|
||||
"invalid option bits with PCRE2_LITERAL\0"
|
||||
"\\N{U+dddd} is supported only in Unicode (UTF) mode\0"
|
||||
"invalid hyphen in option setting\0"
|
||||
/* 95 */
|
||||
"(*alpha_assertion) not recognized\0"
|
||||
"script runs require Unicode support, which this version of PCRE2 does not have\0"
|
||||
"too many capturing groups (maximum 65535)\0"
|
||||
"octal digit missing after \\0 (PCRE2_EXTRA_NO_BS0 is set)\0"
|
||||
"\\K is not allowed in lookarounds (but see PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)\0"
|
||||
/* 100 */
|
||||
"branch too long in variable-length lookbehind assertion\0"
|
||||
"compiled pattern would be longer than the limit set by the application\0"
|
||||
"octal value given by \\ddd is greater than \\377 (forbidden by PCRE2_EXTRA_PYTHON_OCTAL)\0"
|
||||
"using callouts is disabled by the application\0"
|
||||
"PCRE2_EXTRA_TURKISH_CASING require Unicode (UTF or UCP) mode\0"
|
||||
/* 105 */
|
||||
"PCRE2_EXTRA_TURKISH_CASING requires UTF in 8-bit mode\0"
|
||||
"PCRE2_EXTRA_TURKISH_CASING and PCRE2_EXTRA_CASELESS_RESTRICT are not compatible\0"
|
||||
"extended character class nesting is too deep\0"
|
||||
"invalid operator in extended character class\0"
|
||||
"unexpected operator in extended character class (no preceding operand)\0"
|
||||
/* 110 */
|
||||
"expected operand after operator in extended character class\0"
|
||||
"square brackets needed to clarify operator precedence in extended character class\0"
|
||||
"missing terminating ] for extended character class (note '[' must be escaped under PCRE2_ALT_EXTENDED_CLASS)\0"
|
||||
"unexpected expression in extended character class (no preceding operator)\0"
|
||||
"empty expression in extended character class\0"
|
||||
/* 115 */
|
||||
"terminating ] with no following closing parenthesis in (?[...]\0"
|
||||
"unexpected character in (?[...]) extended character class\0"
|
||||
;
|
||||
|
||||
/* Match-time and UTF error texts are in the same format. */
|
||||
|
||||
/* Table of match-time and UTF error messages. The messages are stored back to
back, each one terminated by a NUL, and the implicit NUL that ends the whole
literal acts as an empty string marking the end of the table. The numeric
comments give the index of the message that follows them. */

static const unsigned char match_error_texts[] =
  /* 0 */
  "no error\0"
  "no match\0"
  "partial match\0"
  "UTF-8 error: 1 byte missing at end\0"
  "UTF-8 error: 2 bytes missing at end\0"
  /* 5 */
  "UTF-8 error: 3 bytes missing at end\0"
  "UTF-8 error: 4 bytes missing at end\0"
  "UTF-8 error: 5 bytes missing at end\0"
  "UTF-8 error: byte 2 top bits not 0x80\0"
  "UTF-8 error: byte 3 top bits not 0x80\0"
  /* 10 */
  "UTF-8 error: byte 4 top bits not 0x80\0"
  "UTF-8 error: byte 5 top bits not 0x80\0"
  "UTF-8 error: byte 6 top bits not 0x80\0"
  "UTF-8 error: 5-byte character is not allowed (RFC 3629)\0"
  "UTF-8 error: 6-byte character is not allowed (RFC 3629)\0"
  /* 15 */
  "UTF-8 error: code points greater than 0x10ffff are not defined\0"
  "UTF-8 error: code points 0xd800-0xdfff are not defined\0"
  "UTF-8 error: overlong 2-byte sequence\0"
  "UTF-8 error: overlong 3-byte sequence\0"
  "UTF-8 error: overlong 4-byte sequence\0"
  /* 20 */
  "UTF-8 error: overlong 5-byte sequence\0"
  "UTF-8 error: overlong 6-byte sequence\0"
  "UTF-8 error: isolated byte with 0x80 bit set\0"
  "UTF-8 error: illegal byte (0xfe or 0xff)\0"
  "UTF-16 error: missing low surrogate at end\0"
  /* 25 */
  "UTF-16 error: invalid low surrogate\0"
  "UTF-16 error: isolated low surrogate\0"
  "UTF-32 error: code points 0xd800-0xdfff are not defined\0"
  "UTF-32 error: code points greater than 0x10ffff are not defined\0"
  "bad data value\0"
  /* 30 */
  "patterns do not all use the same character tables\0"
  "magic number missing\0"
  "pattern compiled in wrong mode: 8/16/32-bit error\0"
  "bad offset value\0"
  "bad option value\0"
  /* 35 */
  "invalid replacement string\0"
  "bad offset into UTF string\0"
  "callout error code\0"  /* Never returned by PCRE2 itself */
  "invalid data in workspace for DFA restart\0"
  "too much recursion for DFA matching\0"
  /* 40 */
  "backreference condition or recursion test is not supported for DFA matching\0"
  "function is not supported for DFA matching\0"
  "pattern contains an item that is not supported for DFA matching\0"
  "workspace size exceeded in DFA matching\0"
  "internal error - pattern overwritten?\0"
  /* 45 */
  "bad JIT option\0"
  "JIT stack limit reached\0"
  "match limit exceeded\0"
  "no more memory\0"
  "unknown substring\0"
  /* 50 */
  "non-unique substring name\0"
  "NULL argument passed with non-zero length\0"
  "nested recursion at the same subject position\0"
  "matching depth limit exceeded\0"
  "requested value is not available\0"
  /* 55 */
  "requested value is not set\0"
  "offset limit set without PCRE2_USE_OFFSET_LIMIT\0"
  "bad escape sequence in replacement string\0"
  "expected closing curly bracket in replacement string\0"
  "bad substitution in replacement string\0"
  /* 60 */
  "match with end before start or start moved backwards is not supported\0"
  "too many replacements (more than INT_MAX)\0"
  "bad serialized data\0"
  "heap limit exceeded\0"
  "invalid syntax\0"
  /* 65 */
  "internal error - duplicate substitution match\0"
  "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching\0"
  "INTERNAL ERROR: invalid substring offset\0"
  "feature is not supported by the JIT compiler\0"
  "error performing replacement case transformation\0"
  /* 70 */
  "replacement too large (longer than PCRE2_SIZE)\0"
  ;
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return error message *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies an error message into a buffer whose units are of an
|
||||
appropriate width. Error numbers are positive for compile-time errors, and
|
||||
negative for match-time errors (except for UTF errors), but the numbers are all
|
||||
distinct.
|
||||
|
||||
Arguments:
|
||||
enumber error number
|
||||
buffer where to put the message (zero terminated)
|
||||
size size of the buffer in code units
|
||||
|
||||
Returns: length of message if all is well
|
||||
negative on error
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, PCRE2_SIZE size)
|
||||
{
|
||||
const unsigned char *message;
|
||||
PCRE2_SIZE i;
|
||||
int n;
|
||||
|
||||
if (size == 0) return PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
if (enumber >= COMPILE_ERROR_BASE) /* Compile error */
|
||||
{
|
||||
message = compile_error_texts;
|
||||
n = enumber - COMPILE_ERROR_BASE;
|
||||
}
|
||||
else if (enumber < 0) /* Match or UTF error */
|
||||
{
|
||||
message = match_error_texts;
|
||||
n = -enumber;
|
||||
}
|
||||
else /* Invalid error number */
|
||||
{
|
||||
message = (const unsigned char *)"\0"; /* Empty message list */
|
||||
n = 1;
|
||||
}
|
||||
|
||||
for (; n > 0; n--)
|
||||
{
|
||||
while (*message++ != CHAR_NUL) {};
|
||||
if (*message == CHAR_NUL) return PCRE2_ERROR_BADDATA;
|
||||
}
|
||||
|
||||
for (i = 0; *message != 0; i++)
|
||||
{
|
||||
if (i >= size - 1)
|
||||
{
|
||||
buffer[i] = 0; /* Terminate partial message */
|
||||
return PCRE2_ERROR_NOMEMORY;
|
||||
}
|
||||
buffer[i] = *message++;
|
||||
}
|
||||
|
||||
buffer[i] = 0;
|
||||
return (int)i;
|
||||
}
|
||||
|
||||
/* End of pcre2_error.c */
|
||||
@@ -1,162 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This module contains an internal function that is used to match a Unicode
|
||||
extended grapheme sequence. It is used by both pcre2_match() and
|
||||
pcre2_dfa_match(). However, it is called only when Unicode support is being
|
||||
compiled. Nevertheless, we provide a dummy function when there is no Unicode
|
||||
support, because some compilers do not like functionless source files. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
/* Dummy function */
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
PCRE2_SPTR
|
||||
PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,
|
||||
PCRE2_SPTR end_subject, BOOL utf, int *xcount)
|
||||
{
|
||||
(void)c;
|
||||
(void)eptr;
|
||||
(void)start_subject;
|
||||
(void)end_subject;
|
||||
(void)utf;
|
||||
(void)xcount;
|
||||
return NULL;
|
||||
}
|
||||
#else
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Match an extended grapheme sequence *
|
||||
*************************************************/
|
||||
|
||||
/* NOTE: The logic contained in this function is replicated in three special-
|
||||
purpose functions in the pcre2_jit_compile.c module. If the logic below is
|
||||
changed, they must be kept in step so that the interpreter and the JIT have the
|
||||
same behaviour.
|
||||
|
||||
Arguments:
|
||||
c the first character
|
||||
eptr pointer to next character
|
||||
start_subject pointer to start of subject
|
||||
end_subject pointer to end of subject
|
||||
utf TRUE if in UTF mode
|
||||
xcount pointer to count of additional characters,
|
||||
or NULL if count not needed
|
||||
|
||||
Returns: pointer after the end of the sequence
|
||||
*/
|
||||
|
||||
PCRE2_SPTR
|
||||
PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,
|
||||
PCRE2_SPTR end_subject, BOOL utf, int *xcount)
|
||||
{
|
||||
BOOL was_ep_ZWJ = FALSE;
|
||||
int lgb = UCD_GRAPHBREAK(c);
|
||||
|
||||
while (eptr < end_subject)
|
||||
{
|
||||
int rgb;
|
||||
int len = 1;
|
||||
if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
|
||||
rgb = UCD_GRAPHBREAK(c);
|
||||
if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
|
||||
|
||||
/* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
|
||||
preceded by Extended Pictographic. */
|
||||
|
||||
if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ)
|
||||
break;
|
||||
|
||||
/* Not breaking between Regional Indicators is allowed only if there
|
||||
are an even number of preceding RIs. */
|
||||
|
||||
if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
|
||||
{
|
||||
int ricount = 0;
|
||||
PCRE2_SPTR bptr = eptr - 1;
|
||||
if (utf) BACKCHAR(bptr);
|
||||
|
||||
/* bptr is pointing to the left-hand character */
|
||||
|
||||
while (bptr > start_subject)
|
||||
{
|
||||
bptr--;
|
||||
if (utf)
|
||||
{
|
||||
BACKCHAR(bptr);
|
||||
GETCHAR(c, bptr);
|
||||
}
|
||||
else
|
||||
c = *bptr;
|
||||
if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break;
|
||||
ricount++;
|
||||
}
|
||||
if ((ricount & 1) != 0) break; /* Grapheme break required */
|
||||
}
|
||||
|
||||
/* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in
|
||||
between; see next statement). */
|
||||
|
||||
was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ);
|
||||
|
||||
/* If Extend follows Extended_Pictographic, do not update lgb; this allows
|
||||
any number of them before a following ZWJ. */
|
||||
|
||||
if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic) lgb = rgb;
|
||||
|
||||
eptr += len;
|
||||
if (xcount != NULL) *xcount += 1;
|
||||
}
|
||||
|
||||
return eptr;
|
||||
}
|
||||
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* End of pcre2_extuni.c */
|
||||
@@ -1,220 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains a single function that scans through a compiled pattern
|
||||
until it finds a capturing bracket with the given number, or, if the number is
|
||||
negative, an instance of OP_REVERSE or OP_VREVERSE for a lookbehind. The
|
||||
function is called from pcre2_compile.c and also from pcre2_study.c when
|
||||
finding the minimum matching length. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Scan compiled regex for specific bracket *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
code points to start of expression
|
||||
utf TRUE in UTF mode
|
||||
number the required bracket number or negative to find a lookbehind
|
||||
|
||||
Returns: pointer to the opcode for the bracket, or NULL if not found
|
||||
*/
|
||||
|
||||
PCRE2_SPTR
|
||||
PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
PCRE2_UCHAR c = *code;
|
||||
|
||||
if (c == OP_END) return NULL;
|
||||
|
||||
/* XCLASS is used for classes that cannot be represented just by a bit map.
|
||||
This includes negated single high-valued characters. ECLASS is used for
|
||||
classes that use set operations internally. CALLOUT_STR is used for
|
||||
callouts with string arguments. In each case the length in the table is
|
||||
zero; the actual length is stored in the compiled code. */
|
||||
|
||||
if (c == OP_XCLASS || c == OP_ECLASS) code += GET(code, 1);
|
||||
else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
|
||||
|
||||
/* Handle lookbehind */
|
||||
|
||||
else if (c == OP_REVERSE || c == OP_VREVERSE)
|
||||
{
|
||||
if (number < 0) return code;
|
||||
code += PRIV(OP_lengths)[c];
|
||||
}
|
||||
|
||||
/* Handle capturing bracket */
|
||||
|
||||
else if (c == OP_CBRA || c == OP_SCBRA ||
|
||||
c == OP_CBRAPOS || c == OP_SCBRAPOS)
|
||||
{
|
||||
int n = (int)GET2(code, 1+LINK_SIZE);
|
||||
if (n == number) return code;
|
||||
code += PRIV(OP_lengths)[c];
|
||||
}
|
||||
|
||||
/* Otherwise, we can get the item's length from the table, except that for
|
||||
repeated character types, we have to test for \p and \P, which have an extra
|
||||
two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
|
||||
must add in its length. */
|
||||
|
||||
else
|
||||
{
|
||||
switch(c)
|
||||
{
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEPOSPLUS:
|
||||
case OP_TYPEPOSQUERY:
|
||||
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
|
||||
break;
|
||||
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEEXACT:
|
||||
case OP_TYPEPOSUPTO:
|
||||
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
|
||||
code += 2;
|
||||
break;
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
code += code[1];
|
||||
break;
|
||||
}
|
||||
|
||||
/* Add in the fixed length from the table */
|
||||
|
||||
code += PRIV(OP_lengths)[c];
|
||||
|
||||
/* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
|
||||
followed by a multi-byte character. The length in the table is a minimum, so
|
||||
we have to arrange to skip the extra bytes. */
|
||||
|
||||
#ifdef MAYBE_UTF_MULTI
|
||||
if (utf) switch(c)
|
||||
{
|
||||
case OP_CHAR:
|
||||
case OP_CHARI:
|
||||
case OP_NOT:
|
||||
case OP_NOTI:
|
||||
case OP_EXACT:
|
||||
case OP_EXACTI:
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTEXACTI:
|
||||
case OP_UPTO:
|
||||
case OP_UPTOI:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTUPTOI:
|
||||
case OP_MINUPTO:
|
||||
case OP_MINUPTOI:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_NOTMINUPTOI:
|
||||
case OP_POSUPTO:
|
||||
case OP_POSUPTOI:
|
||||
case OP_NOTPOSUPTO:
|
||||
case OP_NOTPOSUPTOI:
|
||||
case OP_STAR:
|
||||
case OP_STARI:
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTSTARI:
|
||||
case OP_MINSTAR:
|
||||
case OP_MINSTARI:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTMINSTARI:
|
||||
case OP_POSSTAR:
|
||||
case OP_POSSTARI:
|
||||
case OP_NOTPOSSTAR:
|
||||
case OP_NOTPOSSTARI:
|
||||
case OP_PLUS:
|
||||
case OP_PLUSI:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTPLUSI:
|
||||
case OP_MINPLUS:
|
||||
case OP_MINPLUSI:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTMINPLUSI:
|
||||
case OP_POSPLUS:
|
||||
case OP_POSPLUSI:
|
||||
case OP_NOTPOSPLUS:
|
||||
case OP_NOTPOSPLUSI:
|
||||
case OP_QUERY:
|
||||
case OP_QUERYI:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTQUERYI:
|
||||
case OP_MINQUERY:
|
||||
case OP_MINQUERYI:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_NOTMINQUERYI:
|
||||
case OP_POSQUERY:
|
||||
case OP_POSQUERYI:
|
||||
case OP_NOTPOSQUERY:
|
||||
case OP_NOTPOSQUERYI:
|
||||
if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
|
||||
break;
|
||||
}
|
||||
#else
|
||||
(void)(utf); /* Keep compiler happy by referencing function argument */
|
||||
#endif /* MAYBE_UTF_MULTI */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_find_bracket.c */
|
||||
@@ -1,804 +0,0 @@
|
||||
/***************************************************************************
|
||||
Fuzzer driver for PCRE2. Given an arbitrary string of bytes and a length, it
|
||||
tries to compile and match it, deriving options from the string itself. If
|
||||
STANDALONE is defined, a main program that calls the driver with the contents
|
||||
of specified files is compiled, and commentary on what is happening is output.
|
||||
If an argument starts with '=' the rest of it is taken as a literal string
|
||||
rather than a file name. This allows easy testing of short strings.
|
||||
|
||||
Written by Philip Hazel, October 2016
|
||||
Updated February 2024 (Addison Crump added 16-bit/32-bit and JIT support)
|
||||
Further updates March/April/May 2024 by PH
|
||||
***************************************************************************/
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
/* stack size adjustment */
|
||||
#include <sys/time.h>
|
||||
#include <sys/resource.h>
|
||||
|
||||
#define STACK_SIZE_MB 256
|
||||
#define JIT_SIZE_LIMIT (200 * 1024)
|
||||
|
||||
#ifndef PCRE2_CODE_UNIT_WIDTH
|
||||
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||
#endif
|
||||
|
||||
#include "config.h"
|
||||
#include "pcre2.h"
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
#define MAX_MATCH_SIZE 1000
|
||||
|
||||
#define DFA_WORKSPACE_COUNT 100
|
||||
|
||||
/* When adding new compile or match options, remember to update the functions
|
||||
below that output them. */
|
||||
|
||||
#define ALLOWED_COMPILE_OPTIONS \
|
||||
(PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \
|
||||
PCRE2_ALT_EXTENDED_CLASS|PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT| \
|
||||
PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \
|
||||
PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_ENDANCHORED|PCRE2_EXTENDED| \
|
||||
PCRE2_EXTENDED_MORE|PCRE2_FIRSTLINE| \
|
||||
PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \
|
||||
PCRE2_NO_AUTO_CAPTURE| \
|
||||
PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \
|
||||
PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_USE_OFFSET_LIMIT| \
|
||||
PCRE2_UTF)
|
||||
|
||||
#define ALLOWED_MATCH_OPTIONS \
|
||||
(PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
|
||||
PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \
|
||||
PCRE2_PARTIAL_SOFT)
|
||||
|
||||
#define BASE_MATCH_OPTIONS \
|
||||
(PCRE2_NO_JIT|PCRE2_DISABLE_RECURSELOOP_CHECK)
|
||||
|
||||
|
||||
#if defined(SUPPORT_DIFF_FUZZ) || defined(STANDALONE)
|
||||
static void print_compile_options(FILE *stream, uint32_t compile_options)
|
||||
{
|
||||
fprintf(stream, "Compile options %s%.8x =",
|
||||
(compile_options == PCRE2_NEVER_BACKSLASH_C)? "(base) " : "",
|
||||
compile_options);
|
||||
|
||||
fprintf(stream, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
|
||||
((compile_options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
|
||||
((compile_options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
|
||||
((compile_options & PCRE2_ALT_EXTENDED_CLASS) != 0)? "alt_extended_class" : "",
|
||||
((compile_options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
|
||||
((compile_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
|
||||
((compile_options & PCRE2_ANCHORED) != 0)? " anchored" : "",
|
||||
((compile_options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
|
||||
((compile_options & PCRE2_CASELESS) != 0)? " caseless" : "",
|
||||
((compile_options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
|
||||
((compile_options & PCRE2_DOTALL) != 0)? " dotall" : "",
|
||||
((compile_options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
|
||||
((compile_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
|
||||
((compile_options & PCRE2_EXTENDED) != 0)? " extended" : "",
|
||||
((compile_options & PCRE2_EXTENDED_MORE) != 0)? " extended_more" : "",
|
||||
((compile_options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
|
||||
((compile_options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
|
||||
((compile_options & PCRE2_MULTILINE) != 0)? " multiline" : "",
|
||||
((compile_options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
|
||||
((compile_options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "",
|
||||
((compile_options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "",
|
||||
((compile_options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
|
||||
((compile_options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
|
||||
((compile_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "",
|
||||
((compile_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
|
||||
((compile_options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
|
||||
((compile_options & PCRE2_UCP) != 0)? " ucp" : "",
|
||||
((compile_options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
|
||||
((compile_options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
|
||||
((compile_options & PCRE2_UTF) != 0)? " utf" : "");
|
||||
}
|
||||
|
||||
static void print_match_options(FILE *stream, uint32_t match_options)
|
||||
{
|
||||
fprintf(stream, "Match options %s%.8x =",
|
||||
(match_options == BASE_MATCH_OPTIONS)? "(base) " : "", match_options);
|
||||
|
||||
fprintf(stream, "%s%s%s%s%s%s%s%s%s%s%s\n",
|
||||
((match_options & PCRE2_ANCHORED) != 0)? " anchored" : "",
|
||||
((match_options & PCRE2_DISABLE_RECURSELOOP_CHECK) != 0)? " disable_recurseloop_check" : "",
|
||||
((match_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
|
||||
((match_options & PCRE2_NO_JIT) != 0)? " no_jit" : "",
|
||||
((match_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
|
||||
((match_options & PCRE2_NOTBOL) != 0)? " notbol" : "",
|
||||
((match_options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
|
||||
((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
|
||||
((match_options & PCRE2_NOTEOL) != 0)? " noteol" : "",
|
||||
((match_options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
|
||||
((match_options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
|
||||
}
|
||||
|
||||
|
||||
/* This function can print an error message at all code unit widths. */
|
||||
|
||||
static void print_error(FILE *f, int errorcode, const char *text, ...)
|
||||
{
|
||||
PCRE2_UCHAR buffer[256];
|
||||
PCRE2_UCHAR *p = buffer;
|
||||
va_list ap;
|
||||
va_start(ap, text);
|
||||
vfprintf(f, text, ap);
|
||||
va_end(ap);
|
||||
pcre2_get_error_message(errorcode, buffer, 256);
|
||||
while (*p != 0) fprintf(f, "%c", *p++);
|
||||
printf("\n");
|
||||
}
|
||||
#endif /* defined(SUPPORT_DIFF_FUZZ) || defined(STANDALONE) */
|
||||
|
||||
|
||||
#ifdef SUPPORT_JIT
|
||||
#ifdef SUPPORT_DIFF_FUZZ
|
||||
/* Print, hex encoded, each of the first "count" captured substrings held in
match_data. A substring that cannot be extracted is reported with its PCRE2
error message instead.

Arguments:
  stream      where to write
  count       number of substrings to dump, starting from number 0
  match_data  match data block to read the substrings from
*/

static void dump_matches(FILE *stream, int count, pcre2_match_data *match_data)
{
int errorcode;

for (int index = 0; index < count; index++)
  {
  PCRE2_UCHAR *bufferptr = NULL;
  PCRE2_SIZE bufflen = 0;

  errorcode = pcre2_substring_get_bynumber(match_data, index, &bufferptr,
    &bufflen);

  if (errorcode < 0)
    {
    print_error(stream, errorcode, "Match %d failed: ", index);
    continue;
    }

  fprintf(stream, "Match %d (hex encoded): ", index);
  for (PCRE2_SIZE i = 0; i < bufflen; i++)
    {
    fprintf(stream, "%02x", (unsigned int)bufferptr[i]);
    }
  fprintf(stream, "\n");

  /* The substring was copied into newly obtained memory, which has to be
  handed back once it has been printed. */

  pcre2_substring_free(bufferptr);
  }
}
|
||||
|
||||
/* This function describes the current test case being evaluated, then aborts */
|
||||
|
||||
static void describe_failure(
|
||||
const char *task,
|
||||
const PCRE2_UCHAR *data,
|
||||
PCRE2_SIZE size,
|
||||
uint32_t compile_options,
|
||||
uint32_t match_options,
|
||||
int errorcode,
|
||||
int errorcode_jit,
|
||||
int matches,
|
||||
int matches_jit,
|
||||
pcre2_match_data *match_data,
|
||||
pcre2_match_data *match_data_jit
|
||||
) {
|
||||
|
||||
fprintf(stderr, "Encountered failure while performing %s; context:\n", task);
|
||||
|
||||
fprintf(stderr, "Pattern/sample string (hex encoded): ");
|
||||
for (size_t i = 0; i < size; i++)
|
||||
{
|
||||
fprintf(stderr, "%02x", data[i]);
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
print_compile_options(stderr, compile_options);
|
||||
print_match_options(stderr, match_options);
|
||||
|
||||
if (errorcode < 0)
|
||||
{
|
||||
print_error(stderr, errorcode, "Non-JIT'd operation emitted an error: ");
|
||||
}
|
||||
|
||||
if (matches >= 0)
|
||||
{
|
||||
fprintf(stderr, "Non-JIT'd operation did not emit an error.\n");
|
||||
if (match_data != NULL)
|
||||
{
|
||||
fprintf(stderr, "%d matches discovered by non-JIT'd regex:\n", matches);
|
||||
dump_matches(stderr, matches, match_data);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
if (errorcode_jit < 0)
|
||||
{
|
||||
print_error(stderr, errorcode_jit, "JIT'd operation emitted error %d:",
|
||||
errorcode_jit);
|
||||
}
|
||||
|
||||
if (matches_jit >= 0)
|
||||
{
|
||||
fprintf(stderr, "JIT'd operation did not emit an error.\n");
|
||||
if (match_data_jit != NULL)
|
||||
{
|
||||
fprintf(stderr, "%d matches discovered by JIT'd regex:\n", matches_jit);
|
||||
dump_matches(stderr, matches_jit, match_data_jit);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
abort();
|
||||
}
|
||||
#endif /* SUPPORT_DIFF_FUZZ */
|
||||
#endif /* SUPPORT_JIT */
|
||||
|
||||
/* This is the callout function. Its only purpose is to halt matching if there
|
||||
are more than 100 callouts, as one way of stopping too much time being spent on
|
||||
fruitless matches. The callout data is a pointer to the counter. */
|
||||
|
||||
/* Callout handler: bump the counter that callout_data points to and ask for
matching to stop, by returning PCRE2_ERROR_CALLOUT, once the counter has gone
above 100. Otherwise return 0 so that matching carries on. */

static int callout_function(pcre2_callout_block *cb, void *callout_data)
{
uint32_t *counter = (uint32_t *)callout_data;

(void)cb;  /* Avoid unused parameter warning */

*counter += 1;
if (*counter > 100) return PCRE2_ERROR_CALLOUT;
return 0;
}
|
||||
|
||||
/* Putting in this apparently unnecessary prototype prevents gcc from giving a
|
||||
"no previous prototype" warning when compiling at high warning level. */
|
||||
|
||||
int LLVMFuzzerInitialize(int *, char ***);
|
||||
|
||||
int LLVMFuzzerTestOneInput(unsigned char *, size_t);
|
||||
|
||||
int LLVMFuzzerInitialize(int *argc, char ***argv)
|
||||
{
|
||||
int rc;
|
||||
struct rlimit rlim;
|
||||
getrlimit(RLIMIT_STACK, &rlim);
|
||||
rlim.rlim_cur = STACK_SIZE_MB * 1024 * 1024;
|
||||
if (rlim.rlim_cur > rlim.rlim_max)
|
||||
{
|
||||
fprintf(stderr, "Hard stack size limit is too small\n");
|
||||
_exit(1);
|
||||
}
|
||||
rc = setrlimit(RLIMIT_STACK, &rlim);
|
||||
if (rc != 0)
|
||||
{
|
||||
fprintf(stderr, "Failed to expand stack size\n");
|
||||
_exit(1);
|
||||
}
|
||||
|
||||
(void)argc; /* Avoid "unused parameter" warnings */
|
||||
(void)argv;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Here's the driving function. */
|
||||
|
||||
int LLVMFuzzerTestOneInput(unsigned char *data, size_t size)
|
||||
{
|
||||
PCRE2_UCHAR *wdata;
|
||||
PCRE2_UCHAR *newwdata = NULL;
|
||||
uint32_t compile_options;
|
||||
uint32_t match_options;
|
||||
uint64_t random_options;
|
||||
pcre2_match_data *match_data = NULL;
|
||||
#ifdef SUPPORT_JIT
|
||||
pcre2_match_data *match_data_jit = NULL;
|
||||
#endif
|
||||
pcre2_compile_context *compile_context = NULL;
|
||||
pcre2_match_context *match_context = NULL;
|
||||
size_t match_size;
|
||||
int dfa_workspace[DFA_WORKSPACE_COUNT];
|
||||
|
||||
if (size < sizeof(random_options)) return -1;
|
||||
|
||||
random_options = *(uint64_t *)(data);
|
||||
data += sizeof(random_options);
|
||||
wdata = (PCRE2_UCHAR *)data;
|
||||
size -= sizeof(random_options);
|
||||
size /= PCRE2_CODE_UNIT_WIDTH / 8;
|
||||
|
||||
/* PCRE2 compiles quantified groups by replicating them. In certain cases of
|
||||
very large quantifiers this can lead to unacceptably long JIT compile times. To
|
||||
get around this, we scan the data string for large quantifiers that follow a
|
||||
closing parenthesis, and reduce the value of the quantifier to 10, assuming
|
||||
that this will make minimal difference to the detection of bugs.
|
||||
|
||||
Do the same for quantifiers that follow a closing square bracket, because
|
||||
classes that contain a number of non-ascii characters can take a lot of time
|
||||
when matching.
|
||||
|
||||
We have to make a copy of the input because oss-fuzz complains if we overwrite
|
||||
the original. Start the scan at the second character so there can be a
|
||||
lookbehind for a backslash, and end it before the end so that the next
|
||||
character can be checked for an opening brace. */
|
||||
|
||||
if (size > 3)
|
||||
{
|
||||
newwdata = malloc(size * sizeof(PCRE2_UCHAR));
|
||||
memcpy(newwdata, wdata, size * sizeof(PCRE2_UCHAR));
|
||||
wdata = newwdata;
|
||||
|
||||
for (size_t i = 1; i < size - 2; i++)
|
||||
{
|
||||
size_t j;
|
||||
|
||||
if ((wdata[i] != ')' && wdata[i] != ']') || wdata[i-1] == '\\' ||
|
||||
wdata[i+1] != '{')
|
||||
continue;
|
||||
i++; /* Points to '{' */
|
||||
|
||||
/* Loop for two values in a quantifier. Offset i points to brace or comma
|
||||
at the start of the loop. */
|
||||
|
||||
for (int ii = 0; ii < 2; ii++)
|
||||
{
|
||||
int q = 0;
|
||||
|
||||
if (i >= size - 1) goto END_QSCAN; /* Can happen for , */
|
||||
|
||||
/* Ignore leading spaces. */
|
||||
|
||||
while (wdata[i+1] == ' ' || wdata[i+1] == '\t')
|
||||
{
|
||||
i++;
|
||||
if (i >= size - 1) goto END_QSCAN;
|
||||
}
|
||||
|
||||
/* Ignore non-significant leading zeros. */
|
||||
|
||||
while (wdata[i+1] == '0' && i+2 < size && wdata[i+2] >= '0' &&
|
||||
wdata[i+2] <= '9')
|
||||
{
|
||||
i++;
|
||||
if (i >= size - 1) goto END_QSCAN;
|
||||
}
|
||||
|
||||
/* Scan for a number ending in brace, or comma in the first iteration,
|
||||
optionally preceded by space. */
|
||||
|
||||
for (j = i + 1; j < size && j < i + 7; j++)
|
||||
{
|
||||
if (wdata[j] == ' ' || wdata[j] == '\t')
|
||||
{
|
||||
j++;
|
||||
while (j < size && (wdata[j] == ' ' || wdata[j] == '\t')) j++;
|
||||
if (j >= size) goto OUTERLOOP;
|
||||
if (wdata[j] != '}' && wdata[j] != ',') goto OUTERLOOP;
|
||||
}
|
||||
if (wdata[j] == '}' || (ii == 0 && wdata[j] == ',')) break;
|
||||
|
||||
if (wdata[j] < '0' || wdata[j] > '9')
|
||||
{
|
||||
j--; /* Ensure this character is checked next. The */
|
||||
goto OUTERLOOP; /* string might be (e.g.) "){9){234}" */
|
||||
}
|
||||
q = q * 10 + (wdata[j] - '0');
|
||||
}
|
||||
|
||||
if (j >= size) goto END_QSCAN; /* End of data */
|
||||
|
||||
/* Hit ',' or '}' or read 6 digits. Six digits is a number > 65536 which
|
||||
is the maximum quantifier. Leave such numbers alone. */
|
||||
|
||||
if (j >= i + 7 || q > 65535) goto OUTERLOOP;
|
||||
|
||||
/* Limit the quantifier size to 10 */
|
||||
|
||||
if (q > 10)
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
printf("Reduced quantifier value %d to 10.\n", q);
|
||||
#endif
|
||||
for (size_t k = i + 1; k < j; k++) wdata[k] = '0';
|
||||
wdata[j - 2] = '1';
|
||||
}
|
||||
|
||||
/* Advance to end of number and break if reached closing brace (continue
|
||||
after comma, which is only valid in the first time round this loop). */
|
||||
|
||||
i = j;
|
||||
if (wdata[i] == '}') break;
|
||||
}
|
||||
|
||||
/* Continue along the data string */
|
||||
|
||||
OUTERLOOP:
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
END_QSCAN:
|
||||
|
||||
/* Limiting the length of the subject for matching stops fruitless searches
|
||||
in large trees taking too much time. */
|
||||
|
||||
match_size = (size > MAX_MATCH_SIZE)? MAX_MATCH_SIZE : size;
|
||||
|
||||
/* Create a compile context, and set a limit on the size of the compiled
|
||||
pattern. This stops the fuzzer using vast amounts of memory. */
|
||||
|
||||
compile_context = pcre2_compile_context_create(NULL);
|
||||
if (compile_context == NULL)
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
fprintf(stderr, "** Failed to create compile context block\n");
|
||||
#endif
|
||||
abort();
|
||||
}
|
||||
pcre2_set_max_pattern_compiled_length(compile_context, 10*1024*1024);
|
||||
|
||||
/* Ensure that all undefined option bits are zero (waste of time trying them)
|
||||
and also that PCRE2_NO_UTF_CHECK is unset, as there is no guarantee that the
|
||||
input is valid UTF. Also unset PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as there is
|
||||
no reason to disallow UTF and UCP. Force PCRE2_NEVER_BACKSLASH_C to be set
|
||||
because \C in random patterns is highly likely to cause a crash. */
|
||||
|
||||
compile_options = ((random_options >> 32) & ALLOWED_COMPILE_OPTIONS) |
|
||||
PCRE2_NEVER_BACKSLASH_C;
|
||||
match_options = (((uint32_t)random_options) & ALLOWED_MATCH_OPTIONS) |
|
||||
BASE_MATCH_OPTIONS;
|
||||
|
||||
/* Discard partial matching if PCRE2_ENDANCHORED is set, because they are not
|
||||
allowed together and just give an immediate error return. */
|
||||
|
||||
if (((compile_options|match_options) & PCRE2_ENDANCHORED) != 0)
|
||||
match_options &= ~(PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT);
|
||||
|
||||
/* Do the compile with and without the options, and after a successful compile,
|
||||
likewise do the match with and without the options. */
|
||||
|
||||
for (int i = 0; i < 2; i++)
|
||||
{
|
||||
uint32_t callout_count;
|
||||
int errorcode;
|
||||
#ifdef SUPPORT_JIT
|
||||
int errorcode_jit;
|
||||
#ifdef SUPPORT_DIFF_FUZZ
|
||||
int matches = 0;
|
||||
int matches_jit = 0;
|
||||
#endif
|
||||
#endif
|
||||
PCRE2_SIZE erroroffset;
|
||||
pcre2_code *code;
|
||||
|
||||
#ifdef STANDALONE
|
||||
printf("\n");
|
||||
print_compile_options(stdout, compile_options);
|
||||
#endif
|
||||
|
||||
code = pcre2_compile((PCRE2_SPTR)wdata, (PCRE2_SIZE)size, compile_options,
|
||||
&errorcode, &erroroffset, compile_context);
|
||||
|
||||
/* Compilation succeeded */
|
||||
|
||||
if (code != NULL)
|
||||
{
|
||||
int j;
|
||||
uint32_t save_match_options = match_options;
|
||||
|
||||
/* Call JIT compile only if the compiled pattern is not too big. */
|
||||
|
||||
#ifdef SUPPORT_JIT
|
||||
int jit_ret = -1;
|
||||
if (((struct pcre2_real_code *)code)->blocksize <= JIT_SIZE_LIMIT)
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
printf("Compile succeeded; calling JIT compile\n");
|
||||
#endif
|
||||
jit_ret = pcre2_jit_compile(code, PCRE2_JIT_COMPLETE);
|
||||
#ifdef STANDALONE
|
||||
if (jit_ret < 0) printf("JIT compile error %d\n", jit_ret);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
printf("Not calling JIT: compiled pattern is too long "
|
||||
"(%ld bytes; limit=%d)\n",
|
||||
((struct pcre2_real_code *)code)->blocksize, JIT_SIZE_LIMIT);
|
||||
#endif
|
||||
}
|
||||
#endif /* SUPPORT_JIT */
|
||||
|
||||
/* Create match data and context blocks only when we first need them. Set
|
||||
low match and depth limits to avoid wasting too much searching large
|
||||
pattern trees. Almost all matches are going to fail. */
|
||||
|
||||
if (match_data == NULL)
|
||||
{
|
||||
match_data = pcre2_match_data_create(32, NULL);
|
||||
#ifdef SUPPORT_JIT
|
||||
match_data_jit = pcre2_match_data_create(32, NULL);
|
||||
if (match_data == NULL || match_data_jit == NULL)
|
||||
#else
|
||||
if (match_data == NULL)
|
||||
#endif
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
fprintf(stderr, "** Failed to create match data block\n");
|
||||
#endif
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
if (match_context == NULL)
|
||||
{
|
||||
match_context = pcre2_match_context_create(NULL);
|
||||
if (match_context == NULL)
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
fprintf(stderr, "** Failed to create match context block\n");
|
||||
#endif
|
||||
abort();
|
||||
}
|
||||
(void)pcre2_set_match_limit(match_context, 100);
|
||||
(void)pcre2_set_depth_limit(match_context, 100);
|
||||
(void)pcre2_set_callout(match_context, callout_function, &callout_count);
|
||||
}
|
||||
|
||||
/* Match twice, with and without options. */
|
||||
|
||||
#ifdef STANDALONE
|
||||
printf("\n");
|
||||
#endif
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
print_match_options(stdout, match_options);
|
||||
#endif
|
||||
|
||||
callout_count = 0;
|
||||
errorcode = pcre2_match(code, (PCRE2_SPTR)wdata, (PCRE2_SIZE)match_size, 0,
|
||||
match_options, match_data, match_context);
|
||||
|
||||
#ifdef STANDALONE
|
||||
if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
|
||||
print_error(stdout, errorcode, "Match failed: error %d: ", errorcode);
|
||||
#endif
|
||||
|
||||
/* If JIT is enabled, do a JIT match and, if appropriately compiled, compare
|
||||
with the interpreter. */
|
||||
|
||||
#ifdef SUPPORT_JIT
|
||||
if (jit_ret >= 0)
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
printf("Matching with JIT\n");
|
||||
#endif
|
||||
callout_count = 0;
|
||||
errorcode_jit = pcre2_match(code, (PCRE2_SPTR)wdata, (PCRE2_SIZE)match_size, 0,
|
||||
match_options & ~PCRE2_NO_JIT, match_data_jit, match_context);
|
||||
|
||||
#ifdef STANDALONE
|
||||
if (errorcode_jit >= 0)
|
||||
printf("Match returned %d\n", errorcode_jit);
|
||||
else
|
||||
print_error(stdout, errorcode_jit, "JIT match failed: error %d: ",
|
||||
errorcode_jit);
|
||||
#else
|
||||
(void)errorcode_jit; /* Avoid compiler warning */
|
||||
#endif /* STANDALONE */
|
||||
|
||||
/* With differential matching enabled, compare with interpreter. */
|
||||
|
||||
#ifdef SUPPORT_DIFF_FUZZ
|
||||
matches = errorcode;
|
||||
matches_jit = errorcode_jit;
|
||||
|
||||
if (errorcode_jit != errorcode)
|
||||
{
|
||||
if (!(errorcode < 0 && errorcode_jit < 0) &&
|
||||
errorcode != PCRE2_ERROR_MATCHLIMIT && errorcode != PCRE2_ERROR_CALLOUT &&
|
||||
errorcode_jit != PCRE2_ERROR_MATCHLIMIT && errorcode_jit != PCRE2_ERROR_JIT_STACKLIMIT && errorcode_jit != PCRE2_ERROR_CALLOUT)
|
||||
{
|
||||
describe_failure("match errorcode comparison", wdata, size, compile_options, match_options, errorcode, errorcode_jit, matches, matches_jit, match_data, match_data_jit);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int index = 0; index < errorcode; index++)
|
||||
{
|
||||
PCRE2_UCHAR *bufferptr, *bufferptr_jit;
|
||||
PCRE2_SIZE bufflen, bufflen_jit;
|
||||
|
||||
bufferptr = bufferptr_jit = NULL;
|
||||
bufflen = bufflen_jit = 0;
|
||||
|
||||
errorcode = pcre2_substring_get_bynumber(match_data, (uint32_t) index, &bufferptr, &bufflen);
|
||||
errorcode_jit = pcre2_substring_get_bynumber(match_data_jit, (uint32_t) index, &bufferptr_jit, &bufflen_jit);
|
||||
|
||||
if (errorcode != errorcode_jit)
|
||||
{
|
||||
describe_failure("match entry errorcode comparison", wdata, size,
|
||||
compile_options, match_options, errorcode, errorcode_jit,
|
||||
matches, matches_jit, match_data, match_data_jit);
|
||||
}
|
||||
|
||||
if (errorcode >= 0)
|
||||
{
|
||||
if (bufflen != bufflen_jit)
|
||||
{
|
||||
describe_failure("match entry length comparison", wdata, size,
|
||||
compile_options, match_options, errorcode, errorcode_jit,
|
||||
matches, matches_jit, match_data, match_data_jit);
|
||||
}
|
||||
|
||||
if (memcmp(bufferptr, bufferptr_jit, bufflen) != 0)
|
||||
{
|
||||
describe_failure("match entry content comparison", wdata, size,
|
||||
compile_options, match_options, errorcode, errorcode_jit,
|
||||
matches, matches_jit, match_data, match_data_jit);
|
||||
}
|
||||
}
|
||||
|
||||
pcre2_substring_free(bufferptr);
|
||||
pcre2_substring_free(bufferptr_jit);
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_DIFF_FUZZ */
|
||||
}
|
||||
#endif /* SUPPORT_JIT */
|
||||
|
||||
if (match_options == BASE_MATCH_OPTIONS) break; /* Don't do same twice */
|
||||
match_options = BASE_MATCH_OPTIONS; /* For second time */
|
||||
}
|
||||
|
||||
/* Match with DFA twice, with and without options, but remove options that
|
||||
are not allowed with DFA. */
|
||||
|
||||
match_options = save_match_options & ~BASE_MATCH_OPTIONS;
|
||||
|
||||
#ifdef STANDALONE
|
||||
printf("\n");
|
||||
#endif
|
||||
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
printf("DFA match options %.8x =", match_options);
|
||||
printf("%s%s%s%s%s%s%s%s%s\n",
|
||||
((match_options & PCRE2_ANCHORED) != 0)? " anchored" : "",
|
||||
((match_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
|
||||
((match_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
|
||||
((match_options & PCRE2_NOTBOL) != 0)? " notbol" : "",
|
||||
((match_options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
|
||||
((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
|
||||
((match_options & PCRE2_NOTEOL) != 0)? " noteol" : "",
|
||||
((match_options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
|
||||
((match_options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
|
||||
#endif
|
||||
|
||||
callout_count = 0;
|
||||
errorcode = pcre2_dfa_match(code, (PCRE2_SPTR)wdata,
|
||||
(PCRE2_SIZE)match_size, 0, match_options, match_data,
|
||||
match_context, dfa_workspace, DFA_WORKSPACE_COUNT);
|
||||
|
||||
#ifdef STANDALONE
|
||||
if (errorcode >= 0)
|
||||
printf("Match returned %d\n", errorcode);
|
||||
else
|
||||
print_error(stdout, errorcode, "DFA match failed: error %d: ", errorcode);
|
||||
#endif
|
||||
|
||||
if (match_options == 0) break; /* No point doing same twice */
|
||||
match_options = 0; /* For second time */
|
||||
}
|
||||
|
||||
match_options = save_match_options; /* Reset for the second compile */
|
||||
pcre2_code_free(code);
|
||||
}
|
||||
|
||||
/* Compilation failed */
|
||||
|
||||
else
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
print_error(stdout, errorcode, "Error %d at offset %lu: ", errorcode,
|
||||
erroroffset);
|
||||
#else
|
||||
if (errorcode == PCRE2_ERROR_INTERNAL) abort();
|
||||
#endif
|
||||
}
|
||||
|
||||
if (compile_options == PCRE2_NEVER_BACKSLASH_C) break; /* Avoid same twice */
|
||||
compile_options = PCRE2_NEVER_BACKSLASH_C; /* For second time */
|
||||
}
|
||||
|
||||
/* Tidy up before exiting */
|
||||
|
||||
if (match_data != NULL) pcre2_match_data_free(match_data);
|
||||
#ifdef SUPPORT_JIT
|
||||
if (match_data_jit != NULL) pcre2_match_data_free(match_data_jit);
|
||||
#endif
|
||||
free(newwdata);
|
||||
if (match_context != NULL) pcre2_match_context_free(match_context);
|
||||
if (compile_context != NULL) pcre2_compile_context_free(compile_context);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Optional main program. */
|
||||
|
||||
#ifdef STANDALONE

/* Stand-alone driver. After calling LLVMFuzzerInitialize(), each command line
argument is processed in turn. An argument that starts with '=' is a literal
input (the text after the '='); any other argument is the name of a file whose
entire contents are the input. Each input is passed to
LLVMFuzzerTestOneInput(). Problems with one argument are reported on stdout
and do not stop the remaining arguments being processed.

Returns:  always 0 */

int main(int argc, char **argv)
{
LLVMFuzzerInitialize(&argc, &argv);

if (argc < 2)
  {
  printf("** No arguments given\n");
  return 0;
  }

for (int i = 1; i < argc; i++)
  {
  size_t filelen;
  size_t readsize;
  long endpos;
  unsigned char *buffer;
  FILE *f;

  /* Handle a literal string. Copy to an exact size buffer so that checks for
  overrunning work. A zero-length request is rounded up to one byte because
  malloc(0) is allowed to return NULL, which is not an allocation failure. */

  if (argv[i][0] == '=')
    {
    readsize = strlen(argv[i]) - 1;
    printf("------ <Literal> ------\n");
    printf("Length = %lu\n", (unsigned long)readsize);
    printf("%.*s\n", (int)readsize, argv[i]+1);
    buffer = (unsigned char *)malloc((readsize > 0)? readsize : 1);
    if (buffer == NULL)
      printf("** Failed to allocate %lu bytes of memory\n",
        (unsigned long)readsize);
    else
      {
      memcpy(buffer, argv[i]+1, readsize);
      LLVMFuzzerTestOneInput(buffer, readsize);
      free(buffer);
      }
    continue;
    }

  /* Handle a string given in a file */

  f = fopen(argv[i], "rb");
  if (f == NULL)
    {
    printf("** Failed to open %s: %s\n", argv[i], strerror(errno));
    continue;
    }

  printf("------ %s ------\n", argv[i]);

  /* Find the file size. ftell() returns -1 on failure (for example, when the
  file is not seekable), and that must not be turned into a huge size_t. */

  if (fseek(f, 0, SEEK_END) != 0 || (endpos = ftell(f)) < 0 ||
      fseek(f, 0, SEEK_SET) != 0)
    {
    printf("** Failed to find the size of %s: %s\n", argv[i], strerror(errno));
    fclose(f);
    continue;
    }
  filelen = (size_t)endpos;

  buffer = (unsigned char *)malloc((filelen > 0)? filelen : 1);
  if (buffer == NULL)
    {
    printf("** Failed to allocate %lu bytes of memory\n",
      (unsigned long)filelen);
    fclose(f);
    continue;
    }

  readsize = fread(buffer, 1, filelen, f);
  fclose(f);

  if (readsize != filelen)
    printf("** File size is %lu but fread() returned %lu\n",
      (unsigned long)filelen, (unsigned long)readsize);
  else
    {
    printf("Length = %lu\n", (unsigned long)filelen);
    LLVMFuzzerTestOneInput(buffer, filelen);
    }
  free(buffer);
  }

return 0;
}
#endif  /* STANDALONE */
|
||||
|
||||
/* End */
|
||||
@@ -1,2235 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE2 is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef PCRE2_INTERNAL_H_IDEMPOTENT_GUARD
|
||||
#define PCRE2_INTERNAL_H_IDEMPOTENT_GUARD
|
||||
|
||||
/* We do not support both EBCDIC and Unicode at the same time. The "configure"
|
||||
script prevents both being selected, but not everybody uses "configure". EBCDIC
|
||||
is only supported for the 8-bit library, but the check for this has to be later
|
||||
in this file, because the first part is not width-dependent, and is included by
|
||||
pcre2test.c with CODE_UNIT_WIDTH == 0. */
|
||||
|
||||
#if defined EBCDIC && defined SUPPORT_UNICODE
|
||||
#error The use of both EBCDIC and SUPPORT_UNICODE is not supported.
|
||||
#endif
|
||||
|
||||
/* When compiling one of the libraries, the value of PCRE2_CODE_UNIT_WIDTH must
|
||||
be 8, 16, or 32. AutoTools and CMake ensure that this is always the case, but
|
||||
other building methods may not, so here is a check. It is cut out when
|
||||
building pcre2test, because that sets the value to zero. No other source should
|
||||
be including this file. There is no explicit way of forcing a compile to be
|
||||
abandoned, but trying to include a non-existent file seems cleanest. Otherwise
|
||||
there will be many irrelevant consequential errors. */
|
||||
|
||||
#if (!defined PCRE2_BUILDING_PCRE2TEST && !defined PCRE2_DFTABLES) && \
|
||||
(!defined PCRE2_CODE_UNIT_WIDTH || \
|
||||
(PCRE2_CODE_UNIT_WIDTH != 8 && \
|
||||
PCRE2_CODE_UNIT_WIDTH != 16 && \
|
||||
PCRE2_CODE_UNIT_WIDTH != 32))
|
||||
#error PCRE2_CODE_UNIT_WIDTH must be defined as 8, 16, or 32.
|
||||
#include <AbandonCompile>
|
||||
#endif
|
||||
|
||||
|
||||
/* Standard C headers */
|
||||
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Macros to make boolean values more obvious. The #ifndef is to pacify
|
||||
compiler warnings in environments where these macros are defined elsewhere.
|
||||
Unfortunately, there is no way to do the same for the typedef. */
|
||||
|
||||
typedef int BOOL;
|
||||
#ifndef FALSE
|
||||
#define FALSE 0
|
||||
#define TRUE 1
|
||||
#endif
|
||||
|
||||
/* Helper macro for static (compile-time) assertions. Can be used inside
|
||||
functions, or at the top-level of a file. */
|
||||
#define STATIC_ASSERT_JOIN(a,b) a ## b
|
||||
#define STATIC_ASSERT(cond, msg) \
|
||||
typedef int STATIC_ASSERT_JOIN(static_assertion_,msg)[(cond)?1:-1]
|
||||
|
||||
/* Valgrind (memcheck) support */
|
||||
|
||||
#ifdef SUPPORT_VALGRIND
|
||||
#include <valgrind/memcheck.h>
|
||||
#endif
|
||||
|
||||
/* The -ftrivial-auto-var-init compiler option supports initializing all local variables
|
||||
to avoid some classes of bug, but this can cause an unacceptable slowdown
|
||||
for large on-stack arrays in hot functions. This macro lets us annotate
|
||||
such arrays. */
|
||||
|
||||
#ifdef HAVE_ATTRIBUTE_UNINITIALIZED
|
||||
#define PCRE2_KEEP_UNINITIALIZED __attribute__((uninitialized))
|
||||
#else
|
||||
#define PCRE2_KEEP_UNINITIALIZED
|
||||
#endif
|
||||
|
||||
/* Older versions of MSVC lack snprintf(). This define allows for
|
||||
warning/error-free compilation and testing with MSVC compilers back to at least
|
||||
MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
|
||||
|
||||
#if defined(_MSC_VER) && (_MSC_VER < 1900)
|
||||
#define snprintf _snprintf
|
||||
#endif
|
||||
|
||||
/* When compiling a DLL for Windows, the exported symbols have to be declared
|
||||
using some MS magic. I found some useful information on this web page:
|
||||
http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the
|
||||
information there, using __declspec(dllexport) without "extern" we have a
|
||||
definition; with "extern" we have a declaration. The settings here override the
|
||||
setting in pcre2.h (which is included below); it defines only PCRE2_EXP_DECL,
|
||||
which is all that is needed for applications (they just import the symbols). We
|
||||
use:
|
||||
|
||||
PCRE2_EXP_DECL for declarations
|
||||
PCRE2_EXP_DEFN for definitions
|
||||
|
||||
The reason for wrapping this in #ifndef PCRE2_EXP_DECL is so that pcre2test,
|
||||
which is an application, but needs to import this file in order to "peek" at
|
||||
internals, can #include pcre2.h first to get an application's-eye view.
|
||||
|
||||
In principle, people compiling for non-Windows, non-Unix-like (i.e. uncommon,
|
||||
special-purpose environments) might want to stick other stuff in front of
|
||||
exported symbols. That's why, in the non-Windows case, we set PCRE2_EXP_DEFN
|
||||
only if it is not already set. */
|
||||
|
||||
#ifndef PCRE2_EXP_DECL
|
||||
# ifdef _WIN32
|
||||
# ifndef PCRE2_STATIC
|
||||
# define PCRE2_EXP_DECL extern __declspec(dllexport)
|
||||
# define PCRE2_EXP_DEFN __declspec(dllexport)
|
||||
# else
|
||||
# define PCRE2_EXP_DECL extern PCRE2_EXPORT
|
||||
# define PCRE2_EXP_DEFN
|
||||
# endif
|
||||
# else
|
||||
# ifdef __cplusplus
|
||||
# define PCRE2_EXP_DECL extern "C" PCRE2_EXPORT
|
||||
# else
|
||||
# define PCRE2_EXP_DECL extern PCRE2_EXPORT
|
||||
# endif
|
||||
# ifndef PCRE2_EXP_DEFN
|
||||
# define PCRE2_EXP_DEFN PCRE2_EXP_DECL
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Include the public PCRE2 header and the definitions of UCP character
|
||||
property values. This must follow the setting of PCRE2_EXP_DECL above. */
|
||||
|
||||
#include "pcre2.h"
|
||||
#include "pcre2_ucp.h"
|
||||
|
||||
/* When PCRE2 is compiled as a C++ library, the subject pointer can be replaced
|
||||
with a custom type. This makes it possible, for example, to allow pcre2_match()
|
||||
to process subject strings that are discontinuous by using a smart pointer
|
||||
class. It must always be possible to inspect all of the subject string in
|
||||
pcre2_match() because of the way it backtracks. */
|
||||
|
||||
/* WARNING: This is as yet untested for PCRE2. */
|
||||
|
||||
#ifdef CUSTOM_SUBJECT_PTR
|
||||
#undef PCRE2_SPTR
|
||||
#define PCRE2_SPTR CUSTOM_SUBJECT_PTR
|
||||
#endif
|
||||
|
||||
/* When checking for integer overflow, we need to handle large integers.
|
||||
If a 64-bit integer type is available, we can use that.
|
||||
Otherwise we have to cast to double, which of course requires floating point
|
||||
arithmetic. Handle this by defining a macro for the appropriate type. */
|
||||
|
||||
#if defined INT64_MAX || defined int64_t
|
||||
#define INT64_OR_DOUBLE int64_t
|
||||
#else
|
||||
#define INT64_OR_DOUBLE double
|
||||
#endif
|
||||
|
||||
/* External (in the C sense) functions and tables that are private to the
|
||||
libraries are always referenced using the PRIV macro. This makes it possible
|
||||
for pcre2test.c to include some of the source files from the libraries using a
|
||||
different PRIV definition to avoid name clashes. It also makes it clear in the
|
||||
code that a non-static object is being referenced. */
|
||||
|
||||
#ifndef PRIV
|
||||
#define PRIV(name) _pcre2_##name
|
||||
#endif
|
||||
|
||||
/* When compiling for use with the Virtual Pascal compiler, these functions
|
||||
need to have their names changed. PCRE2 must be compiled with the -DVPCOMPAT
|
||||
option on the command line. */
|
||||
|
||||
#ifdef VPCOMPAT
|
||||
#define strlen(s) _strlen(s)
|
||||
#define strncmp(s1,s2,m) _strncmp(s1,s2,m)
|
||||
#define memcmp(s,c,n) _memcmp(s,c,n)
|
||||
#define memcpy(d,s,n) _memcpy(d,s,n)
|
||||
#define memmove(d,s,n) _memmove(d,s,n)
|
||||
#define memset(s,c,n) _memset(s,c,n)
|
||||
#else /* VPCOMPAT */
|
||||
|
||||
/* Otherwise, to cope with SunOS4 and other systems that lack memmove(), define
|
||||
a macro that calls an emulating function. */
|
||||
|
||||
#ifndef HAVE_MEMMOVE
|
||||
#undef memmove /* Some systems may have a macro */
|
||||
#define memmove(a, b, c) PRIV(memmove)(a, b, c)
|
||||
#endif /* not HAVE_MEMMOVE */
|
||||
#endif /* not VPCOMPAT */
|
||||
|
||||
/* This is an unsigned int value that no UTF character can ever have, as
|
||||
Unicode doesn't go beyond 0x0010ffff. */
|
||||
|
||||
#define NOTACHAR 0xffffffff
|
||||
|
||||
/* This is the largest valid UTF/Unicode code point. */
|
||||
|
||||
#define MAX_UTF_CODE_POINT 0x10ffff
|
||||
|
||||
/* Compile-time positive error numbers (all except UTF errors, which are
|
||||
negative) start at this value. It should probably never be changed, in case
|
||||
some application is checking for specific numbers. There is a copy of this
|
||||
#define in pcre2posix.c (which now no longer includes this file). Ideally, a
|
||||
way of having a single definition should be found, but as the number is
|
||||
unlikely to change, this is not a pressing issue. The original reason for
|
||||
having a base other than 0 was to keep the absolute values of compile-time and
|
||||
run-time error numbers numerically different, but in the event the code does
|
||||
not rely on this. */
|
||||
|
||||
#define COMPILE_ERROR_BASE 100
|
||||
|
||||
/* The initial frames vector for remembering pcre2_match() backtracking points
|
||||
is allocated on the heap, of this size (bytes) or ten times the frame size if
|
||||
larger, unless the heap limit is smaller. Typical frame sizes are a few hundred
|
||||
bytes (it depends on the number of capturing parentheses) so 20KiB handles
|
||||
quite a few frames. A larger vector on the heap is obtained for matches that
|
||||
need more frames, subject to the heap limit. */
|
||||
|
||||
#define START_FRAMES_SIZE 20480
|
||||
|
||||
/* For DFA matching, an initial internal workspace vector is allocated on the
|
||||
stack. The heap is used only if this turns out to be too small. */
|
||||
|
||||
#define DFA_START_RWS_SIZE 30720
|
||||
|
||||
/* Define the default BSR convention. */
|
||||
|
||||
#ifdef BSR_ANYCRLF
|
||||
#define BSR_DEFAULT PCRE2_BSR_ANYCRLF
|
||||
#else
|
||||
#define BSR_DEFAULT PCRE2_BSR_UNICODE
|
||||
#endif
|
||||
|
||||
|
||||
/* ---------------- Basic UTF-8 macros ---------------- */
|
||||
|
||||
/* These UTF-8 macros are always defined because they are used in pcre2test for
|
||||
handling wide characters in 16-bit and 32-bit modes, even if an 8-bit library
|
||||
is not supported. */
|
||||
|
||||
/* Tests whether a UTF-8 code point needs extra bytes to decode. */
|
||||
|
||||
#define HASUTF8EXTRALEN(c) ((c) >= 0xc0)
|
||||
|
||||
/* The following macros were originally written in the form of loops that used
|
||||
data from the tables whose names start with PRIV(utf8_table). They were
|
||||
rewritten by a user so as not to use loops, because in some environments this
|
||||
gives a significant performance advantage, and it seems never to do any harm.
|
||||
*/
|
||||
|
||||
/* Base macro to pick up the remaining bytes of a UTF-8 character, not
|
||||
advancing the pointer. */
|
||||
|
||||
#define GETUTF8(c, eptr) \
    { \
    if (!(c & 0x20u))                  /* one more byte follows */ \
      c = (eptr[1] & 0x3fu) | ((c & 0x1fu) << 6); \
    else if (!(c & 0x10u))             /* two more bytes follow */ \
      c = (eptr[2] & 0x3fu) | ((eptr[1] & 0x3fu) << 6) | \
          ((c & 0x0fu) << 12); \
    else if (!(c & 0x08u))             /* three more bytes follow */ \
      c = (eptr[3] & 0x3fu) | ((eptr[2] & 0x3fu) << 6) | \
          ((eptr[1] & 0x3fu) << 12) | ((c & 0x07u) << 18); \
    else if (!(c & 0x04u))             /* four more bytes follow */ \
      c = (eptr[4] & 0x3fu) | ((eptr[3] & 0x3fu) << 6) | \
          ((eptr[2] & 0x3fu) << 12) | ((eptr[1] & 0x3fu) << 18) | \
          ((c & 0x03u) << 24); \
    else                               /* five more bytes follow */ \
      c = (eptr[5] & 0x3fu) | ((eptr[4] & 0x3fu) << 6) | \
          ((eptr[3] & 0x3fu) << 12) | ((eptr[2] & 0x3fu) << 18) | \
          ((eptr[1] & 0x3fu) << 24) | ((c & 0x01u) << 30); \
    }
|
||||
|
||||
/* Base macro to pick up the remaining bytes of a UTF-8 character, advancing
|
||||
the pointer. */
|
||||
|
||||
#define GETUTF8INC(c, eptr) \
    { \
    if (!(c & 0x20u))                  /* one more byte: consume it */ \
      c = (*eptr++ & 0x3fu) | ((c & 0x1fu) << 6); \
    else if (!(c & 0x10u))             /* two more bytes */ \
      { \
      c = (eptr[1] & 0x3fu) | ((*eptr & 0x3fu) << 6) | \
          ((c & 0x0fu) << 12); \
      eptr += 2; \
      } \
    else if (!(c & 0x08u))             /* three more bytes */ \
      { \
      c = (eptr[2] & 0x3fu) | ((eptr[1] & 0x3fu) << 6) | \
          ((*eptr & 0x3fu) << 12) | ((c & 0x07u) << 18); \
      eptr += 3; \
      } \
    else if (!(c & 0x04u))             /* four more bytes */ \
      { \
      c = (eptr[3] & 0x3fu) | ((eptr[2] & 0x3fu) << 6) | \
          ((eptr[1] & 0x3fu) << 12) | ((*eptr & 0x3fu) << 18) | \
          ((c & 0x03u) << 24); \
      eptr += 4; \
      } \
    else                               /* five more bytes */ \
      { \
      c = (eptr[4] & 0x3fu) | ((eptr[3] & 0x3fu) << 6) | \
          ((eptr[2] & 0x3fu) << 12) | ((eptr[1] & 0x3fu) << 18) | \
          ((*eptr & 0x3fu) << 24) | ((c & 0x01u) << 30); \
      eptr += 5; \
      } \
    }
|
||||
|
||||
/* Base macro to pick up the remaining bytes of a UTF-8 character, not
advancing the pointer, incrementing the length. On entry c holds the first byte
of the character and eptr addresses that same byte (the following bytes are
read as eptr[1], eptr[2], ...); on exit c holds the assembled value and len has
been increased by the number of additional bytes that were read (1 to 5). The
macro does not itself check that c is a lead byte or that the following bytes
are continuation bytes: their top two bits are simply masked off. The arguments
are evaluated more than once; c and len must be modifiable lvalues. Every use
is parenthesized so that an expression such as buf + 1 can be passed as eptr. */

#define GETUTF8LEN(c, eptr, len) \
  { \
  if (((c) & 0x20u) == 0) \
    { \
    (c) = (((c) & 0x1fu) << 6) | ((eptr)[1] & 0x3fu); \
    (len)++; \
    } \
  else if (((c) & 0x10u) == 0) \
    { \
    (c) = (((c) & 0x0fu) << 12) | (((eptr)[1] & 0x3fu) << 6) | \
          ((eptr)[2] & 0x3fu); \
    (len) += 2; \
    } \
  else if (((c) & 0x08u) == 0) \
    { \
    (c) = (((c) & 0x07u) << 18) | (((eptr)[1] & 0x3fu) << 12) | \
          (((eptr)[2] & 0x3fu) << 6) | ((eptr)[3] & 0x3fu); \
    (len) += 3; \
    } \
  else if (((c) & 0x04u) == 0) \
    { \
    (c) = (((c) & 0x03u) << 24) | (((eptr)[1] & 0x3fu) << 18) | \
          (((eptr)[2] & 0x3fu) << 12) | (((eptr)[3] & 0x3fu) << 6) | \
          ((eptr)[4] & 0x3fu); \
    (len) += 4; \
    } \
  else \
    { \
    (c) = (((c) & 0x01u) << 30) | (((eptr)[1] & 0x3fu) << 24) | \
          (((eptr)[2] & 0x3fu) << 18) | (((eptr)[3] & 0x3fu) << 12) | \
          (((eptr)[4] & 0x3fu) << 6) | ((eptr)[5] & 0x3fu); \
    (len) += 5; \
    } \
  }
|
||||
|
||||
/* --------------- Whitespace macros ---------------- */
|
||||
|
||||
/* Tests for Unicode horizontal and vertical whitespace characters must check a
|
||||
number of different values. Using a switch statement for this generates the
|
||||
fastest code (no loop, no memory access), and there are several places in the
|
||||
interpreter code where this happens. In order to ensure that all the case lists
|
||||
remain in step, we use macros so that there is only one place where the lists
|
||||
are defined.
|
||||
|
||||
These values are also required as lists in pcre2_compile.c when processing \h,
|
||||
\H, \v and \V in a character class. The lists are defined in pcre2_tables.c,
|
||||
but macros that define the values are here so that all the definitions are
|
||||
together. The lists must be in ascending character order, terminated by
|
||||
NOTACHAR (which is 0xffffffff).
|
||||
|
||||
Any changes should ensure that the various macros are kept in step with each
|
||||
other. NOTE: The values also appear in pcre2_jit_compile.c. */
|
||||
|
||||
/* -------------- ASCII/Unicode environments -------------- */
|
||||
|
||||
#ifndef EBCDIC
|
||||
|
||||
/* Character U+180E (Mongolian Vowel Separator) is not included in the list of
|
||||
spaces in the Unicode file PropList.txt, and Perl does not recognize it as a
|
||||
space. However, in many other sources it is listed as a space and has been in
|
||||
PCRE (both APIs) for a long time. */
|
||||
|
||||
#define HSPACE_LIST \
|
||||
CHAR_HT, CHAR_SPACE, CHAR_NBSP, \
|
||||
0x1680, 0x180e, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, \
|
||||
0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202f, 0x205f, 0x3000, \
|
||||
NOTACHAR
|
||||
|
||||
#define HSPACE_MULTIBYTE_CASES \
|
||||
case 0x1680: /* OGHAM SPACE MARK */ \
|
||||
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ \
|
||||
case 0x2000: /* EN QUAD */ \
|
||||
case 0x2001: /* EM QUAD */ \
|
||||
case 0x2002: /* EN SPACE */ \
|
||||
case 0x2003: /* EM SPACE */ \
|
||||
case 0x2004: /* THREE-PER-EM SPACE */ \
|
||||
case 0x2005: /* FOUR-PER-EM SPACE */ \
|
||||
case 0x2006: /* SIX-PER-EM SPACE */ \
|
||||
case 0x2007: /* FIGURE SPACE */ \
|
||||
case 0x2008: /* PUNCTUATION SPACE */ \
|
||||
case 0x2009: /* THIN SPACE */ \
|
||||
case 0x200A: /* HAIR SPACE */ \
|
||||
case 0x202f: /* NARROW NO-BREAK SPACE */ \
|
||||
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ \
|
||||
case 0x3000 /* IDEOGRAPHIC SPACE */
|
||||
|
||||
#define HSPACE_BYTE_CASES \
|
||||
case CHAR_HT: \
|
||||
case CHAR_SPACE: \
|
||||
case CHAR_NBSP
|
||||
|
||||
#define HSPACE_CASES \
|
||||
HSPACE_BYTE_CASES: \
|
||||
HSPACE_MULTIBYTE_CASES
|
||||
|
||||
#define VSPACE_LIST \
|
||||
CHAR_LF, CHAR_VT, CHAR_FF, CHAR_CR, CHAR_NEL, 0x2028, 0x2029, NOTACHAR
|
||||
|
||||
#define VSPACE_MULTIBYTE_CASES \
|
||||
case 0x2028: /* LINE SEPARATOR */ \
|
||||
case 0x2029 /* PARAGRAPH SEPARATOR */
|
||||
|
||||
#define VSPACE_BYTE_CASES \
|
||||
case CHAR_LF: \
|
||||
case CHAR_VT: \
|
||||
case CHAR_FF: \
|
||||
case CHAR_CR: \
|
||||
case CHAR_NEL
|
||||
|
||||
#define VSPACE_CASES \
|
||||
VSPACE_BYTE_CASES: \
|
||||
VSPACE_MULTIBYTE_CASES
|
||||
|
||||
/* -------------- EBCDIC environments -------------- */
|
||||
|
||||
#else
|
||||
#define HSPACE_LIST CHAR_HT, CHAR_SPACE, CHAR_NBSP, NOTACHAR
|
||||
|
||||
#define HSPACE_BYTE_CASES \
|
||||
case CHAR_HT: \
|
||||
case CHAR_SPACE: \
|
||||
case CHAR_NBSP
|
||||
|
||||
#define HSPACE_CASES HSPACE_BYTE_CASES
|
||||
|
||||
#ifdef EBCDIC_NL25
|
||||
#define VSPACE_LIST \
|
||||
CHAR_VT, CHAR_FF, CHAR_CR, CHAR_NEL, CHAR_LF, NOTACHAR
|
||||
#else
|
||||
#define VSPACE_LIST \
|
||||
CHAR_VT, CHAR_FF, CHAR_CR, CHAR_LF, CHAR_NEL, NOTACHAR
|
||||
#endif
|
||||
|
||||
#define VSPACE_BYTE_CASES \
|
||||
case CHAR_LF: \
|
||||
case CHAR_VT: \
|
||||
case CHAR_FF: \
|
||||
case CHAR_CR: \
|
||||
case CHAR_NEL
|
||||
|
||||
#define VSPACE_CASES VSPACE_BYTE_CASES
|
||||
#endif /* EBCDIC */
|
||||
|
||||
/* -------------- End of whitespace macros -------------- */
|
||||
|
||||
|
||||
/* PCRE2 is able to support several different kinds of newline (CR, LF, CRLF,
|
||||
"any" and "anycrlf" at present). The following macros are used to package up
|
||||
testing for newlines. NLBLOCK, PSSTART, and PSEND are defined in the various
|
||||
modules to indicate in which datablock the parameters exist, and what the
|
||||
start/end of string field names are. */
|
||||
|
||||
#define NLTYPE_FIXED 0 /* Newline is a fixed length string */
|
||||
#define NLTYPE_ANY 1 /* Newline is any Unicode line ending */
|
||||
#define NLTYPE_ANYCRLF 2 /* Newline is CR, LF, or CRLF */
|
||||
|
||||
/* This macro checks for a newline at the given position. When the newline
type is not NLTYPE_FIXED, and p is before NLBLOCK->PSEND, the test is delegated
to PRIV(is_newline)(), which is passed the address of NLBLOCK->nllen. For a
fixed newline, the code unit at p is compared with NLBLOCK->nl[0] and, unless
NLBLOCK->nllen is 1, the next code unit is compared with NLBLOCK->nl[1]; the
comparison is made only if NLBLOCK->nllen code units remain before PSEND. A
variable called utf must be in scope where the macro is used. The argument is
evaluated more than once, and is parenthesized at every use so that any pointer
expression may be passed. */

#define IS_NEWLINE(p) \
  ((NLBLOCK->nltype != NLTYPE_FIXED)? \
    ((p) < NLBLOCK->PSEND && \
     PRIV(is_newline)((p), NLBLOCK->nltype, NLBLOCK->PSEND, \
       &(NLBLOCK->nllen), utf)) \
    : \
    ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
     UCHAR21TEST(p) == NLBLOCK->nl[0] && \
     (NLBLOCK->nllen == 1 || UCHAR21TEST((p)+1) == NLBLOCK->nl[1]) \
    ) \
  )
|
||||
|
||||
/* This macro checks for a newline immediately preceding the given position.
When the newline type is not NLTYPE_FIXED, and p is after NLBLOCK->PSSTART, the
test is delegated to PRIV(was_newline)(), which is passed the address of
NLBLOCK->nllen. For a fixed newline, the NLBLOCK->nllen code units that end
just before p are compared with NLBLOCK->nl[0] and (unless nllen is 1)
NLBLOCK->nl[1]; the comparison is made only if that many code units lie between
PSSTART and p. A variable called utf must be in scope where the macro is used.
The argument is evaluated more than once, and is parenthesized at every use so
that any pointer expression may be passed. */

#define WAS_NEWLINE(p) \
  ((NLBLOCK->nltype != NLTYPE_FIXED)? \
    ((p) > NLBLOCK->PSSTART && \
     PRIV(was_newline)((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \
       &(NLBLOCK->nllen), utf)) \
    : \
    ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
     UCHAR21TEST((p) - NLBLOCK->nllen) == NLBLOCK->nl[0] && \
     (NLBLOCK->nllen == 1 || \
      UCHAR21TEST((p) - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \
    ) \
  )
|
||||
|
||||
/* Private flags containing information about the compiled pattern. The first
|
||||
three must not be changed, because whichever is set is actually the number of
|
||||
bytes in a code unit in that mode. */
|
||||
|
||||
#define PCRE2_MODE8 0x00000001u /* compiled in 8 bit mode */
|
||||
#define PCRE2_MODE16 0x00000002u /* compiled in 16 bit mode */
|
||||
#define PCRE2_MODE32 0x00000004u /* compiled in 32 bit mode */
|
||||
#define PCRE2_FIRSTSET 0x00000010u /* first_code unit is set */
|
||||
#define PCRE2_FIRSTCASELESS 0x00000020u /* caseless first code unit */
|
||||
#define PCRE2_FIRSTMAPSET 0x00000040u /* bitmap of first code units is set */
|
||||
#define PCRE2_LASTSET 0x00000080u /* last code unit is set */
|
||||
#define PCRE2_LASTCASELESS 0x00000100u /* caseless last code unit */
|
||||
#define PCRE2_STARTLINE 0x00000200u /* start after \n for multiline */
|
||||
#define PCRE2_JCHANGED 0x00000400u /* j option used in pattern */
|
||||
#define PCRE2_HASCRORLF 0x00000800u /* explicit \r or \n in pattern */
|
||||
#define PCRE2_HASTHEN 0x00001000u /* pattern contains (*THEN) */
|
||||
#define PCRE2_MATCH_EMPTY 0x00002000u /* pattern can match empty string */
|
||||
#define PCRE2_BSR_SET 0x00004000u /* BSR was set in the pattern */
|
||||
#define PCRE2_NL_SET 0x00008000u /* newline was set in the pattern */
|
||||
#define PCRE2_NOTEMPTY_SET 0x00010000u /* (*NOTEMPTY) used ) keep */
|
||||
#define PCRE2_NE_ATST_SET 0x00020000u /* (*NOTEMPTY_ATSTART) used) together */
|
||||
#define PCRE2_DEREF_TABLES 0x00040000u /* release character tables */
|
||||
#define PCRE2_NOJIT 0x00080000u /* (*NOJIT) used */
|
||||
#define PCRE2_HASBKPORX 0x00100000u /* contains \P, \p, or \X */
|
||||
#define PCRE2_DUPCAPUSED 0x00200000u /* contains (?| */
|
||||
#define PCRE2_HASBKC 0x00400000u /* contains \C */
|
||||
#define PCRE2_HASACCEPT 0x00800000u /* contains (*ACCEPT) */
|
||||
|
||||
#define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)
|
||||
|
||||
/* Values for the matchedby field in a match data block. */
|
||||
|
||||
/* No explicit initializers are given, so the enumerators take the values 0, 1
and 2 in the order listed. Each trailing comment names the matching function
associated with that value. */
enum { PCRE2_MATCHEDBY_INTERPRETER, /* pcre2_match() */
|
||||
PCRE2_MATCHEDBY_DFA_INTERPRETER, /* pcre2_dfa_match() */
|
||||
PCRE2_MATCHEDBY_JIT }; /* pcre2_jit_match() */
|
||||
|
||||
/* Values for the flags field in a match data block. */
|
||||
|
||||
#define PCRE2_MD_COPIED_SUBJECT 0x01u
|
||||
|
||||
/* Magic number to provide a small check against being handed junk. */
|
||||
|
||||
#define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */
|
||||
|
||||
/* The maximum remaining length of subject we are prepared to search for a
|
||||
req_unit match from an anchored pattern. In 8-bit mode, memchr() is used and is
|
||||
much faster than the search loop that has to be used in 16-bit and 32-bit
|
||||
modes. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define REQ_CU_MAX 5000
|
||||
#else
|
||||
#define REQ_CU_MAX 2000
|
||||
#endif
|
||||
|
||||
/* The maximum nesting depth for Unicode character class sets.
|
||||
Currently fixed. Warning: the interpreter relies on this so it can encode
|
||||
the operand stack in a uint32_t. A nesting limit of 15 implies (15*2+1)=31
|
||||
stack operands required, due to the fact that we have two (and only two)
|
||||
levels of operator precedence. In the UTS#18 syntax, you can write 'x&&y[z]'
|
||||
and in Perl syntax you can write '(?[ x - y & (z) ])', both of which imply
|
||||
pushing the match results for x & y to the stack. */
|
||||
|
||||
#define ECLASS_NEST_LIMIT 15
|
||||
|
||||
/* Offsets for the bitmap tables in the cbits set of tables. Each table
|
||||
contains a set of bits for a class map. Some classes are built by combining
|
||||
these tables. */
|
||||
|
||||
#define cbit_space 0 /* [:space:] or \s */
|
||||
#define cbit_xdigit 32 /* [:xdigit:] */
|
||||
#define cbit_digit 64 /* [:digit:] or \d */
|
||||
#define cbit_upper 96 /* [:upper:] */
|
||||
#define cbit_lower 128 /* [:lower:] */
|
||||
#define cbit_word 160 /* [:word:] or \w */
|
||||
#define cbit_graph 192 /* [:graph:] */
|
||||
#define cbit_print 224 /* [:print:] */
|
||||
#define cbit_punct 256 /* [:punct:] */
|
||||
#define cbit_cntrl 288 /* [:cntrl:] */
|
||||
#define cbit_length 320 /* Length of the cbits table */
|
||||
|
||||
/* Bit definitions for entries in the ctypes table. Do not change these values
|
||||
without checking pcre2_jit_compile.c, which has an assertion to ensure that
|
||||
ctype_word has the value 16. */
|
||||
|
||||
#define ctype_space 0x01
|
||||
#define ctype_letter 0x02
|
||||
#define ctype_lcletter 0x04
|
||||
#define ctype_digit 0x08
|
||||
#define ctype_word 0x10 /* alphanumeric or '_' */
|
||||
|
||||
/* Offsets of the various tables from the base tables pointer, and
|
||||
total length of the tables. */
|
||||
|
||||
#define lcc_offset 0 /* Lower case */
|
||||
#define fcc_offset 256 /* Flip case */
|
||||
#define cbits_offset 512 /* Character classes */
|
||||
#define ctypes_offset (cbits_offset + cbit_length) /* Character types */
|
||||
#define TABLES_LENGTH (ctypes_offset + 256)
|
||||
|
||||
/* Private flags used in compile_context.optimization_flags. Each flag occupies
its own bit. */

#define PCRE2_OPTIM_AUTO_POSSESS     0x00000001u
#define PCRE2_OPTIM_DOTSTAR_ANCHOR   0x00000002u
#define PCRE2_OPTIM_START_OPTIMIZE   0x00000004u

/* The union of all the flags above (0x00000007u). It is derived from the
individual flags rather than written as a literal so that it stays in step if a
flag is added or renumbered; any new flag must be added to this expression. */

#define PCRE2_OPTIMIZATION_ALL \
  (PCRE2_OPTIM_AUTO_POSSESS | PCRE2_OPTIM_DOTSTAR_ANCHOR | \
   PCRE2_OPTIM_START_OPTIMIZE)
|
||||
|
||||
/* -------------------- Character and string names ------------------------ */
|
||||
|
||||
/* If PCRE2 is to support UTF-8 on EBCDIC platforms, we cannot use normal
|
||||
character constants like '*' because the compiler would emit their EBCDIC code,
|
||||
which is different from their ASCII/UTF-8 code. Instead we define macros for
|
||||
the characters so that they always use the ASCII/UTF-8 code when UTF-8 support
|
||||
is enabled. When UTF-8 support is not enabled, the definitions use character
|
||||
literals. Both character and string versions of each character are needed, and
|
||||
there are some longer strings as well.
|
||||
|
||||
This means that, on EBCDIC platforms, the PCRE2 library can handle either
|
||||
EBCDIC, or UTF-8, but not both. To support both in the same compiled library
|
||||
would need different lookups depending on whether PCRE2_UTF was set or not.
|
||||
This would make it impossible to use characters in switch/case statements,
|
||||
which would reduce performance. For a theoretical use (which nobody has asked
|
||||
for) in a minority area (EBCDIC platforms), this is not sensible. Any
|
||||
application that did need both could compile two versions of the library, using
|
||||
macros to give the functions distinct names. */
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
|
||||
/* UTF-8 support is not enabled; use the platform-dependent character literals
|
||||
so that PCRE2 works in both ASCII and EBCDIC environments, but only in non-UTF
|
||||
mode. Newline characters are problematic in EBCDIC. Though it has CR and LF
|
||||
characters, a common practice has been to use its NL (0x15) character as the
|
||||
line terminator in C-like processing environments. However, sometimes the LF
|
||||
(0x25) character is used instead, according to this Unicode document:
|
||||
|
||||
http://unicode.org/standard/reports/tr13/tr13-5.html
|
||||
|
||||
PCRE2 defaults EBCDIC NL to 0x15, but has a build-time option to select 0x25
|
||||
instead. Whichever is *not* chosen is defined as NEL.
|
||||
|
||||
In both ASCII and EBCDIC environments, CHAR_NL and CHAR_LF are synonyms for the
|
||||
same code point. */
|
||||
|
||||
#ifdef EBCDIC
|
||||
|
||||
#ifndef EBCDIC_NL25
|
||||
#define CHAR_NL '\x15'
|
||||
#define CHAR_NEL '\x25'
|
||||
#define STR_NL "\x15"
|
||||
#define STR_NEL "\x25"
|
||||
#else
|
||||
#define CHAR_NL '\x25'
|
||||
#define CHAR_NEL '\x15'
|
||||
#define STR_NL "\x25"
|
||||
#define STR_NEL "\x15"
|
||||
#endif
|
||||
|
||||
#define CHAR_LF CHAR_NL
|
||||
#define STR_LF STR_NL
|
||||
|
||||
#define CHAR_ESC '\047'
|
||||
#define CHAR_DEL '\007'
|
||||
#define CHAR_NBSP ((unsigned char)'\x41')
|
||||
#define STR_ESC "\047"
|
||||
#define STR_DEL "\007"
|
||||
|
||||
#else /* Not EBCDIC */
|
||||
|
||||
/* In ASCII/Unicode, linefeed is '\n' and we equate this to NL for
|
||||
compatibility. NEL is the Unicode newline character; make sure it is
|
||||
a positive value. */
|
||||
|
||||
#define CHAR_LF '\n'
|
||||
#define CHAR_NL CHAR_LF
|
||||
#define CHAR_NEL ((unsigned char)'\x85')
|
||||
#define CHAR_ESC '\033'
|
||||
#define CHAR_DEL '\177'
|
||||
#define CHAR_NBSP ((unsigned char)'\xa0')
|
||||
|
||||
#define STR_LF "\n"
|
||||
#define STR_NL STR_LF
|
||||
#define STR_NEL "\x85"
|
||||
#define STR_ESC "\033"
|
||||
#define STR_DEL "\177"
|
||||
|
||||
#endif /* EBCDIC */
|
||||
|
||||
/* The remaining definitions work in both environments. */
|
||||
|
||||
#define CHAR_NUL '\0'
|
||||
#define CHAR_HT '\t'
|
||||
#define CHAR_VT '\v'
|
||||
#define CHAR_FF '\f'
|
||||
#define CHAR_CR '\r'
|
||||
#define CHAR_BS '\b'
|
||||
#define CHAR_BEL '\a'
|
||||
|
||||
#define CHAR_SPACE ' '
|
||||
#define CHAR_EXCLAMATION_MARK '!'
|
||||
#define CHAR_QUOTATION_MARK '"'
|
||||
#define CHAR_NUMBER_SIGN '#'
|
||||
#define CHAR_DOLLAR_SIGN '$'
|
||||
#define CHAR_PERCENT_SIGN '%'
|
||||
#define CHAR_AMPERSAND '&'
|
||||
#define CHAR_APOSTROPHE '\''
|
||||
#define CHAR_LEFT_PARENTHESIS '('
|
||||
#define CHAR_RIGHT_PARENTHESIS ')'
|
||||
#define CHAR_ASTERISK '*'
|
||||
#define CHAR_PLUS '+'
|
||||
#define CHAR_COMMA ','
|
||||
#define CHAR_MINUS '-'
|
||||
#define CHAR_DOT '.'
|
||||
#define CHAR_SLASH '/'
|
||||
#define CHAR_0 '0'
|
||||
#define CHAR_1 '1'
|
||||
#define CHAR_2 '2'
|
||||
#define CHAR_3 '3'
|
||||
#define CHAR_4 '4'
|
||||
#define CHAR_5 '5'
|
||||
#define CHAR_6 '6'
|
||||
#define CHAR_7 '7'
|
||||
#define CHAR_8 '8'
|
||||
#define CHAR_9 '9'
|
||||
#define CHAR_COLON ':'
|
||||
#define CHAR_SEMICOLON ';'
|
||||
#define CHAR_LESS_THAN_SIGN '<'
|
||||
#define CHAR_EQUALS_SIGN '='
|
||||
#define CHAR_GREATER_THAN_SIGN '>'
|
||||
#define CHAR_QUESTION_MARK '?'
|
||||
#define CHAR_COMMERCIAL_AT '@'
|
||||
#define CHAR_A 'A'
|
||||
#define CHAR_B 'B'
|
||||
#define CHAR_C 'C'
|
||||
#define CHAR_D 'D'
|
||||
#define CHAR_E 'E'
|
||||
#define CHAR_F 'F'
|
||||
#define CHAR_G 'G'
|
||||
#define CHAR_H 'H'
|
||||
#define CHAR_I 'I'
|
||||
#define CHAR_J 'J'
|
||||
#define CHAR_K 'K'
|
||||
#define CHAR_L 'L'
|
||||
#define CHAR_M 'M'
|
||||
#define CHAR_N 'N'
|
||||
#define CHAR_O 'O'
|
||||
#define CHAR_P 'P'
|
||||
#define CHAR_Q 'Q'
|
||||
#define CHAR_R 'R'
|
||||
#define CHAR_S 'S'
|
||||
#define CHAR_T 'T'
|
||||
#define CHAR_U 'U'
|
||||
#define CHAR_V 'V'
|
||||
#define CHAR_W 'W'
|
||||
#define CHAR_X 'X'
|
||||
#define CHAR_Y 'Y'
|
||||
#define CHAR_Z 'Z'
|
||||
#define CHAR_LEFT_SQUARE_BRACKET '['
|
||||
#define CHAR_BACKSLASH '\\'
|
||||
#define CHAR_RIGHT_SQUARE_BRACKET ']'
|
||||
#define CHAR_CIRCUMFLEX_ACCENT '^'
|
||||
#define CHAR_UNDERSCORE '_'
|
||||
#define CHAR_GRAVE_ACCENT '`'
|
||||
#define CHAR_a 'a'
|
||||
#define CHAR_b 'b'
|
||||
#define CHAR_c 'c'
|
||||
#define CHAR_d 'd'
|
||||
#define CHAR_e 'e'
|
||||
#define CHAR_f 'f'
|
||||
#define CHAR_g 'g'
|
||||
#define CHAR_h 'h'
|
||||
#define CHAR_i 'i'
|
||||
#define CHAR_j 'j'
|
||||
#define CHAR_k 'k'
|
||||
#define CHAR_l 'l'
|
||||
#define CHAR_m 'm'
|
||||
#define CHAR_n 'n'
|
||||
#define CHAR_o 'o'
|
||||
#define CHAR_p 'p'
|
||||
#define CHAR_q 'q'
|
||||
#define CHAR_r 'r'
|
||||
#define CHAR_s 's'
|
||||
#define CHAR_t 't'
|
||||
#define CHAR_u 'u'
|
||||
#define CHAR_v 'v'
|
||||
#define CHAR_w 'w'
|
||||
#define CHAR_x 'x'
|
||||
#define CHAR_y 'y'
|
||||
#define CHAR_z 'z'
|
||||
#define CHAR_LEFT_CURLY_BRACKET '{'
|
||||
#define CHAR_VERTICAL_LINE '|'
|
||||
#define CHAR_RIGHT_CURLY_BRACKET '}'
|
||||
#define CHAR_TILDE '~'
|
||||
|
||||
#define STR_HT "\t"
|
||||
#define STR_VT "\v"
|
||||
#define STR_FF "\f"
|
||||
#define STR_CR "\r"
|
||||
#define STR_BS "\b"
|
||||
#define STR_BEL "\a"
|
||||
|
||||
#define STR_SPACE " "
|
||||
#define STR_EXCLAMATION_MARK "!"
|
||||
#define STR_QUOTATION_MARK "\""
|
||||
#define STR_NUMBER_SIGN "#"
|
||||
#define STR_DOLLAR_SIGN "$"
|
||||
#define STR_PERCENT_SIGN "%"
|
||||
#define STR_AMPERSAND "&"
|
||||
#define STR_APOSTROPHE "'"
|
||||
#define STR_LEFT_PARENTHESIS "("
|
||||
#define STR_RIGHT_PARENTHESIS ")"
|
||||
#define STR_ASTERISK "*"
|
||||
#define STR_PLUS "+"
|
||||
#define STR_COMMA ","
|
||||
#define STR_MINUS "-"
|
||||
#define STR_DOT "."
|
||||
#define STR_SLASH "/"
|
||||
#define STR_0 "0"
|
||||
#define STR_1 "1"
|
||||
#define STR_2 "2"
|
||||
#define STR_3 "3"
|
||||
#define STR_4 "4"
|
||||
#define STR_5 "5"
|
||||
#define STR_6 "6"
|
||||
#define STR_7 "7"
|
||||
#define STR_8 "8"
|
||||
#define STR_9 "9"
|
||||
#define STR_COLON ":"
|
||||
#define STR_SEMICOLON ";"
|
||||
#define STR_LESS_THAN_SIGN "<"
|
||||
#define STR_EQUALS_SIGN "="
|
||||
#define STR_GREATER_THAN_SIGN ">"
|
||||
#define STR_QUESTION_MARK "?"
|
||||
#define STR_COMMERCIAL_AT "@"
|
||||
#define STR_A "A"
|
||||
#define STR_B "B"
|
||||
#define STR_C "C"
|
||||
#define STR_D "D"
|
||||
#define STR_E "E"
|
||||
#define STR_F "F"
|
||||
#define STR_G "G"
|
||||
#define STR_H "H"
|
||||
#define STR_I "I"
|
||||
#define STR_J "J"
|
||||
#define STR_K "K"
|
||||
#define STR_L "L"
|
||||
#define STR_M "M"
|
||||
#define STR_N "N"
|
||||
#define STR_O "O"
|
||||
#define STR_P "P"
|
||||
#define STR_Q "Q"
|
||||
#define STR_R "R"
|
||||
#define STR_S "S"
|
||||
#define STR_T "T"
|
||||
#define STR_U "U"
|
||||
#define STR_V "V"
|
||||
#define STR_W "W"
|
||||
#define STR_X "X"
|
||||
#define STR_Y "Y"
|
||||
#define STR_Z "Z"
|
||||
#define STR_LEFT_SQUARE_BRACKET "["
|
||||
#define STR_BACKSLASH "\\"
|
||||
#define STR_RIGHT_SQUARE_BRACKET "]"
|
||||
#define STR_CIRCUMFLEX_ACCENT "^"
|
||||
#define STR_UNDERSCORE "_"
|
||||
#define STR_GRAVE_ACCENT "`"
|
||||
#define STR_a "a"
|
||||
#define STR_b "b"
|
||||
#define STR_c "c"
|
||||
#define STR_d "d"
|
||||
#define STR_e "e"
|
||||
#define STR_f "f"
|
||||
#define STR_g "g"
|
||||
#define STR_h "h"
|
||||
#define STR_i "i"
|
||||
#define STR_j "j"
|
||||
#define STR_k "k"
|
||||
#define STR_l "l"
|
||||
#define STR_m "m"
|
||||
#define STR_n "n"
|
||||
#define STR_o "o"
|
||||
#define STR_p "p"
|
||||
#define STR_q "q"
|
||||
#define STR_r "r"
|
||||
#define STR_s "s"
|
||||
#define STR_t "t"
|
||||
#define STR_u "u"
|
||||
#define STR_v "v"
|
||||
#define STR_w "w"
|
||||
#define STR_x "x"
|
||||
#define STR_y "y"
|
||||
#define STR_z "z"
|
||||
#define STR_LEFT_CURLY_BRACKET "{"
|
||||
#define STR_VERTICAL_LINE "|"
|
||||
#define STR_RIGHT_CURLY_BRACKET "}"
|
||||
#define STR_TILDE "~"
|
||||
|
||||
#define STRING_ACCEPT0 "ACCEPT\0"
|
||||
#define STRING_COMMIT0 "COMMIT\0"
|
||||
#define STRING_F0 "F\0"
|
||||
#define STRING_FAIL0 "FAIL\0"
|
||||
#define STRING_MARK0 "MARK\0"
|
||||
#define STRING_PRUNE0 "PRUNE\0"
|
||||
#define STRING_SKIP0 "SKIP\0"
|
||||
#define STRING_THEN "THEN"
|
||||
|
||||
#define STRING_atomic0 "atomic\0"
|
||||
#define STRING_pla0 "pla\0"
|
||||
#define STRING_plb0 "plb\0"
|
||||
#define STRING_napla0 "napla\0"
|
||||
#define STRING_naplb0 "naplb\0"
|
||||
#define STRING_nla0 "nla\0"
|
||||
#define STRING_nlb0 "nlb\0"
|
||||
#define STRING_scs0 "scs\0"
|
||||
#define STRING_sr0 "sr\0"
|
||||
#define STRING_asr0 "asr\0"
|
||||
#define STRING_positive_lookahead0 "positive_lookahead\0"
|
||||
#define STRING_positive_lookbehind0 "positive_lookbehind\0"
|
||||
#define STRING_non_atomic_positive_lookahead0 "non_atomic_positive_lookahead\0"
|
||||
#define STRING_non_atomic_positive_lookbehind0 "non_atomic_positive_lookbehind\0"
|
||||
#define STRING_negative_lookahead0 "negative_lookahead\0"
|
||||
#define STRING_negative_lookbehind0 "negative_lookbehind\0"
|
||||
#define STRING_script_run0 "script_run\0"
|
||||
#define STRING_atomic_script_run "atomic_script_run"
|
||||
#define STRING_scan_substring0 "scan_substring\0"
|
||||
|
||||
#define STRING_alpha0 "alpha\0"
|
||||
#define STRING_lower0 "lower\0"
|
||||
#define STRING_upper0 "upper\0"
|
||||
#define STRING_alnum0 "alnum\0"
|
||||
#define STRING_ascii0 "ascii\0"
|
||||
#define STRING_blank0 "blank\0"
|
||||
#define STRING_cntrl0 "cntrl\0"
|
||||
#define STRING_digit0 "digit\0"
|
||||
#define STRING_graph0 "graph\0"
|
||||
#define STRING_print0 "print\0"
|
||||
#define STRING_punct0 "punct\0"
|
||||
#define STRING_space0 "space\0"
|
||||
#define STRING_word0 "word\0"
|
||||
#define STRING_xdigit "xdigit"
|
||||
|
||||
#define STRING_DEFINE "DEFINE"
|
||||
#define STRING_VERSION "VERSION"
|
||||
#define STRING_WEIRD_STARTWORD "[:<:]]"
|
||||
#define STRING_WEIRD_ENDWORD "[:>:]]"
|
||||
|
||||
#define STRING_CR_RIGHTPAR "CR)"
|
||||
#define STRING_LF_RIGHTPAR "LF)"
|
||||
#define STRING_CRLF_RIGHTPAR "CRLF)"
|
||||
#define STRING_ANY_RIGHTPAR "ANY)"
|
||||
#define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)"
|
||||
#define STRING_NUL_RIGHTPAR "NUL)"
|
||||
#define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"
|
||||
#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"
|
||||
#define STRING_UTF8_RIGHTPAR "UTF8)"
|
||||
#define STRING_UTF16_RIGHTPAR "UTF16)"
|
||||
#define STRING_UTF32_RIGHTPAR "UTF32)"
|
||||
#define STRING_UTF_RIGHTPAR "UTF)"
|
||||
#define STRING_UCP_RIGHTPAR "UCP)"
|
||||
#define STRING_NO_AUTO_POSSESS_RIGHTPAR "NO_AUTO_POSSESS)"
|
||||
#define STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR "NO_DOTSTAR_ANCHOR)"
|
||||
#define STRING_NO_JIT_RIGHTPAR "NO_JIT)"
|
||||
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
|
||||
#define STRING_NOTEMPTY_RIGHTPAR "NOTEMPTY)"
|
||||
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR "NOTEMPTY_ATSTART)"
|
||||
#define STRING_CASELESS_RESTRICT_RIGHTPAR "CASELESS_RESTRICT)"
|
||||
#define STRING_TURKISH_CASING_RIGHTPAR "TURKISH_CASING)"
|
||||
#define STRING_LIMIT_HEAP_EQ "LIMIT_HEAP="
|
||||
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
|
||||
#define STRING_LIMIT_DEPTH_EQ "LIMIT_DEPTH="
|
||||
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
|
||||
#define STRING_MARK "MARK"
|
||||
|
||||
#define STRING_bc "bc"
|
||||
#define STRING_bidiclass "bidiclass"
|
||||
#define STRING_sc "sc"
|
||||
#define STRING_script "script"
|
||||
#define STRING_scriptextensions "scriptextensions"
|
||||
#define STRING_scx "scx"
|
||||
|
||||
#else /* SUPPORT_UNICODE */
|
||||
|
||||
/* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This
|
||||
works in both modes on non-EBCDIC platforms, and on EBCDIC platforms in UTF-8 mode
|
||||
only. */
|
||||
|
||||
#define CHAR_HT '\011'
|
||||
#define CHAR_VT '\013'
|
||||
#define CHAR_FF '\014'
|
||||
#define CHAR_CR '\015'
|
||||
#define CHAR_LF '\012'
|
||||
#define CHAR_NL CHAR_LF
|
||||
#define CHAR_NEL ((unsigned char)'\x85')
|
||||
#define CHAR_BS '\010'
|
||||
#define CHAR_BEL '\007'
|
||||
#define CHAR_ESC '\033'
|
||||
#define CHAR_DEL '\177'
|
||||
|
||||
#define CHAR_NUL '\0'
|
||||
#define CHAR_SPACE '\040'
|
||||
#define CHAR_EXCLAMATION_MARK '\041'
|
||||
#define CHAR_QUOTATION_MARK '\042'
|
||||
#define CHAR_NUMBER_SIGN '\043'
|
||||
#define CHAR_DOLLAR_SIGN '\044'
|
||||
#define CHAR_PERCENT_SIGN '\045'
|
||||
#define CHAR_AMPERSAND '\046'
|
||||
#define CHAR_APOSTROPHE '\047'
|
||||
#define CHAR_LEFT_PARENTHESIS '\050'
|
||||
#define CHAR_RIGHT_PARENTHESIS '\051'
|
||||
#define CHAR_ASTERISK '\052'
|
||||
#define CHAR_PLUS '\053'
|
||||
#define CHAR_COMMA '\054'
|
||||
#define CHAR_MINUS '\055'
|
||||
#define CHAR_DOT '\056'
|
||||
#define CHAR_SLASH '\057'
|
||||
#define CHAR_0 '\060'
|
||||
#define CHAR_1 '\061'
|
||||
#define CHAR_2 '\062'
|
||||
#define CHAR_3 '\063'
|
||||
#define CHAR_4 '\064'
|
||||
#define CHAR_5 '\065'
|
||||
#define CHAR_6 '\066'
|
||||
#define CHAR_7 '\067'
|
||||
#define CHAR_8 '\070'
|
||||
#define CHAR_9 '\071'
|
||||
#define CHAR_COLON '\072'
|
||||
#define CHAR_SEMICOLON '\073'
|
||||
#define CHAR_LESS_THAN_SIGN '\074'
|
||||
#define CHAR_EQUALS_SIGN '\075'
|
||||
#define CHAR_GREATER_THAN_SIGN '\076'
|
||||
#define CHAR_QUESTION_MARK '\077'
|
||||
#define CHAR_COMMERCIAL_AT '\100'
|
||||
#define CHAR_A '\101'
|
||||
#define CHAR_B '\102'
|
||||
#define CHAR_C '\103'
|
||||
#define CHAR_D '\104'
|
||||
#define CHAR_E '\105'
|
||||
#define CHAR_F '\106'
|
||||
#define CHAR_G '\107'
|
||||
#define CHAR_H '\110'
|
||||
#define CHAR_I '\111'
|
||||
#define CHAR_J '\112'
|
||||
#define CHAR_K '\113'
|
||||
#define CHAR_L '\114'
|
||||
#define CHAR_M '\115'
|
||||
#define CHAR_N '\116'
|
||||
#define CHAR_O '\117'
|
||||
#define CHAR_P '\120'
|
||||
#define CHAR_Q '\121'
|
||||
#define CHAR_R '\122'
|
||||
#define CHAR_S '\123'
|
||||
#define CHAR_T '\124'
|
||||
#define CHAR_U '\125'
|
||||
#define CHAR_V '\126'
|
||||
#define CHAR_W '\127'
|
||||
#define CHAR_X '\130'
|
||||
#define CHAR_Y '\131'
|
||||
#define CHAR_Z '\132'
|
||||
#define CHAR_LEFT_SQUARE_BRACKET '\133'
|
||||
#define CHAR_BACKSLASH '\134'
|
||||
#define CHAR_RIGHT_SQUARE_BRACKET '\135'
|
||||
#define CHAR_CIRCUMFLEX_ACCENT '\136'
|
||||
#define CHAR_UNDERSCORE '\137'
|
||||
#define CHAR_GRAVE_ACCENT '\140'
|
||||
#define CHAR_a '\141'
|
||||
#define CHAR_b '\142'
|
||||
#define CHAR_c '\143'
|
||||
#define CHAR_d '\144'
|
||||
#define CHAR_e '\145'
|
||||
#define CHAR_f '\146'
|
||||
#define CHAR_g '\147'
|
||||
#define CHAR_h '\150'
|
||||
#define CHAR_i '\151'
|
||||
#define CHAR_j '\152'
|
||||
#define CHAR_k '\153'
|
||||
#define CHAR_l '\154'
|
||||
#define CHAR_m '\155'
|
||||
#define CHAR_n '\156'
|
||||
#define CHAR_o '\157'
|
||||
#define CHAR_p '\160'
|
||||
#define CHAR_q '\161'
|
||||
#define CHAR_r '\162'
|
||||
#define CHAR_s '\163'
|
||||
#define CHAR_t '\164'
|
||||
#define CHAR_u '\165'
|
||||
#define CHAR_v '\166'
|
||||
#define CHAR_w '\167'
|
||||
#define CHAR_x '\170'
|
||||
#define CHAR_y '\171'
|
||||
#define CHAR_z '\172'
|
||||
#define CHAR_LEFT_CURLY_BRACKET '\173'
|
||||
#define CHAR_VERTICAL_LINE '\174'
|
||||
#define CHAR_RIGHT_CURLY_BRACKET '\175'
|
||||
#define CHAR_TILDE '\176'
|
||||
#define CHAR_NBSP ((unsigned char)'\xa0')
|
||||
|
||||
#define STR_HT "\011"
|
||||
#define STR_VT "\013"
|
||||
#define STR_FF "\014"
|
||||
#define STR_CR "\015"
|
||||
#define STR_NL "\012"
|
||||
#define STR_BS "\010"
|
||||
#define STR_BEL "\007"
|
||||
#define STR_ESC "\033"
|
||||
#define STR_DEL "\177"
|
||||
|
||||
#define STR_SPACE "\040"
|
||||
#define STR_EXCLAMATION_MARK "\041"
|
||||
#define STR_QUOTATION_MARK "\042"
|
||||
#define STR_NUMBER_SIGN "\043"
|
||||
#define STR_DOLLAR_SIGN "\044"
|
||||
#define STR_PERCENT_SIGN "\045"
|
||||
#define STR_AMPERSAND "\046"
|
||||
#define STR_APOSTROPHE "\047"
|
||||
#define STR_LEFT_PARENTHESIS "\050"
|
||||
#define STR_RIGHT_PARENTHESIS "\051"
|
||||
#define STR_ASTERISK "\052"
|
||||
#define STR_PLUS "\053"
|
||||
#define STR_COMMA "\054"
|
||||
#define STR_MINUS "\055"
|
||||
#define STR_DOT "\056"
|
||||
#define STR_SLASH "\057"
|
||||
#define STR_0 "\060"
|
||||
#define STR_1 "\061"
|
||||
#define STR_2 "\062"
|
||||
#define STR_3 "\063"
|
||||
#define STR_4 "\064"
|
||||
#define STR_5 "\065"
|
||||
#define STR_6 "\066"
|
||||
#define STR_7 "\067"
|
||||
#define STR_8 "\070"
|
||||
#define STR_9 "\071"
|
||||
#define STR_COLON "\072"
|
||||
#define STR_SEMICOLON "\073"
|
||||
#define STR_LESS_THAN_SIGN "\074"
|
||||
#define STR_EQUALS_SIGN "\075"
|
||||
#define STR_GREATER_THAN_SIGN "\076"
|
||||
#define STR_QUESTION_MARK "\077"
|
||||
#define STR_COMMERCIAL_AT "\100"
|
||||
#define STR_A "\101"
|
||||
#define STR_B "\102"
|
||||
#define STR_C "\103"
|
||||
#define STR_D "\104"
|
||||
#define STR_E "\105"
|
||||
#define STR_F "\106"
|
||||
#define STR_G "\107"
|
||||
#define STR_H "\110"
|
||||
#define STR_I "\111"
|
||||
#define STR_J "\112"
|
||||
#define STR_K "\113"
|
||||
#define STR_L "\114"
|
||||
#define STR_M "\115"
|
||||
#define STR_N "\116"
|
||||
#define STR_O "\117"
|
||||
#define STR_P "\120"
|
||||
#define STR_Q "\121"
|
||||
#define STR_R "\122"
|
||||
#define STR_S "\123"
|
||||
#define STR_T "\124"
|
||||
#define STR_U "\125"
|
||||
#define STR_V "\126"
|
||||
#define STR_W "\127"
|
||||
#define STR_X "\130"
|
||||
#define STR_Y "\131"
|
||||
#define STR_Z "\132"
|
||||
#define STR_LEFT_SQUARE_BRACKET "\133"
|
||||
#define STR_BACKSLASH "\134"
|
||||
#define STR_RIGHT_SQUARE_BRACKET "\135"
|
||||
#define STR_CIRCUMFLEX_ACCENT "\136"
|
||||
#define STR_UNDERSCORE "\137"
|
||||
#define STR_GRAVE_ACCENT "\140"
|
||||
#define STR_a "\141"
|
||||
#define STR_b "\142"
|
||||
#define STR_c "\143"
|
||||
#define STR_d "\144"
|
||||
#define STR_e "\145"
|
||||
#define STR_f "\146"
|
||||
#define STR_g "\147"
|
||||
#define STR_h "\150"
|
||||
#define STR_i "\151"
|
||||
#define STR_j "\152"
|
||||
#define STR_k "\153"
|
||||
#define STR_l "\154"
|
||||
#define STR_m "\155"
|
||||
#define STR_n "\156"
|
||||
#define STR_o "\157"
|
||||
#define STR_p "\160"
|
||||
#define STR_q "\161"
|
||||
#define STR_r "\162"
|
||||
#define STR_s "\163"
|
||||
#define STR_t "\164"
|
||||
#define STR_u "\165"
|
||||
#define STR_v "\166"
|
||||
#define STR_w "\167"
|
||||
#define STR_x "\170"
|
||||
#define STR_y "\171"
|
||||
#define STR_z "\172"
|
||||
#define STR_LEFT_CURLY_BRACKET "\173"
|
||||
#define STR_VERTICAL_LINE "\174"
|
||||
#define STR_RIGHT_CURLY_BRACKET "\175"
|
||||
#define STR_TILDE "\176"
|
||||
|
||||
#define STRING_ACCEPT0 STR_A STR_C STR_C STR_E STR_P STR_T "\0"
|
||||
#define STRING_COMMIT0 STR_C STR_O STR_M STR_M STR_I STR_T "\0"
|
||||
#define STRING_F0 STR_F "\0"
|
||||
#define STRING_FAIL0 STR_F STR_A STR_I STR_L "\0"
|
||||
#define STRING_MARK0 STR_M STR_A STR_R STR_K "\0"
|
||||
#define STRING_PRUNE0 STR_P STR_R STR_U STR_N STR_E "\0"
|
||||
#define STRING_SKIP0 STR_S STR_K STR_I STR_P "\0"
|
||||
#define STRING_THEN STR_T STR_H STR_E STR_N
|
||||
|
||||
#define STRING_atomic0 STR_a STR_t STR_o STR_m STR_i STR_c "\0"
|
||||
#define STRING_pla0 STR_p STR_l STR_a "\0"
|
||||
#define STRING_plb0 STR_p STR_l STR_b "\0"
|
||||
#define STRING_napla0 STR_n STR_a STR_p STR_l STR_a "\0"
|
||||
#define STRING_naplb0 STR_n STR_a STR_p STR_l STR_b "\0"
|
||||
#define STRING_nla0 STR_n STR_l STR_a "\0"
|
||||
#define STRING_nlb0 STR_n STR_l STR_b "\0"
|
||||
#define STRING_scs0 STR_s STR_c STR_s "\0"
|
||||
#define STRING_sr0 STR_s STR_r "\0"
|
||||
#define STRING_asr0 STR_a STR_s STR_r "\0"
|
||||
#define STRING_positive_lookahead0 STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
|
||||
#define STRING_positive_lookbehind0 STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
|
||||
#define STRING_non_atomic_positive_lookahead0 STR_n STR_o STR_n STR_UNDERSCORE STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
|
||||
#define STRING_non_atomic_positive_lookbehind0 STR_n STR_o STR_n STR_UNDERSCORE STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
|
||||
#define STRING_negative_lookahead0 STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
|
||||
#define STRING_negative_lookbehind0 STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
|
||||
#define STRING_script_run0 STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n "\0"
|
||||
#define STRING_atomic_script_run STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n
|
||||
#define STRING_scan_substring0 STR_s STR_c STR_a STR_n STR_UNDERSCORE STR_s STR_u STR_b STR_s STR_t STR_r STR_i STR_n STR_g "\0"
|
||||
|
||||
#define STRING_alpha0 STR_a STR_l STR_p STR_h STR_a "\0"
|
||||
#define STRING_lower0 STR_l STR_o STR_w STR_e STR_r "\0"
|
||||
#define STRING_upper0 STR_u STR_p STR_p STR_e STR_r "\0"
|
||||
#define STRING_alnum0 STR_a STR_l STR_n STR_u STR_m "\0"
|
||||
#define STRING_ascii0 STR_a STR_s STR_c STR_i STR_i "\0"
|
||||
#define STRING_blank0 STR_b STR_l STR_a STR_n STR_k "\0"
|
||||
#define STRING_cntrl0 STR_c STR_n STR_t STR_r STR_l "\0"
|
||||
#define STRING_digit0 STR_d STR_i STR_g STR_i STR_t "\0"
|
||||
#define STRING_graph0 STR_g STR_r STR_a STR_p STR_h "\0"
|
||||
#define STRING_print0 STR_p STR_r STR_i STR_n STR_t "\0"
|
||||
#define STRING_punct0 STR_p STR_u STR_n STR_c STR_t "\0"
|
||||
#define STRING_space0 STR_s STR_p STR_a STR_c STR_e "\0"
|
||||
#define STRING_word0 STR_w STR_o STR_r STR_d "\0"
|
||||
#define STRING_xdigit STR_x STR_d STR_i STR_g STR_i STR_t
|
||||
|
||||
#define STRING_DEFINE STR_D STR_E STR_F STR_I STR_N STR_E
|
||||
#define STRING_VERSION STR_V STR_E STR_R STR_S STR_I STR_O STR_N
|
||||
#define STRING_WEIRD_STARTWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_LESS_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
|
||||
#define STRING_WEIRD_ENDWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_GREATER_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
|
||||
|
||||
#define STRING_CR_RIGHTPAR STR_C STR_R STR_RIGHT_PARENTHESIS
|
||||
#define STRING_LF_RIGHTPAR STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_CRLF_RIGHTPAR STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_ANY_RIGHTPAR STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS
|
||||
#define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_NUL_RIGHTPAR STR_N STR_U STR_L STR_RIGHT_PARENTHESIS
|
||||
#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UTF16_RIGHTPAR STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UTF32_RIGHTPAR STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
|
||||
#define STRING_NO_AUTO_POSSESS_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_A STR_U STR_T STR_O STR_UNDERSCORE STR_P STR_O STR_S STR_S STR_E STR_S STR_S STR_RIGHT_PARENTHESIS
|
||||
#define STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_D STR_O STR_T STR_S STR_T STR_A STR_R STR_UNDERSCORE STR_A STR_N STR_C STR_H STR_O STR_R STR_RIGHT_PARENTHESIS
|
||||
#define STRING_NO_JIT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_J STR_I STR_T STR_RIGHT_PARENTHESIS
|
||||
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
|
||||
#define STRING_NOTEMPTY_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_RIGHT_PARENTHESIS
|
||||
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS
|
||||
#define STRING_CASELESS_RESTRICT_RIGHTPAR STR_C STR_A STR_S STR_E STR_L STR_E STR_S STR_S STR_UNDERSCORE STR_R STR_E STR_S STR_T STR_R STR_I STR_C STR_T STR_RIGHT_PARENTHESIS
|
||||
#define STRING_TURKISH_CASING_RIGHTPAR STR_T STR_U STR_R STR_K STR_I STR_S STR_H STR_UNDERSCORE STR_C STR_A STR_S STR_I STR_N STR_G STR_RIGHT_PARENTHESIS
|
||||
#define STRING_LIMIT_HEAP_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_H STR_E STR_A STR_P STR_EQUALS_SIGN
|
||||
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
|
||||
#define STRING_LIMIT_DEPTH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_D STR_E STR_P STR_T STR_H STR_EQUALS_SIGN
|
||||
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
|
||||
#define STRING_MARK STR_M STR_A STR_R STR_K
|
||||
|
||||
#define STRING_bc STR_b STR_c
|
||||
#define STRING_bidiclass STR_b STR_i STR_d STR_i STR_c STR_l STR_a STR_s STR_s
|
||||
#define STRING_sc STR_s STR_c
|
||||
#define STRING_script STR_s STR_c STR_r STR_i STR_p STR_t
|
||||
#define STRING_scriptextensions STR_s STR_c STR_r STR_i STR_p STR_t STR_e STR_x STR_t STR_e STR_n STR_s STR_i STR_o STR_n STR_s
|
||||
#define STRING_scx STR_s STR_c STR_x
|
||||
|
||||
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* -------------------- End of character and string names -------------------*/
|
||||
|
||||
/* -------------------- Definitions for compiled patterns -------------------*/
|
||||
|
||||
/* Codes for different types of Unicode property. If these definitions are
|
||||
changed, the autopossessifying table in pcre2_auto_possess.c must be updated to
|
||||
match. */
|
||||
|
||||
#define PT_LAMP 0 /* L& - the union of Lu, Ll, Lt */
|
||||
#define PT_GC 1 /* Specified general characteristic (e.g. L) */
|
||||
#define PT_PC 2 /* Specified particular characteristic (e.g. Lu) */
|
||||
#define PT_SC 3 /* Script only (e.g. Han) */
|
||||
#define PT_SCX 4 /* Script extensions (includes SC) */
|
||||
#define PT_ALNUM 5 /* Alphanumeric - the union of L and N */
|
||||
#define PT_SPACE 6 /* Perl space - general category Z plus 9,10,12,13 */
|
||||
#define PT_PXSPACE 7 /* POSIX space - Z plus 9,10,11,12,13 */
|
||||
#define PT_WORD 8 /* Word - L, N, Mn, or Pc */
|
||||
#define PT_CLIST 9 /* Pseudo-property: match character list */
|
||||
#define PT_UCNC 10 /* Universal Character nameable character */
|
||||
#define PT_BIDICL 11 /* Specified bidi class */
|
||||
#define PT_BOOL 12 /* Boolean property */
|
||||
#define PT_ANY 13 /* Must be the last entry!
|
||||
Any property - matches all chars */
|
||||
#define PT_TABSIZE PT_ANY /* Size of square table for autopossessify tests */
|
||||
|
||||
/* The following special properties are used only in XCLASS items, when POSIX
|
||||
classes are specified and PCRE2_UCP is set - in other words, for Unicode
|
||||
handling of these classes. They are not available via the \p or \P escapes like
|
||||
those in the above list, and so they do not take part in the autopossessifying
|
||||
table. */
|
||||
|
||||
#define PT_PXGRAPH 14 /* [:graph:] - characters that mark the paper */
|
||||
#define PT_PXPRINT 15 /* [:print:] - [:graph:] plus non-control spaces */
|
||||
#define PT_PXPUNCT 16 /* [:punct:] - punctuation characters */
|
||||
#define PT_PXXDIGIT 17 /* [:xdigit:] - hex digits */
|
||||
|
||||
/* This value is used when parsing \p and \P escapes to indicate that neither
|
||||
\p{script:...} nor \p{scx:...} has been encountered. */
|
||||
|
||||
#define PT_NOTSCRIPT 255
|
||||
|
||||
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
|
||||
contain characters with values greater than 255. */
|
||||
|
||||
#define XCL_NOT 0x01 /* Flag: this is a negative class */
|
||||
#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */
|
||||
#define XCL_HASPROP 0x04 /* Flag: property checks are present. */
|
||||
|
||||
#define XCL_END 0 /* Marks end of individual items */
|
||||
#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */
|
||||
#define XCL_RANGE 2 /* A range (two multibyte chars) follows */
|
||||
#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */
|
||||
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
|
||||
/* This value represents the beginning of character lists. The value
|
||||
is 16 bits long, and stored as a high and low byte pair in 8 bit mode.
|
||||
The lower 12 bits contain information about character lists (see later). */
|
||||
#define XCL_LIST (sizeof(PCRE2_UCHAR) == 1 ? 0x10 : 0x1000)
|
||||
|
||||
/* When a character class contains many characters/ranges,
|
||||
they are stored in character lists. There are four character
|
||||
lists which contain characters/ranges within a given range.
|
||||
|
||||
The name, character range and item size for each list:
|
||||
Low16 [0x100 - 0x7fff] 16 bit items
|
||||
High16 [0x8000 - 0xffff] 16 bit items
|
||||
Low32 [0x10000 - 0x7fffffff] 32 bit items
|
||||
High32 [0x80000000 - 0xffffffff] 32 bit items
|
||||
|
||||
The Low32 character list is used only when utf encoding or 32 bit
|
||||
character width is enabled, and the High32 character list is used only
|
||||
when 32 bit character width is enabled.
|
||||
|
||||
Each character list contains items. The lowest bit represents that
|
||||
an item is the beginning of a range (bit is cleared), or not (bit
|
||||
is set). The other bits represent the character shifted left by
|
||||
one, so its highest bit is discarded. Due to the layout of character
|
||||
lists, the highest bit of a character is always known:
|
||||
|
||||
Low16 and Low32: the highest bit is always zero
|
||||
High16 and High32: the highest bit is always one
|
||||
|
||||
The items are ordered in increasing order, so binary search can be
|
||||
used to find the lower bound of an input character. The lower bound
|
||||
is the highest item whose value is less than or equal to the input
|
||||
character. If the lower bit of the item is cleared, or the character
|
||||
stored in the item equals the input character, the input
|
||||
character is in the character list. */
|
||||
|
||||
/* Character list constants. */
|
||||
#define XCL_CHAR_LIST_LOW_16_START 0x100
|
||||
#define XCL_CHAR_LIST_LOW_16_END 0x7fff
|
||||
#define XCL_CHAR_LIST_LOW_16_ADD 0x0
|
||||
|
||||
#define XCL_CHAR_LIST_HIGH_16_START 0x8000
|
||||
#define XCL_CHAR_LIST_HIGH_16_END 0xffff
|
||||
#define XCL_CHAR_LIST_HIGH_16_ADD 0x8000
|
||||
|
||||
#define XCL_CHAR_LIST_LOW_32_START 0x10000
|
||||
#define XCL_CHAR_LIST_LOW_32_END 0x7fffffff
|
||||
#define XCL_CHAR_LIST_LOW_32_ADD 0x0
|
||||
|
||||
#define XCL_CHAR_LIST_HIGH_32_START 0x80000000
|
||||
#define XCL_CHAR_LIST_HIGH_32_END 0xffffffff
|
||||
#define XCL_CHAR_LIST_HIGH_32_ADD 0x80000000
|
||||
|
||||
/* Mask for getting the descriptors of character list ranges.
|
||||
Each descriptor has XCL_TYPE_BIT_LEN bits, and can be processed
|
||||
by XCL_BEGIN_WITH_RANGE and XCL_ITEM_COUNT_MASK macros. */
|
||||
#define XCL_TYPE_MASK 0xfff
|
||||
#define XCL_TYPE_BIT_LEN 3
|
||||
/* If this bit is set, the first item of the character list is the
|
||||
end of a range, which started before the starting character of the
|
||||
character list. */
|
||||
#define XCL_BEGIN_WITH_RANGE 0x4
|
||||
/* Number of items in the character list: 0, 1, or 2. The value 3
|
||||
represents that the item count is stored at the beginning of the
|
||||
character list. The item count has the same width as the items
|
||||
in the character list (e.g. 16 bit for Low16 and High16 lists). */
|
||||
#define XCL_ITEM_COUNT_MASK 0x3
|
||||
/* Shift and flag for constructing character list items. The XCL_CHAR_END
|
||||
is set, when the item is not the beginning of a range. The XCL_CHAR_SHIFT
|
||||
can be used to encode / decode the character value stored in an item. */
|
||||
#define XCL_CHAR_END 0x1
|
||||
#define XCL_CHAR_SHIFT 1
|
||||
|
||||
/* Flag bits for an extended class (OP_ECLASS), which is used for complex
|
||||
character matches such as [\p{Greek} && \p{Ll}]. */
|
||||
|
||||
#define ECL_MAP 0x01 /* Flag: a 32-byte map is present */
|
||||
|
||||
/* Type tags for the items stored in an extended class (OP_ECLASS). These items
|
||||
follow the OP_ECLASS's flag char and bitmap, and represent a Reverse Polish
|
||||
Notation list of operands and operators manipulating a stack of bits. */
|
||||
|
||||
#define ECL_AND 1 /* Pop two from the stack, AND, and push result. */
|
||||
#define ECL_OR 2 /* Pop two from the stack, OR, and push result. */
|
||||
#define ECL_XOR 3 /* Pop two from the stack, XOR, and push result. */
|
||||
#define ECL_NOT 4 /* Pop one from the stack, NOT, and push result. */
|
||||
#define ECL_XCLASS 5 /* XCLASS nested within ECLASS; match and push result. */
|
||||
#define ECL_ANY 6 /* Temporary, only used during compilation. */
|
||||
#define ECL_NONE 7 /* Temporary, only used during compilation. */
|
||||
|
||||
/* These are escaped items that aren't just an encoding of a particular data
|
||||
value such as \n. They must have non-zero values, as check_escape() returns 0
|
||||
for a data character. In the escapes[] table in pcre2_compile.c their values
|
||||
are negated in order to distinguish them from data values.
|
||||
|
||||
They must appear here in the same order as in the opcode definitions below, up
|
||||
to ESC_z. There's a dummy for OP_ALLANY because it corresponds to "." in DOTALL
|
||||
mode rather than an escape sequence. It is also used for [^] in JavaScript
|
||||
compatibility mode, and for \C in non-utf mode. In non-DOTALL mode, "." behaves
|
||||
like \N.
|
||||
|
||||
ESC_ub is a special return from check_escape() when, in BSUX mode, \u{ is not
|
||||
followed by hex digits and }, in which case it should mean a literal "u"
|
||||
followed by a literal "{". This hack is necessary for cases like \u{ 12}
|
||||
because without it, this is interpreted as u{12} now that spaces are allowed in
|
||||
quantifiers.
|
||||
|
||||
Negative numbers are used to encode a backreference (\1, \2, \3, etc.) in
|
||||
check_escape(). There are tests in the code for an escape greater than ESC_b
|
||||
and less than ESC_Z to detect the types that may be repeated. These are the
|
||||
types that consume characters. If any new escapes are put in between that don't
|
||||
consume a character, that code will have to change. */
|
||||
|
||||
enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
|
||||
ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,
|
||||
ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z,
|
||||
ESC_E, ESC_Q, ESC_g, ESC_k, ESC_ub };
|
||||
|
||||
|
||||
/********************** Opcode definitions ******************/
|
||||
|
||||
/****** NOTE NOTE NOTE ******
|
||||
|
||||
Starting from 1 (i.e. after OP_END), the values up to OP_EOD must correspond in
|
||||
order to the list of escapes immediately above. Furthermore, values up to
|
||||
OP_DOLLM must not be changed without adjusting the table called autoposstab in
|
||||
pcre2_auto_possess.c.
|
||||
|
||||
Whenever this list is updated, the two macro definitions that follow must be
|
||||
updated to match. The possessification table called "opcode_possessify" in
|
||||
pcre2_compile.c must also be updated, and also the tables called "coptable"
|
||||
and "poptable" in pcre2_dfa_match.c.
|
||||
|
||||
****** NOTE NOTE NOTE ******/
|
||||
|
||||
|
||||
/* The values between FIRST_AUTOTAB_OP and LAST_AUTOTAB_RIGHT_OP, inclusive,
|
||||
are used in a table for deciding whether a repeated character type can be
|
||||
auto-possessified. */
|
||||
|
||||
#define FIRST_AUTOTAB_OP OP_NOT_DIGIT
|
||||
#define LAST_AUTOTAB_LEFT_OP OP_EXTUNI
|
||||
#define LAST_AUTOTAB_RIGHT_OP OP_DOLLM
|
||||
|
||||
enum {
|
||||
OP_END, /* 0 End of pattern */
|
||||
|
||||
/* Values corresponding to backslashed metacharacters */
|
||||
|
||||
OP_SOD, /* 1 Start of data: \A */
|
||||
OP_SOM, /* 2 Start of match (subject + offset): \G */
|
||||
OP_SET_SOM, /* 3 Set start of match (\K) */
|
||||
OP_NOT_WORD_BOUNDARY, /* 4 \B -- see also OP_NOT_UCP_WORD_BOUNDARY */
|
||||
OP_WORD_BOUNDARY, /* 5 \b -- see also OP_UCP_WORD_BOUNDARY */
|
||||
OP_NOT_DIGIT, /* 6 \D */
|
||||
OP_DIGIT, /* 7 \d */
|
||||
OP_NOT_WHITESPACE, /* 8 \S */
|
||||
OP_WHITESPACE, /* 9 \s */
|
||||
OP_NOT_WORDCHAR, /* 10 \W */
|
||||
OP_WORDCHAR, /* 11 \w */
|
||||
|
||||
OP_ANY, /* 12 Match any character except newline (\N) */
|
||||
OP_ALLANY, /* 13 Match any character */
|
||||
OP_ANYBYTE, /* 14 Match any byte (\C); different to OP_ANY for UTF-8 */
|
||||
OP_NOTPROP, /* 15 \P (not Unicode property) */
|
||||
OP_PROP, /* 16 \p (Unicode property) */
|
||||
OP_ANYNL, /* 17 \R (any newline sequence) */
|
||||
OP_NOT_HSPACE, /* 18 \H (not horizontal whitespace) */
|
||||
OP_HSPACE, /* 19 \h (horizontal whitespace) */
|
||||
OP_NOT_VSPACE, /* 20 \V (not vertical whitespace) */
|
||||
OP_VSPACE, /* 21 \v (vertical whitespace) */
|
||||
OP_EXTUNI, /* 22 \X (extended Unicode sequence) */
|
||||
OP_EODN, /* 23 End of data or \n at end of data (\Z) */
|
||||
OP_EOD, /* 24 End of data (\z) */
|
||||
|
||||
/* Line end assertions */
|
||||
|
||||
OP_DOLL, /* 25 End of line - not multiline */
|
||||
OP_DOLLM, /* 26 End of line - multiline */
|
||||
OP_CIRC, /* 27 Start of line - not multiline */
|
||||
OP_CIRCM, /* 28 Start of line - multiline */
|
||||
|
||||
/* Single characters; caseful must precede the caseless ones, and these
|
||||
must remain in this order, and adjacent. */
|
||||
|
||||
OP_CHAR, /* 29 Match one character, casefully */
|
||||
OP_CHARI, /* 30 Match one character, caselessly */
|
||||
OP_NOT, /* 31 Match one character, not the given one, casefully */
|
||||
OP_NOTI, /* 32 Match one character, not the given one, caselessly */
|
||||
|
||||
/* The following sets of 13 opcodes must always be kept in step because
|
||||
the offset from the first one is used to generate the others. */
|
||||
|
||||
/* Repeated characters; caseful must precede the caseless ones */
|
||||
|
||||
OP_STAR, /* 33 The maximizing and minimizing versions of */
|
||||
OP_MINSTAR, /* 34 these six opcodes must come in pairs, with */
|
||||
OP_PLUS, /* 35 the minimizing one second. */
|
||||
OP_MINPLUS, /* 36 */
|
||||
OP_QUERY, /* 37 */
|
||||
OP_MINQUERY, /* 38 */
|
||||
|
||||
OP_UPTO, /* 39 From 0 to n matches of one character, caseful*/
|
||||
OP_MINUPTO, /* 40 */
|
||||
OP_EXACT, /* 41 Exactly n matches */
|
||||
|
||||
OP_POSSTAR, /* 42 Possessified star, caseful */
|
||||
OP_POSPLUS, /* 43 Possessified plus, caseful */
|
||||
OP_POSQUERY, /* 44 Possessified query, caseful */
|
||||
OP_POSUPTO, /* 45 Possessified upto, caseful */
|
||||
|
||||
/* Repeated characters; caseless must follow the caseful ones */
|
||||
|
||||
OP_STARI, /* 46 */
|
||||
OP_MINSTARI, /* 47 */
|
||||
OP_PLUSI, /* 48 */
|
||||
OP_MINPLUSI, /* 49 */
|
||||
OP_QUERYI, /* 50 */
|
||||
OP_MINQUERYI, /* 51 */
|
||||
|
||||
OP_UPTOI, /* 52 From 0 to n matches of one character, caseless */
|
||||
OP_MINUPTOI, /* 53 */
|
||||
OP_EXACTI, /* 54 */
|
||||
|
||||
OP_POSSTARI, /* 55 Possessified star, caseless */
|
||||
OP_POSPLUSI, /* 56 Possessified plus, caseless */
|
||||
OP_POSQUERYI, /* 57 Possessified query, caseless */
|
||||
OP_POSUPTOI, /* 58 Possessified upto, caseless */
|
||||
|
||||
/* The negated ones must follow the non-negated ones, and match them */
|
||||
/* Negated repeated character, caseful; must precede the caseless ones */
|
||||
|
||||
OP_NOTSTAR, /* 59 The maximizing and minimizing versions of */
|
||||
OP_NOTMINSTAR, /* 60 these six opcodes must come in pairs, with */
|
||||
OP_NOTPLUS, /* 61 the minimizing one second. They must be in */
|
||||
OP_NOTMINPLUS, /* 62 exactly the same order as those above. */
|
||||
OP_NOTQUERY, /* 63 */
|
||||
OP_NOTMINQUERY, /* 64 */
|
||||
|
||||
OP_NOTUPTO, /* 65 From 0 to n matches, caseful */
|
||||
OP_NOTMINUPTO, /* 66 */
|
||||
OP_NOTEXACT, /* 67 Exactly n matches */
|
||||
|
||||
OP_NOTPOSSTAR, /* 68 Possessified versions, caseful */
|
||||
OP_NOTPOSPLUS, /* 69 */
|
||||
OP_NOTPOSQUERY, /* 70 */
|
||||
OP_NOTPOSUPTO, /* 71 */
|
||||
|
||||
/* Negated repeated character, caseless; must follow the caseful ones */
|
||||
|
||||
OP_NOTSTARI, /* 72 */
|
||||
OP_NOTMINSTARI, /* 73 */
|
||||
OP_NOTPLUSI, /* 74 */
|
||||
OP_NOTMINPLUSI, /* 75 */
|
||||
OP_NOTQUERYI, /* 76 */
|
||||
OP_NOTMINQUERYI, /* 77 */
|
||||
|
||||
OP_NOTUPTOI, /* 78 From 0 to n matches, caseless */
|
||||
OP_NOTMINUPTOI, /* 79 */
|
||||
OP_NOTEXACTI, /* 80 Exactly n matches */
|
||||
|
||||
OP_NOTPOSSTARI, /* 81 Possessified versions, caseless */
|
||||
OP_NOTPOSPLUSI, /* 82 */
|
||||
OP_NOTPOSQUERYI, /* 83 */
|
||||
OP_NOTPOSUPTOI, /* 84 */
|
||||
|
||||
/* Character types */
|
||||
|
||||
OP_TYPESTAR, /* 85 The maximizing and minimizing versions of */
|
||||
OP_TYPEMINSTAR, /* 86 these six opcodes must come in pairs, with */
|
||||
OP_TYPEPLUS, /* 87 the minimizing one second. These codes must */
|
||||
OP_TYPEMINPLUS, /* 88 be in exactly the same order as those above. */
|
||||
OP_TYPEQUERY, /* 89 */
|
||||
OP_TYPEMINQUERY, /* 90 */
|
||||
|
||||
OP_TYPEUPTO, /* 91 From 0 to n matches */
|
||||
OP_TYPEMINUPTO, /* 92 */
|
||||
OP_TYPEEXACT, /* 93 Exactly n matches */
|
||||
|
||||
OP_TYPEPOSSTAR, /* 94 Possessified versions */
|
||||
OP_TYPEPOSPLUS, /* 95 */
|
||||
OP_TYPEPOSQUERY, /* 96 */
|
||||
OP_TYPEPOSUPTO, /* 97 */
|
||||
|
||||
/* These are used for character classes and back references; only the
|
||||
first six are the same as the sets above. */
|
||||
|
||||
OP_CRSTAR, /* 98 The maximizing and minimizing versions of */
|
||||
OP_CRMINSTAR, /* 99 all these opcodes must come in pairs, with */
|
||||
OP_CRPLUS, /* 100 the minimizing one second. These codes must */
|
||||
OP_CRMINPLUS, /* 101 be in exactly the same order as those above. */
|
||||
OP_CRQUERY, /* 102 */
|
||||
OP_CRMINQUERY, /* 103 */
|
||||
|
||||
OP_CRRANGE, /* 104 These are different to the three sets above. */
|
||||
OP_CRMINRANGE, /* 105 */
|
||||
|
||||
OP_CRPOSSTAR, /* 106 Possessified versions */
|
||||
OP_CRPOSPLUS, /* 107 */
|
||||
OP_CRPOSQUERY, /* 108 */
|
||||
OP_CRPOSRANGE, /* 109 */
|
||||
|
||||
/* End of quantifier opcodes */
|
||||
|
||||
OP_CLASS, /* 110 Match a character class, chars < 256 only */
|
||||
OP_NCLASS, /* 111 Same, but the bitmap was created from a negative
|
||||
class - the difference is relevant only when a
|
||||
character > 255 is encountered. */
|
||||
OP_XCLASS, /* 112 Extended class for handling > 255 chars within the
|
||||
class. This does both positive and negative. */
|
||||
OP_ECLASS, /* 113 Really-extended class, for handling logical
|
||||
expressions computed over characters. */
|
||||
OP_REF, /* 114 Match a back reference, casefully */
|
||||
OP_REFI, /* 115 Match a back reference, caselessly */
|
||||
OP_DNREF, /* 116 Match a duplicate name backref, casefully */
|
||||
OP_DNREFI, /* 117 Match a duplicate name backref, caselessly */
|
||||
OP_RECURSE, /* 118 Match a numbered subpattern (possibly recursive) */
|
||||
OP_CALLOUT, /* 119 Call out to external function if provided */
|
||||
OP_CALLOUT_STR, /* 120 Call out with string argument */
|
||||
|
||||
OP_ALT, /* 121 Start of alternation */
|
||||
OP_KET, /* 122 End of group that doesn't have an unbounded repeat */
|
||||
OP_KETRMAX, /* 123 These two must remain together and in this */
|
||||
OP_KETRMIN, /* 124 order. They are for groups that repeat for ever. */
|
||||
OP_KETRPOS, /* 125 Possessive unlimited repeat. */
|
||||
|
||||
/* The assertions must come before BRA, CBRA, ONCE, and COND. */
|
||||
|
||||
OP_REVERSE, /* 126 Move pointer back - used in lookbehind assertions */
|
||||
OP_VREVERSE, /* 127 Move pointer back - variable */
|
||||
OP_ASSERT, /* 128 Positive lookahead */
|
||||
OP_ASSERT_NOT, /* 129 Negative lookahead */
|
||||
OP_ASSERTBACK, /* 130 Positive lookbehind */
|
||||
OP_ASSERTBACK_NOT, /* 131 Negative lookbehind */
|
||||
OP_ASSERT_NA, /* 132 Positive non-atomic lookahead */
|
||||
OP_ASSERTBACK_NA, /* 133 Positive non-atomic lookbehind */
|
||||
OP_ASSERT_SCS, /* 134 Scan substring */
|
||||
|
||||
/* ONCE, SCRIPT_RUN, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come
|
||||
immediately after the assertions, with ONCE first, as there's a test for >=
|
||||
ONCE for a subpattern that isn't an assertion. The POS versions must
|
||||
immediately follow the non-POS versions in each case. */
|
||||
|
||||
OP_ONCE, /* 135 Atomic group, contains captures */
|
||||
OP_SCRIPT_RUN, /* 136 Non-capture, but check characters' scripts */
|
||||
OP_BRA, /* 137 Start of non-capturing bracket */
|
||||
OP_BRAPOS, /* 138 Ditto, with unlimited, possessive repeat */
|
||||
OP_CBRA, /* 139 Start of capturing bracket */
|
||||
OP_CBRAPOS, /* 140 Ditto, with unlimited, possessive repeat */
|
||||
OP_COND, /* 141 Conditional group */
|
||||
|
||||
/* These five must follow the previous five, in the same order. There's a
|
||||
check for >= SBRA to distinguish the two sets. */
|
||||
|
||||
OP_SBRA, /* 142 Start of non-capturing bracket, check empty */
|
||||
OP_SBRAPOS, /* 143 Ditto, with unlimited, possessive repeat */
|
||||
OP_SCBRA, /* 144 Start of capturing bracket, check empty */
|
||||
OP_SCBRAPOS, /* 145 Ditto, with unlimited, possessive repeat */
|
||||
OP_SCOND, /* 146 Conditional group, check empty */
|
||||
|
||||
/* The next two pairs must (respectively) be kept together. */
|
||||
|
||||
OP_CREF, /* 147 Used to hold a capture number as condition */
|
||||
OP_DNCREF, /* 148 Used to point to duplicate names as a condition */
|
||||
OP_RREF, /* 149 Used to hold a recursion number as condition */
|
||||
OP_DNRREF, /* 150 Used to point to duplicate names as a condition */
|
||||
OP_FALSE, /* 151 Always false (used by DEFINE and VERSION) */
|
||||
OP_TRUE, /* 152 Always true (used by VERSION) */
|
||||
|
||||
OP_BRAZERO, /* 153 These two must remain together and in this */
|
||||
OP_BRAMINZERO, /* 154 order. */
|
||||
OP_BRAPOSZERO, /* 155 */
|
||||
|
||||
/* These are backtracking control verbs */
|
||||
|
||||
OP_MARK, /* 156 always has an argument */
|
||||
OP_PRUNE, /* 157 */
|
||||
OP_PRUNE_ARG, /* 158 same, but with argument */
|
||||
OP_SKIP, /* 159 */
|
||||
OP_SKIP_ARG, /* 160 same, but with argument */
|
||||
OP_THEN, /* 161 */
|
||||
OP_THEN_ARG, /* 162 same, but with argument */
|
||||
OP_COMMIT, /* 163 */
|
||||
OP_COMMIT_ARG, /* 164 same, but with argument */
|
||||
|
||||
/* These are forced failure and success verbs. FAIL and ACCEPT do accept an
|
||||
argument, but these cases can be compiled as, for example, (*MARK:X)(*FAIL)
|
||||
without the need for a special opcode. */
|
||||
|
||||
OP_FAIL, /* 165 */
|
||||
OP_ACCEPT, /* 166 */
|
||||
OP_ASSERT_ACCEPT, /* 167 Used inside assertions */
|
||||
OP_CLOSE, /* 168 Used before OP_ACCEPT to close open captures */
|
||||
|
||||
/* This is used to skip a subpattern with a {0} quantifier */
|
||||
|
||||
OP_SKIPZERO, /* 169 */
|
||||
|
||||
/* This is used to identify a DEFINE group during compilation so that it can
|
||||
be checked for having only one branch. It is changed to OP_FALSE before
|
||||
compilation finishes. */
|
||||
|
||||
OP_DEFINE, /* 170 */
|
||||
|
||||
/* These opcodes replace their normal counterparts in UCP mode when
|
||||
PCRE2_EXTRA_ASCII_BSW is not set. */
|
||||
|
||||
OP_NOT_UCP_WORD_BOUNDARY, /* 171 */
|
||||
OP_UCP_WORD_BOUNDARY, /* 172 */
|
||||
|
||||
/* This is not an opcode, but is used to check that tables indexed by opcode
|
||||
are the correct length, in order to catch updating errors - there have been
|
||||
some in the past. */
|
||||
|
||||
OP_TABLE_LENGTH
|
||||
|
||||
};
|
||||
|
||||
/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
|
||||
definitions that follow must also be updated to match. There are also tables
|
||||
called "opcode_possessify" in pcre2_compile.c and "coptable" and "poptable" in
|
||||
pcre2_dfa_match.c that must be updated. */
|
||||
|
||||
|
||||
/* This macro defines textual names for all the opcodes. These are used only
|
||||
for debugging, and some of them are only partial names. The macro is referenced
|
||||
only in pcre2_printint.c, which fills out the full names in many cases (and in
|
||||
some cases doesn't actually use these names at all). */
|
||||
|
||||
#define OP_NAME_LIST \
|
||||
"End", "\\A", "\\G", "\\K", "\\B", "\\b", "\\D", "\\d", \
|
||||
"\\S", "\\s", "\\W", "\\w", "Any", "AllAny", "Anybyte", \
|
||||
"notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v", \
|
||||
"extuni", "\\Z", "\\z", \
|
||||
"$", "$", "^", "^", "char", "chari", "not", "noti", \
|
||||
"*", "*?", "+", "+?", "?", "??", \
|
||||
"{", "{", "{", \
|
||||
"*+","++", "?+", "{", \
|
||||
"*", "*?", "+", "+?", "?", "??", \
|
||||
"{", "{", "{", \
|
||||
"*+","++", "?+", "{", \
|
||||
"*", "*?", "+", "+?", "?", "??", \
|
||||
"{", "{", "{", \
|
||||
"*+","++", "?+", "{", \
|
||||
"*", "*?", "+", "+?", "?", "??", \
|
||||
"{", "{", "{", \
|
||||
"*+","++", "?+", "{", \
|
||||
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
|
||||
"*+","++", "?+", "{", \
|
||||
"*", "*?", "+", "+?", "?", "??", "{", "{", \
|
||||
"*+","++", "?+", "{", \
|
||||
"class", "nclass", "xclass", "eclass", \
|
||||
"Ref", "Refi", "DnRef", "DnRefi", \
|
||||
"Recurse", "Callout", "CalloutStr", \
|
||||
"Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \
|
||||
"Reverse", "VReverse", "Assert", "Assert not", \
|
||||
"Assert back", "Assert back not", \
|
||||
"Non-atomic assert", "Non-atomic assert back", \
|
||||
"Scan substring", \
|
||||
"Once", \
|
||||
"Script run", \
|
||||
"Bra", "BraPos", "CBra", "CBraPos", \
|
||||
"Cond", \
|
||||
"SBra", "SBraPos", "SCBra", "SCBraPos", \
|
||||
"SCond", \
|
||||
"Capture ref", "Capture dnref", "Cond rec", "Cond dnrec", \
|
||||
"Cond false", "Cond true", \
|
||||
"Brazero", "Braminzero", "Braposzero", \
|
||||
"*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \
|
||||
"*THEN", "*THEN", "*COMMIT", "*COMMIT", "*FAIL", \
|
||||
"*ACCEPT", "*ASSERT_ACCEPT", \
|
||||
"Close", "Skip zero", "Define", "\\B (ucp)", "\\b (ucp)"
|
||||
|
||||
|
||||
/* This macro defines the length of fixed length operations in the compiled
|
||||
regex. The lengths are used when searching for specific things, and also in the
|
||||
debugging printing of a compiled regex. We use a macro so that it can be
|
||||
defined close to the definitions of the opcodes themselves.
|
||||
|
||||
As things have been extended, some of these are no longer fixed lengths, but are
|
||||
minima instead. For example, the length of a single-character repeat may vary
|
||||
in UTF-8 mode. The code that uses this table must know about such things. */
|
||||
|
||||
#define OP_LENGTHS \
|
||||
1, /* End */ \
|
||||
1, 1, 1, 1, 1, /* \A, \G, \K, \B, \b */ \
|
||||
1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ \
|
||||
1, 1, 1, /* Any, AllAny, Anybyte */ \
|
||||
3, 3, /* \P, \p */ \
|
||||
1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ \
|
||||
1, /* \X */ \
|
||||
1, 1, 1, 1, 1, 1, /* \Z, \z, $, $M, ^, ^M */ \
|
||||
2, /* Char - the minimum length */ \
|
||||
2, /* Chari - the minimum length */ \
|
||||
2, /* not */ \
|
||||
2, /* noti */ \
|
||||
/* Positive single-char repeats ** These are */ \
|
||||
2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** minima in */ \
|
||||
2+IMM2_SIZE, 2+IMM2_SIZE, /* upto, minupto ** mode */ \
|
||||
2+IMM2_SIZE, /* exact */ \
|
||||
2, 2, 2, 2+IMM2_SIZE, /* *+, ++, ?+, upto+ */ \
|
||||
2, 2, 2, 2, 2, 2, /* *I, *?I, +I, +?I, ?I, ??I ** UTF-8 */ \
|
||||
2+IMM2_SIZE, 2+IMM2_SIZE, /* upto I, minupto I */ \
|
||||
2+IMM2_SIZE, /* exact I */ \
|
||||
2, 2, 2, 2+IMM2_SIZE, /* *+I, ++I, ?+I, upto+I */ \
|
||||
/* Negative single-char repeats - only for chars < 256 */ \
|
||||
2, 2, 2, 2, 2, 2, /* NOT *, *?, +, +?, ?, ?? */ \
|
||||
2+IMM2_SIZE, 2+IMM2_SIZE, /* NOT upto, minupto */ \
|
||||
2+IMM2_SIZE, /* NOT exact */ \
|
||||
2, 2, 2, 2+IMM2_SIZE, /* Possessive NOT *, +, ?, upto */ \
|
||||
2, 2, 2, 2, 2, 2, /* NOT *I, *?I, +I, +?I, ?I, ??I */ \
|
||||
2+IMM2_SIZE, 2+IMM2_SIZE, /* NOT upto I, minupto I */ \
|
||||
2+IMM2_SIZE, /* NOT exact I */ \
|
||||
2, 2, 2, 2+IMM2_SIZE, /* Possessive NOT *I, +I, ?I, upto I */ \
|
||||
/* Positive type repeats */ \
|
||||
2, 2, 2, 2, 2, 2, /* Type *, *?, +, +?, ?, ?? */ \
|
||||
2+IMM2_SIZE, 2+IMM2_SIZE, /* Type upto, minupto */ \
|
||||
2+IMM2_SIZE, /* Type exact */ \
|
||||
2, 2, 2, 2+IMM2_SIZE, /* Possessive *+, ++, ?+, upto+ */ \
|
||||
/* Character class & ref repeats */ \
|
||||
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ \
|
||||
1+2*IMM2_SIZE, 1+2*IMM2_SIZE, /* CRRANGE, CRMINRANGE */ \
|
||||
1, 1, 1, 1+2*IMM2_SIZE, /* Possessive *+, ++, ?+, CRPOSRANGE */ \
|
||||
1+(32/sizeof(PCRE2_UCHAR)), /* CLASS */ \
|
||||
1+(32/sizeof(PCRE2_UCHAR)), /* NCLASS */ \
|
||||
0, /* XCLASS - variable length */ \
|
||||
0, /* ECLASS - variable length */ \
|
||||
1+IMM2_SIZE, /* REF */ \
|
||||
1+IMM2_SIZE+1, /* REFI */ \
|
||||
1+2*IMM2_SIZE, /* DNREF */ \
|
||||
1+2*IMM2_SIZE+1, /* DNREFI */ \
|
||||
1+LINK_SIZE, /* RECURSE */ \
|
||||
1+2*LINK_SIZE+1, /* CALLOUT */ \
|
||||
0, /* CALLOUT_STR - variable length */ \
|
||||
1+LINK_SIZE, /* Alt */ \
|
||||
1+LINK_SIZE, /* Ket */ \
|
||||
1+LINK_SIZE, /* KetRmax */ \
|
||||
1+LINK_SIZE, /* KetRmin */ \
|
||||
1+LINK_SIZE, /* KetRpos */ \
|
||||
1+IMM2_SIZE, /* Reverse */ \
|
||||
1+2*IMM2_SIZE, /* VReverse */ \
|
||||
1+LINK_SIZE, /* Assert */ \
|
||||
1+LINK_SIZE, /* Assert not */ \
|
||||
1+LINK_SIZE, /* Assert behind */ \
|
||||
1+LINK_SIZE, /* Assert behind not */ \
|
||||
1+LINK_SIZE, /* NA Assert */ \
|
||||
1+LINK_SIZE, /* NA Assert behind */ \
|
||||
1+LINK_SIZE, /* Scan substring */ \
|
||||
1+LINK_SIZE, /* ONCE */ \
|
||||
1+LINK_SIZE, /* SCRIPT_RUN */ \
|
||||
1+LINK_SIZE, /* BRA */ \
|
||||
1+LINK_SIZE, /* BRAPOS */ \
|
||||
1+LINK_SIZE+IMM2_SIZE, /* CBRA */ \
|
||||
1+LINK_SIZE+IMM2_SIZE, /* CBRAPOS */ \
|
||||
1+LINK_SIZE, /* COND */ \
|
||||
1+LINK_SIZE, /* SBRA */ \
|
||||
1+LINK_SIZE, /* SBRAPOS */ \
|
||||
1+LINK_SIZE+IMM2_SIZE, /* SCBRA */ \
|
||||
1+LINK_SIZE+IMM2_SIZE, /* SCBRAPOS */ \
|
||||
1+LINK_SIZE, /* SCOND */ \
|
||||
1+IMM2_SIZE, 1+2*IMM2_SIZE, /* CREF, DNCREF */ \
|
||||
1+IMM2_SIZE, 1+2*IMM2_SIZE, /* RREF, DNRREF */ \
|
||||
1, 1, /* FALSE, TRUE */ \
|
||||
1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \
|
||||
3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \
|
||||
1, 3, /* SKIP, SKIP_ARG */ \
|
||||
1, 3, /* THEN, THEN_ARG */ \
|
||||
1, 3, /* COMMIT, COMMIT_ARG */ \
|
||||
1, 1, 1, /* FAIL, ACCEPT, ASSERT_ACCEPT */ \
|
||||
1+IMM2_SIZE, 1, /* CLOSE, SKIPZERO */ \
|
||||
1, /* DEFINE */ \
|
||||
1, 1 /* \B and \b in UCP mode */
|
||||
|
||||
/* A magic value for OP_RREF to indicate the "any recursion" condition. */
|
||||
|
||||
#define RREF_ANY 0xffff
|
||||
|
||||
/* Constants used by OP_REFI and OP_DNREFI to control matching behaviour. */
|
||||
|
||||
#define REFI_FLAG_CASELESS_RESTRICT 0x1
|
||||
#define REFI_FLAG_TURKISH_CASING 0x2
|
||||
|
||||
|
||||
/* ---------- Private structures that are mode-independent. ---------- */
|
||||
|
||||
/* Structure to hold data for custom memory management. */
|
||||
|
||||
typedef struct pcre2_memctl {
|
||||
void * (*malloc)(size_t, void *);
|
||||
void (*free)(void *, void *);
|
||||
void *memory_data;
|
||||
} pcre2_memctl;
|
||||
|
||||
/* Structure for building a chain of open capturing subpatterns during
|
||||
compiling, so that instructions to close them can be compiled when (*ACCEPT) is
|
||||
encountered. */
|
||||
|
||||
typedef struct open_capitem {
|
||||
struct open_capitem *next; /* Chain link */
|
||||
uint16_t number; /* Capture number */
|
||||
uint16_t assert_depth; /* Assertion depth when opened */
|
||||
} open_capitem;
|
||||
|
||||
/* Layout of the UCP type table that translates property names into types and
|
||||
codes. Each entry used to point directly to a name, but to reduce the number of
|
||||
relocations in shared libraries, it now has an offset into a single string
|
||||
instead. */
|
||||
|
||||
typedef struct {
|
||||
uint16_t name_offset;
|
||||
uint16_t type;
|
||||
uint16_t value;
|
||||
} ucp_type_table;
|
||||
|
||||
/* Unicode character database (UCD) record format */
|
||||
|
||||
typedef struct {
|
||||
uint8_t script; /* ucp_Arabic, etc. */
|
||||
uint8_t chartype; /* ucp_Cc, etc. (general categories) */
|
||||
uint8_t gbprop; /* ucp_gbControl, etc. (grapheme break property) */
|
||||
uint8_t caseset; /* offset to multichar other cases or zero */
|
||||
int32_t other_case; /* offset to other case, or zero if none */
|
||||
uint16_t scriptx_bidiclass; /* script extension (11 bit) and bidi class (5 bit) values */
|
||||
uint16_t bprops; /* binary properties offset */
|
||||
} ucd_record;
|
||||
|
||||
/* UCD access macros */
|
||||
|
||||
#define UCD_BLOCK_SIZE 128
|
||||
#define REAL_GET_UCD(ch) (PRIV(ucd_records) + \
|
||||
PRIV(ucd_stage2)[PRIV(ucd_stage1)[(int)(ch) / UCD_BLOCK_SIZE] * \
|
||||
UCD_BLOCK_SIZE + (int)(ch) % UCD_BLOCK_SIZE])
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
#define GET_UCD(ch) (((ch) > MAX_UTF_CODE_POINT)? \
|
||||
PRIV(dummy_ucd_record) : REAL_GET_UCD(ch))
|
||||
#else
|
||||
#define GET_UCD(ch) REAL_GET_UCD(ch)
|
||||
#endif
|
||||
|
||||
/* NOTE(review): 0x3ff keeps only the low 10 bits of scriptx_bidiclass, whereas
the ucd_record field comment describes an 11-bit script extension value and
UCD_BIDICLASS_SHIFT is 11 - confirm the intended field width. */
#define UCD_SCRIPTX_MASK 0x3ff
|
||||
#define UCD_BIDICLASS_SHIFT 11
|
||||
#define UCD_BPROPS_MASK 0xfff
|
||||
|
||||
#define UCD_SCRIPTX_PROP(prop) ((prop)->scriptx_bidiclass & UCD_SCRIPTX_MASK)
|
||||
#define UCD_BIDICLASS_PROP(prop) ((prop)->scriptx_bidiclass >> UCD_BIDICLASS_SHIFT)
|
||||
#define UCD_BPROPS_PROP(prop) ((prop)->bprops & UCD_BPROPS_MASK)
|
||||
|
||||
#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype
|
||||
#define UCD_SCRIPT(ch) GET_UCD(ch)->script
|
||||
#define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
|
||||
#define UCD_GRAPHBREAK(ch) GET_UCD(ch)->gbprop
|
||||
#define UCD_CASESET(ch) GET_UCD(ch)->caseset
|
||||
/* Adds the signed other_case offset from the character's UCD record to the
character itself. The argument is parenthesized before the cast so that an
expression argument is converted as a whole. */
#define UCD_OTHERCASE(ch) ((uint32_t)((int)(ch) + (int)(GET_UCD(ch)->other_case)))
|
||||
#define UCD_SCRIPTX(ch) UCD_SCRIPTX_PROP(GET_UCD(ch))
|
||||
#define UCD_BPROPS(ch) UCD_BPROPS_PROP(GET_UCD(ch))
|
||||
#define UCD_BIDICLASS(ch) UCD_BIDICLASS_PROP(GET_UCD(ch))
|
||||
#define UCD_ANY_I(ch) \
|
||||
/* match any of the four characters 'i', 'I', U+0130, U+0131 */ \
|
||||
(((uint32_t)(ch) | 0x20u) == 0x69u || ((uint32_t)(ch) | 1u) == 0x0131u)
|
||||
#define UCD_DOTTED_I(ch) \
|
||||
((uint32_t)(ch) == 0x69u || (uint32_t)(ch) == 0x0130u)
|
||||
#define UCD_FOLD_I_TURKISH(ch) \
|
||||
((uint32_t)(ch) == 0x0130u ? 0x69u : \
|
||||
(uint32_t)(ch) == 0x49u ? 0x0131u : (uint32_t)(ch))
|
||||
|
||||
/* The "scriptx" and bprops fields contain offsets into vectors of 32-bit words
|
||||
that form a bitmap representing a list of scripts or boolean properties. These
|
||||
macros test or set a bit in the map by number. */
|
||||
|
||||
#define MAPBIT(map,n) ((map)[(n)/32]&(1u<<((n)%32)))
|
||||
#define MAPSET(map,n) ((map)[(n)/32]|=(1u<<((n)%32)))
|
||||
|
||||
/* Header for serialized pcre2 codes. */
|
||||
|
||||
typedef struct pcre2_serialized_data {
|
||||
uint32_t magic;
|
||||
uint32_t version;
|
||||
uint32_t config;
|
||||
int32_t number_of_codes;
|
||||
} pcre2_serialized_data;
|
||||
|
||||
|
||||
|
||||
/* ----------------- Items that need PCRE2_CODE_UNIT_WIDTH ----------------- */
|
||||
|
||||
/* When this file is included by pcre2test, PCRE2_CODE_UNIT_WIDTH is defined as
|
||||
0, so the following items are omitted. */
|
||||
|
||||
#if defined PCRE2_CODE_UNIT_WIDTH && PCRE2_CODE_UNIT_WIDTH != 0
|
||||
|
||||
/* EBCDIC is supported only for the 8-bit library. */
|
||||
|
||||
#if defined EBCDIC && PCRE2_CODE_UNIT_WIDTH != 8
|
||||
#error EBCDIC is not supported for the 16-bit or 32-bit libraries
|
||||
#endif
|
||||
|
||||
/* This is the largest non-UTF code point. */
|
||||
|
||||
#define MAX_NON_UTF_CHAR (0xffffffffU >> (32 - PCRE2_CODE_UNIT_WIDTH))
|
||||
|
||||
/* Internal shared data tables and variables. These are used by more than one
|
||||
of the exported public functions. They have to be "external" in the C sense,
|
||||
but are not part of the PCRE2 public API. Although the data for some of them is
|
||||
identical in all libraries, they must have different names so that multiple
|
||||
libraries can be simultaneously linked to a single application. However, UTF-8
|
||||
tables are needed only when compiling the 8-bit library. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
extern const int PRIV(utf8_table1)[];
|
||||
extern const int PRIV(utf8_table1_size);
|
||||
extern const int PRIV(utf8_table2)[];
|
||||
extern const int PRIV(utf8_table3)[];
|
||||
extern const uint8_t PRIV(utf8_table4)[];
|
||||
#endif
|
||||
|
||||
#define _pcre2_OP_lengths PCRE2_SUFFIX(_pcre2_OP_lengths_)
|
||||
#define _pcre2_callout_end_delims PCRE2_SUFFIX(_pcre2_callout_end_delims_)
|
||||
#define _pcre2_callout_start_delims PCRE2_SUFFIX(_pcre2_callout_start_delims_)
|
||||
#define _pcre2_default_compile_context PCRE2_SUFFIX(_pcre2_default_compile_context_)
|
||||
#define _pcre2_default_convert_context PCRE2_SUFFIX(_pcre2_default_convert_context_)
|
||||
#define _pcre2_default_match_context PCRE2_SUFFIX(_pcre2_default_match_context_)
|
||||
#define _pcre2_default_tables PCRE2_SUFFIX(_pcre2_default_tables_)
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
#define _pcre2_dummy_ucd_record PCRE2_SUFFIX(_pcre2_dummy_ucd_record_)
|
||||
#endif
|
||||
#define _pcre2_hspace_list PCRE2_SUFFIX(_pcre2_hspace_list_)
|
||||
#define _pcre2_vspace_list PCRE2_SUFFIX(_pcre2_vspace_list_)
|
||||
#define _pcre2_ucd_boolprop_sets PCRE2_SUFFIX(_pcre2_ucd_boolprop_sets_)
|
||||
#define _pcre2_ucd_caseless_sets PCRE2_SUFFIX(_pcre2_ucd_caseless_sets_)
|
||||
#define _pcre2_ucd_turkish_dotted_i_caseset PCRE2_SUFFIX(_pcre2_ucd_turkish_dotted_i_caseset_)
|
||||
#define _pcre2_ucd_nocase_ranges PCRE2_SUFFIX(_pcre2_ucd_nocase_ranges_)
|
||||
#define _pcre2_ucd_nocase_ranges_size PCRE2_SUFFIX(_pcre2_ucd_nocase_ranges_size_)
|
||||
#define _pcre2_ucd_digit_sets PCRE2_SUFFIX(_pcre2_ucd_digit_sets_)
|
||||
#define _pcre2_ucd_script_sets PCRE2_SUFFIX(_pcre2_ucd_script_sets_)
|
||||
#define _pcre2_ucd_records PCRE2_SUFFIX(_pcre2_ucd_records_)
|
||||
#define _pcre2_ucd_stage1 PCRE2_SUFFIX(_pcre2_ucd_stage1_)
|
||||
#define _pcre2_ucd_stage2 PCRE2_SUFFIX(_pcre2_ucd_stage2_)
|
||||
#define _pcre2_ucp_gbtable PCRE2_SUFFIX(_pcre2_ucp_gbtable_)
|
||||
#define _pcre2_ucp_gentype PCRE2_SUFFIX(_pcre2_ucp_gentype_)
|
||||
#define _pcre2_ucp_typerange PCRE2_SUFFIX(_pcre2_ucp_typerange_)
|
||||
#define _pcre2_unicode_version PCRE2_SUFFIX(_pcre2_unicode_version_)
|
||||
#define _pcre2_utt PCRE2_SUFFIX(_pcre2_utt_)
|
||||
#define _pcre2_utt_names PCRE2_SUFFIX(_pcre2_utt_names_)
|
||||
#define _pcre2_utt_size PCRE2_SUFFIX(_pcre2_utt_size_)
|
||||
|
||||
extern const uint8_t PRIV(OP_lengths)[];
|
||||
extern const uint32_t PRIV(callout_end_delims)[];
|
||||
extern const uint32_t PRIV(callout_start_delims)[];
|
||||
extern pcre2_compile_context PRIV(default_compile_context);
|
||||
extern pcre2_convert_context PRIV(default_convert_context);
|
||||
extern pcre2_match_context PRIV(default_match_context);
|
||||
extern const uint8_t PRIV(default_tables)[];
|
||||
extern const uint32_t PRIV(hspace_list)[];
|
||||
extern const uint32_t PRIV(vspace_list)[];
|
||||
extern const uint32_t PRIV(ucd_boolprop_sets)[];
|
||||
extern const uint32_t PRIV(ucd_caseless_sets)[];
|
||||
extern const uint32_t PRIV(ucd_turkish_dotted_i_caseset);
|
||||
extern const uint32_t PRIV(ucd_nocase_ranges)[];
|
||||
extern const uint32_t PRIV(ucd_nocase_ranges_size);
|
||||
extern const uint32_t PRIV(ucd_digit_sets)[];
|
||||
extern const uint32_t PRIV(ucd_script_sets)[];
|
||||
extern const ucd_record PRIV(ucd_records)[];
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
extern const ucd_record PRIV(dummy_ucd_record)[];
|
||||
#endif
|
||||
extern const uint16_t PRIV(ucd_stage1)[];
|
||||
extern const uint16_t PRIV(ucd_stage2)[];
|
||||
extern const uint32_t PRIV(ucp_gbtable)[];
|
||||
extern const uint32_t PRIV(ucp_gentype)[];
|
||||
#ifdef SUPPORT_JIT
|
||||
extern const int PRIV(ucp_typerange)[];
|
||||
#endif
|
||||
extern const char *PRIV(unicode_version);
|
||||
extern const ucp_type_table PRIV(utt)[];
|
||||
extern const char PRIV(utt_names)[];
|
||||
extern const size_t PRIV(utt_size);
|
||||
|
||||
/* Mode-dependent macros and hidden and private structures are defined in a
|
||||
separate file so that pcre2test can include them at all supported widths. When
|
||||
compiling the library, PCRE2_CODE_UNIT_WIDTH will be defined, and we can
|
||||
include them at the appropriate width, after setting up suffix macros for the
|
||||
private structures. */
|
||||
|
||||
#define branch_chain PCRE2_SUFFIX(branch_chain_)
|
||||
#define compile_block PCRE2_SUFFIX(compile_block_)
|
||||
#define dfa_match_block PCRE2_SUFFIX(dfa_match_block_)
|
||||
#define match_block PCRE2_SUFFIX(match_block_)
|
||||
#define named_group PCRE2_SUFFIX(named_group_)
|
||||
|
||||
#include "pcre2_intmodedep.h"
|
||||
|
||||
/* Private "external" functions. These are internal functions that are called
|
||||
from modules other than the one in which they are defined. They have to be
|
||||
"external" in the C sense, but are not part of the PCRE2 public API. They are
|
||||
not referenced from pcre2test, and must not be defined when no code unit width
|
||||
is available. */
|
||||
|
||||
#define _pcre2_auto_possessify PCRE2_SUFFIX(_pcre2_auto_possessify_)
|
||||
#define _pcre2_check_escape PCRE2_SUFFIX(_pcre2_check_escape_)
|
||||
#define _pcre2_extuni PCRE2_SUFFIX(_pcre2_extuni_)
|
||||
#define _pcre2_find_bracket PCRE2_SUFFIX(_pcre2_find_bracket_)
|
||||
#define _pcre2_is_newline PCRE2_SUFFIX(_pcre2_is_newline_)
|
||||
#define _pcre2_jit_free_rodata PCRE2_SUFFIX(_pcre2_jit_free_rodata_)
|
||||
#define _pcre2_jit_free PCRE2_SUFFIX(_pcre2_jit_free_)
|
||||
#define _pcre2_jit_get_size PCRE2_SUFFIX(_pcre2_jit_get_size_)
|
||||
#define _pcre2_jit_get_target PCRE2_SUFFIX(_pcre2_jit_get_target_)
|
||||
#define _pcre2_memctl_malloc PCRE2_SUFFIX(_pcre2_memctl_malloc_)
|
||||
#define _pcre2_ord2utf PCRE2_SUFFIX(_pcre2_ord2utf_)
|
||||
#define _pcre2_script_run PCRE2_SUFFIX(_pcre2_script_run_)
|
||||
#define _pcre2_strcmp PCRE2_SUFFIX(_pcre2_strcmp_)
|
||||
#define _pcre2_strcmp_c8 PCRE2_SUFFIX(_pcre2_strcmp_c8_)
|
||||
#define _pcre2_strcpy_c8 PCRE2_SUFFIX(_pcre2_strcpy_c8_)
|
||||
#define _pcre2_strlen PCRE2_SUFFIX(_pcre2_strlen_)
|
||||
#define _pcre2_strncmp PCRE2_SUFFIX(_pcre2_strncmp_)
|
||||
#define _pcre2_strncmp_c8 PCRE2_SUFFIX(_pcre2_strncmp_c8_)
|
||||
#define _pcre2_study PCRE2_SUFFIX(_pcre2_study_)
|
||||
#define _pcre2_valid_utf PCRE2_SUFFIX(_pcre2_valid_utf_)
|
||||
#define _pcre2_was_newline PCRE2_SUFFIX(_pcre2_was_newline_)
|
||||
#define _pcre2_xclass PCRE2_SUFFIX(_pcre2_xclass_)
|
||||
#define _pcre2_eclass PCRE2_SUFFIX(_pcre2_eclass_)
|
||||
|
||||
extern int _pcre2_auto_possessify(PCRE2_UCHAR *,
|
||||
const compile_block *);
|
||||
extern int _pcre2_check_escape(PCRE2_SPTR *, PCRE2_SPTR, uint32_t *,
|
||||
int *, uint32_t, uint32_t, uint32_t, BOOL, compile_block *);
|
||||
extern PCRE2_SPTR _pcre2_extuni(uint32_t, PCRE2_SPTR, PCRE2_SPTR, PCRE2_SPTR,
|
||||
BOOL, int *);
|
||||
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
|
||||
extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
|
||||
uint32_t *, BOOL);
|
||||
extern void _pcre2_jit_free_rodata(void *, void *);
|
||||
extern void _pcre2_jit_free(void *, pcre2_memctl *);
|
||||
extern size_t _pcre2_jit_get_size(void *);
|
||||
extern const char * _pcre2_jit_get_target(void);
|
||||
extern void * _pcre2_memctl_malloc(size_t, pcre2_memctl *);
|
||||
extern unsigned int _pcre2_ord2utf(uint32_t, PCRE2_UCHAR *);
|
||||
extern BOOL _pcre2_script_run(PCRE2_SPTR, PCRE2_SPTR, BOOL);
|
||||
extern int _pcre2_strcmp(PCRE2_SPTR, PCRE2_SPTR);
|
||||
extern int _pcre2_strcmp_c8(PCRE2_SPTR, const char *);
|
||||
extern PCRE2_SIZE _pcre2_strcpy_c8(PCRE2_UCHAR *, const char *);
|
||||
extern PCRE2_SIZE _pcre2_strlen(PCRE2_SPTR);
|
||||
extern int _pcre2_strncmp(PCRE2_SPTR, PCRE2_SPTR, size_t);
|
||||
extern int _pcre2_strncmp_c8(PCRE2_SPTR, const char *, size_t);
|
||||
extern int _pcre2_study(pcre2_real_code *);
|
||||
extern int _pcre2_valid_utf(PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE *);
|
||||
extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
|
||||
uint32_t *, BOOL);
|
||||
extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, const uint8_t *, BOOL);
|
||||
extern BOOL _pcre2_eclass(uint32_t, PCRE2_SPTR, PCRE2_SPTR,
|
||||
const uint8_t *, BOOL);
|
||||
|
||||
/* This function is needed only when memmove() is not available. */
|
||||
|
||||
#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
|
||||
/* Trailing underscore added so the width-suffixed name follows the same
pattern as the other private symbol mappings in this file. */
#define _pcre2_memmove PCRE2_SUFFIX(_pcre2_memmove_)
|
||||
extern void * _pcre2_memmove(void *, const void *, size_t);
|
||||
#endif
|
||||
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH */
|
||||
|
||||
extern BOOL PRIV(ckd_smul)(PCRE2_SIZE *, int, int);
|
||||
|
||||
#include "pcre2_util.h"
|
||||
|
||||
#endif /* PCRE2_INTERNAL_H_IDEMPOTENT_GUARD */
|
||||
|
||||
/* End of pcre2_internal.h */
|
||||
@@ -1,973 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains mode-dependent macro and structure definitions. The
|
||||
file is #included by pcre2_internal.h if PCRE2_CODE_UNIT_WIDTH is defined.
|
||||
These mode-dependent items are kept in a separate file so that they can also be
|
||||
#included multiple times for different code unit widths by pcre2test in order
|
||||
to have access to the hidden structures at all supported widths.
|
||||
|
||||
Some of the mode-dependent macros are required at different widths for
|
||||
different parts of the pcre2test code (in particular, the included
|
||||
pcre2_printint.c file). We undefine them here so that they can be re-defined for
|
||||
multiple inclusions. Not all of these are used in pcre2test, but it's easier
|
||||
just to undefine them all. */
|
||||
|
||||
#undef ACROSSCHAR
|
||||
#undef BACKCHAR
|
||||
#undef BYTES2CU
|
||||
#undef CHMAX_255
|
||||
#undef CU2BYTES
|
||||
#undef FORWARDCHAR
|
||||
#undef FORWARDCHARTEST
|
||||
#undef GET
|
||||
#undef GET2
|
||||
#undef GETCHAR
|
||||
#undef GETCHARINC
|
||||
#undef GETCHARINCTEST
|
||||
#undef GETCHARLEN
|
||||
#undef GETCHARLENTEST
|
||||
#undef GETCHARTEST
|
||||
#undef GET_EXTRALEN
|
||||
#undef HAS_EXTRALEN
|
||||
#undef IMM2_SIZE
|
||||
#undef MAX_255
|
||||
#undef MAX_MARK
|
||||
#undef MAX_PATTERN_SIZE
|
||||
#undef MAX_UTF_SINGLE_CU
|
||||
#undef NOT_FIRSTCU
|
||||
#undef PUT
|
||||
#undef PUT2
|
||||
#undef PUT2INC
|
||||
#undef PUTCHAR
|
||||
#undef PUTINC
|
||||
#undef TABLE_GET
|
||||
|
||||
|
||||
|
||||
/* -------------------------- MACROS ----------------------------- */
|
||||
|
||||
/* PCRE keeps offsets in its compiled code as at least 16-bit quantities
|
||||
(always stored in big-endian order in 8-bit mode) by default. These are used,
|
||||
for example, to link from the start of a subpattern to its alternatives and its
|
||||
end. The use of 16 bits per offset limits the size of an 8-bit compiled regex
|
||||
to around 64K, which is big enough for almost everybody. However, I received a
|
||||
request for an even bigger limit. For this reason, and also to make the code
|
||||
easier to maintain, the storing and loading of offsets from the compiled code
|
||||
unit string is now handled by the macros that are defined here.
|
||||
|
||||
The macros are controlled by the value of LINK_SIZE. This defaults to 2, but
|
||||
values of 3 or 4 are also supported. */
|
||||
|
||||
/* ------------------- 8-bit support ------------------ */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
|
||||
#if LINK_SIZE == 2
|
||||
#define PUT(a,n,d) \
|
||||
(a[n] = (PCRE2_UCHAR)((d) >> 8)), \
|
||||
(a[(n)+1] = (PCRE2_UCHAR)((d) & 255))
|
||||
#define GET(a,n) \
|
||||
(unsigned int)(((a)[n] << 8) | (a)[(n)+1])
|
||||
#define MAX_PATTERN_SIZE (1 << 16)
|
||||
|
||||
#elif LINK_SIZE == 3
|
||||
#define PUT(a,n,d) \
|
||||
(a[n] = (PCRE2_UCHAR)((d) >> 16)), \
|
||||
(a[(n)+1] = (PCRE2_UCHAR)((d) >> 8)), \
|
||||
(a[(n)+2] = (PCRE2_UCHAR)((d) & 255))
|
||||
#define GET(a,n) \
|
||||
(unsigned int)(((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2])
|
||||
#define MAX_PATTERN_SIZE (1 << 24)
|
||||
|
||||
#elif LINK_SIZE == 4
|
||||
#define PUT(a,n,d) \
|
||||
(a[n] = (PCRE2_UCHAR)((d) >> 24)), \
|
||||
(a[(n)+1] = (PCRE2_UCHAR)((d) >> 16)), \
|
||||
(a[(n)+2] = (PCRE2_UCHAR)((d) >> 8)), \
|
||||
(a[(n)+3] = (PCRE2_UCHAR)((d) & 255))
|
||||
#define GET(a,n) \
|
||||
(unsigned int)(((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3])
|
||||
#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */
|
||||
|
||||
#else
|
||||
#error LINK_SIZE must be 2, 3, or 4
|
||||
#endif
|
||||
|
||||
|
||||
/* ------------------- 16-bit support ------------------ */
|
||||
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
|
||||
#if LINK_SIZE == 2
|
||||
#undef LINK_SIZE
|
||||
#define LINK_SIZE 1
|
||||
#define PUT(a,n,d) \
|
||||
(a[n] = (PCRE2_UCHAR)(d))
|
||||
#define GET(a,n) \
|
||||
(a[n])
|
||||
#define MAX_PATTERN_SIZE (1 << 16)
|
||||
|
||||
#elif LINK_SIZE == 3 || LINK_SIZE == 4
|
||||
#undef LINK_SIZE
|
||||
#define LINK_SIZE 2
|
||||
#define PUT(a,n,d) \
|
||||
(a[n] = (PCRE2_UCHAR)((d) >> 16)), \
|
||||
(a[(n)+1] = (PCRE2_UCHAR)((d) & 65535))
|
||||
#define GET(a,n) \
|
||||
(unsigned int)(((a)[n] << 16) | (a)[(n)+1])
|
||||
#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */
|
||||
|
||||
#else
|
||||
#error LINK_SIZE must be 2, 3, or 4
|
||||
#endif
|
||||
|
||||
|
||||
/* ------------------- 32-bit support ------------------ */
|
||||
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 32
|
||||
#undef LINK_SIZE
|
||||
#define LINK_SIZE 1
|
||||
#define PUT(a,n,d) \
|
||||
(a[n] = (d))
|
||||
#define GET(a,n) \
|
||||
(a[n])
|
||||
#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */
|
||||
|
||||
#else
|
||||
#error Unsupported compiling mode
|
||||
#endif
|
||||
|
||||
|
||||
/* --------------- Other mode-specific macros ----------------- */
|
||||
|
||||
/* PCRE uses some other (at least) 16-bit quantities that do not change when
|
||||
the size of offsets changes. These are used for repeat counts and for other
|
||||
things such as capturing parenthesis numbers in back references.
|
||||
|
||||
Define the number of code units required to hold a 16-bit count/offset, and
|
||||
macros to load and store such a value. For reasons that I do not understand,
|
||||
the expression in the 8-bit GET2 macro is treated by gcc as a signed
|
||||
expression, even when a is declared as unsigned. It seems that any kind of
|
||||
arithmetic results in a signed value. Hence the cast. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define IMM2_SIZE 2
|
||||
#define GET2(a,n) (unsigned int)(((a)[n] << 8) | (a)[(n)+1])
|
||||
/* Stores the high byte at a[n] and the low byte at a[n+1]. The arguments and
the whole comma expression are parenthesized so that pointer-expression
arguments and surrounding operators cannot change the grouping. */
#define PUT2(a,n,d) ((a)[n] = (d) >> 8, (a)[(n)+1] = (d) & 255)
|
||||
|
||||
#else /* Code units are 16 or 32 bits */
|
||||
#define IMM2_SIZE 1
|
||||
/* A 16-bit value occupies one code unit here. The pointer argument is
parenthesized so that expressions such as GET2(p+1, 0) index correctly. */
#define GET2(a,n) ((a)[n])
|
||||
/* Stores the value in a single code unit. Arguments and the assignment are
parenthesized so that expression arguments cannot change the grouping. */
#define PUT2(a,n,d) ((a)[n] = (d))
|
||||
#endif
|
||||
|
||||
/* Other macros that are different for 8-bit mode. The MAX_255 macro checks
|
||||
whether its argument, which is assumed to be one code unit, is less than 256.
|
||||
The CHMAX_255 macro does not assume one code unit. The maximum length of a MARK
|
||||
name must fit in one code unit; currently it is set to 255 or 65535. The
|
||||
TABLE_GET macro is used to access elements of tables containing exactly 256
|
||||
items. Its argument is a code unit. When code points can be greater than 255, a
|
||||
check is needed before accessing these tables. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define MAX_255(c) TRUE
|
||||
#define MAX_MARK ((1u << 8) - 1)
|
||||
#define TABLE_GET(c, table, default) ((table)[c])
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#define SUPPORT_WIDE_CHARS
|
||||
#define CHMAX_255(c) ((c) <= 255u)
|
||||
#else
|
||||
#define CHMAX_255(c) TRUE
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
#else /* Code units are 16 or 32 bits */
|
||||
#define CHMAX_255(c) ((c) <= 255u)
|
||||
#define MAX_255(c) ((c) <= 255u)
|
||||
#define MAX_MARK ((1u << 16) - 1)
|
||||
#define SUPPORT_WIDE_CHARS
|
||||
#define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
|
||||
#endif
|
||||
|
||||
|
||||
/* ----------------- Character-handling macros ----------------- */
|
||||
|
||||
/* There is a proposed future special "UTF-21" mode, in which only the lowest
|
||||
21 bits of a 32-bit character are interpreted as UTF, with the remaining 11
|
||||
high-order bits available to the application for other uses. In preparation for
|
||||
the future implementation of this mode, there are macros that load a data item
|
||||
and, if in this special mode, mask it to 21 bits. These macros all have names
|
||||
starting with UCHAR21. In all other modes, including the normal 32-bit
|
||||
library, the macros all have the same simple definitions. When the new mode is
|
||||
implemented, it is expected that these definitions will be varied appropriately
|
||||
using #ifdef when compiling the library that supports the special mode. */
|
||||
|
||||
#define UCHAR21(eptr) (*(eptr))
|
||||
#define UCHAR21TEST(eptr) (*(eptr))
|
||||
#define UCHAR21INC(eptr) (*(eptr)++)
|
||||
#define UCHAR21INCTEST(eptr) (*(eptr)++)
|
||||
|
||||
/* When UTF encoding is being used, a character is no longer just a single
|
||||
byte in 8-bit mode or a single short in 16-bit mode. The macros for character
|
||||
handling generate simple sequences when used in the basic mode, and more
|
||||
complicated ones for UTF characters. GETCHARLENTEST and other macros are not
|
||||
used when UTF is not supported. To make sure they can never even appear when
|
||||
UTF support is omitted, we don't even define them. */
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
|
||||
/* #define MAX_UTF_SINGLE_CU */
|
||||
/* #define HAS_EXTRALEN(c) */
|
||||
/* #define GET_EXTRALEN(c) */
|
||||
/* #define NOT_FIRSTCU(c) */
|
||||
#define GETCHAR(c, eptr) c = *eptr;
|
||||
#define GETCHARTEST(c, eptr) c = *eptr;
|
||||
#define GETCHARINC(c, eptr) c = *eptr++;
|
||||
#define GETCHARINCTEST(c, eptr) c = *eptr++;
|
||||
#define GETCHARLEN(c, eptr, len) c = *eptr;
|
||||
#define PUTCHAR(c, p) (*p = c, 1)
|
||||
/* #define GETCHARLENTEST(c, eptr, len) */
|
||||
/* #define BACKCHAR(eptr) */
|
||||
/* #define FORWARDCHAR(eptr) */
|
||||
/* #define FORWARDCHARTEST(eptr,end) */
|
||||
/* #define ACROSSCHAR(condition, eptr, action) */
|
||||
|
||||
#else /* SUPPORT_UNICODE */
|
||||
|
||||
/* ------------------- 8-bit support ------------------ */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */
|
||||
|
||||
/* The largest UTF code point that can be encoded as a single code unit. */
|
||||
|
||||
#define MAX_UTF_SINGLE_CU 127
|
||||
|
||||
/* Tests whether the code point needs extra characters to decode. */
|
||||
|
||||
#define HAS_EXTRALEN(c) HASUTF8EXTRALEN(c)
|
||||
|
||||
/* Returns the additional number of characters if HAS_EXTRALEN(c) is TRUE.
|
||||
Otherwise it has an undefined behaviour. */
|
||||
|
||||
#define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3fu])
|
||||
|
||||
/* Returns TRUE, if the given value is not the first code unit of a UTF
|
||||
sequence. */
|
||||
|
||||
#define NOT_FIRSTCU(c) (((c) & 0xc0u) == 0x80u)
|
||||
|
||||
/* Get the next UTF-8 character, not advancing the pointer. This is called when
|
||||
we know we are in UTF-8 mode. */
|
||||
|
||||
#define GETCHAR(c, eptr) \
|
||||
c = *eptr; \
|
||||
if (c >= 0xc0u) GETUTF8(c, eptr);
|
||||
|
||||
/* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the
|
||||
pointer. */
|
||||
|
||||
#define GETCHARTEST(c, eptr) \
|
||||
c = *eptr; \
|
||||
if (utf && c >= 0xc0u) GETUTF8(c, eptr);
|
||||
|
||||
/* Get the next UTF-8 character, advancing the pointer. This is called when we
|
||||
know we are in UTF-8 mode. */
|
||||
|
||||
#define GETCHARINC(c, eptr) \
|
||||
c = *eptr++; \
|
||||
if (c >= 0xc0u) GETUTF8INC(c, eptr);
|
||||
|
||||
/* Get the next character, testing for UTF-8 mode, and advancing the pointer.
|
||||
This is called when we don't know if we are in UTF-8 mode. */
|
||||
|
||||
#define GETCHARINCTEST(c, eptr) \
|
||||
c = *eptr++; \
|
||||
if (utf && c >= 0xc0u) GETUTF8INC(c, eptr);
|
||||
|
||||
/* Get the next UTF-8 character, not advancing the pointer, incrementing length
|
||||
if there are extra bytes. This is called when we know we are in UTF-8 mode. */
|
||||
|
||||
#define GETCHARLEN(c, eptr, len) \
|
||||
c = *eptr; \
|
||||
if (c >= 0xc0u) GETUTF8LEN(c, eptr, len);
|
||||
|
||||
/* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the
|
||||
pointer, incrementing length if there are extra bytes. This is called when we
|
||||
do not know if we are in UTF-8 mode. */
|
||||
|
||||
#define GETCHARLENTEST(c, eptr, len) \
|
||||
c = *eptr; \
|
||||
if (utf && c >= 0xc0u) GETUTF8LEN(c, eptr, len);
|
||||
|
||||
/* If the pointer is not at the start of a character, move it back until
|
||||
it is. This is called only in UTF-8 mode - we don't put a test within the macro
|
||||
because almost all calls are already within a block of UTF-8 only code. */
|
||||
|
||||
#define BACKCHAR(eptr) while((*eptr & 0xc0u) == 0x80u) eptr--
|
||||
|
||||
/* Same as above, just in the other direction. */
|
||||
#define FORWARDCHAR(eptr) while((*eptr & 0xc0u) == 0x80u) eptr++
|
||||
#define FORWARDCHARTEST(eptr,end) while(eptr < end && (*eptr & 0xc0u) == 0x80u) eptr++
|
||||
|
||||
/* Same as above, but it allows a fully customizable form. */
|
||||
#define ACROSSCHAR(condition, eptr, action) \
|
||||
while((condition) && ((*eptr) & 0xc0u) == 0x80u) action
|
||||
|
||||
/* Deposit a character into memory, returning the number of code units. */
|
||||
|
||||
#define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \
|
||||
PRIV(ord2utf)(c,p) : (*p = c, 1))
|
||||
|
||||
|
||||
/* ------------------- 16-bit support ------------------ */
|
||||
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
#define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */
|
||||
|
||||
/* The largest UTF code point that can be encoded as a single code unit. */
|
||||
|
||||
#define MAX_UTF_SINGLE_CU 65535
|
||||
|
||||
/* Tests whether the code point needs extra characters to decode. */
|
||||
|
||||
#define HAS_EXTRALEN(c) (((c) & 0xfc00u) == 0xd800u)
|
||||
|
||||
/* Returns the number of additional code units if HAS_EXTRALEN(c) is TRUE.
|
||||
Otherwise it has an undefined behaviour. */
|
||||
|
||||
#define GET_EXTRALEN(c) 1
|
||||
|
||||
/* Returns TRUE, if the given value is not the first code unit of a UTF
|
||||
sequence. */
|
||||
|
||||
#define NOT_FIRSTCU(c) (((c) & 0xfc00u) == 0xdc00u)
|
||||
|
||||
/* Base macro to pick up the low surrogate of a UTF-16 character, not
|
||||
advancing the pointer. */
|
||||
|
||||
#define GETUTF16(c, eptr) \
|
||||
{ c = (((c & 0x3ffu) << 10) | (eptr[1] & 0x3ffu)) + 0x10000u; }
|
||||
|
||||
/* Get the next UTF-16 character, not advancing the pointer. This is called when
|
||||
we know we are in UTF-16 mode. */
|
||||
|
||||
#define GETCHAR(c, eptr) \
|
||||
c = *eptr; \
|
||||
if ((c & 0xfc00u) == 0xd800u) GETUTF16(c, eptr);
|
||||
|
||||
/* Get the next UTF-16 character, testing for UTF-16 mode, and not advancing the
|
||||
pointer. */
|
||||
|
||||
#define GETCHARTEST(c, eptr) \
|
||||
c = *eptr; \
|
||||
if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16(c, eptr);
|
||||
|
||||
/* Base macro to pick up the low surrogate of a UTF-16 character, advancing
|
||||
the pointer. */
|
||||
|
||||
#define GETUTF16INC(c, eptr) \
|
||||
{ c = (((c & 0x3ffu) << 10) | (*eptr++ & 0x3ffu)) + 0x10000u; }
|
||||
|
||||
/* Get the next UTF-16 character, advancing the pointer. This is called when we
|
||||
know we are in UTF-16 mode. */
|
||||
|
||||
#define GETCHARINC(c, eptr) \
|
||||
c = *eptr++; \
|
||||
if ((c & 0xfc00u) == 0xd800u) GETUTF16INC(c, eptr);
|
||||
|
||||
/* Get the next character, testing for UTF-16 mode, and advancing the pointer.
|
||||
This is called when we don't know if we are in UTF-16 mode. */
|
||||
|
||||
#define GETCHARINCTEST(c, eptr) \
|
||||
c = *eptr++; \
|
||||
if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16INC(c, eptr);
|
||||
|
||||
/* Base macro to pick up the low surrogate of a UTF-16 character, not
|
||||
advancing the pointer, incrementing the length. */
|
||||
|
||||
#define GETUTF16LEN(c, eptr, len) \
|
||||
{ c = (((c & 0x3ffu) << 10) | (eptr[1] & 0x3ffu)) + 0x10000u; len++; }
|
||||
|
||||
/* Get the next UTF-16 character, not advancing the pointer, incrementing
|
||||
length if there is a low surrogate. This is called when we know we are in
|
||||
UTF-16 mode. */
|
||||
|
||||
#define GETCHARLEN(c, eptr, len) \
|
||||
c = *eptr; \
|
||||
if ((c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len);
|
||||
|
||||
/* Get the next UTF-16 character, testing for UTF-16 mode, not advancing the
|
||||
pointer, incrementing length if there is a low surrogate. This is called when
|
||||
we do not know if we are in UTF-16 mode. */
|
||||
|
||||
#define GETCHARLENTEST(c, eptr, len) \
|
||||
c = *eptr; \
|
||||
if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len);
|
||||
|
||||
/* If the pointer is not at the start of a character, move it back until
|
||||
it is. This is called only in UTF-16 mode - we don't put a test within the
|
||||
macro because almost all calls are already within a block of UTF-16 only
|
||||
code. */
|
||||
|
||||
#define BACKCHAR(eptr) if ((*eptr & 0xfc00u) == 0xdc00u) eptr--
|
||||
|
||||
/* Same as above, just in the other direction. */
|
||||
#define FORWARDCHAR(eptr) if ((*eptr & 0xfc00u) == 0xdc00u) eptr++
|
||||
#define FORWARDCHARTEST(eptr,end) if (eptr < end && (*eptr & 0xfc00u) == 0xdc00u) eptr++
|
||||
|
||||
/* Same as above, but it allows a fully customizable form. */
|
||||
#define ACROSSCHAR(condition, eptr, action) \
|
||||
if ((condition) && ((*eptr) & 0xfc00u) == 0xdc00u) action
|
||||
|
||||
/* Deposit a character into memory, returning the number of code units. */
|
||||
|
||||
#define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \
|
||||
PRIV(ord2utf)(c,p) : (*p = c, 1))
|
||||
|
||||
|
||||
/* ------------------- 32-bit support ------------------ */
|
||||
|
||||
#else
|
||||
|
||||
/* These are trivial for the 32-bit library, since all UTF-32 characters fit
|
||||
into one PCRE2_UCHAR unit. */
|
||||
|
||||
#define MAX_UTF_SINGLE_CU (0x10ffffu)
|
||||
#define HAS_EXTRALEN(c) (0)
|
||||
#define GET_EXTRALEN(c) (0)
|
||||
#define NOT_FIRSTCU(c) (0)
|
||||
|
||||
/* Get the next UTF-32 character, not advancing the pointer. This is called when
|
||||
we know we are in UTF-32 mode. */
|
||||
|
||||
#define GETCHAR(c, eptr) \
|
||||
c = *(eptr);
|
||||
|
||||
/* Get the next UTF-32 character, testing for UTF-32 mode, and not advancing the
|
||||
pointer. */
|
||||
|
||||
#define GETCHARTEST(c, eptr) \
|
||||
c = *(eptr);
|
||||
|
||||
/* Get the next UTF-32 character, advancing the pointer. This is called when we
|
||||
know we are in UTF-32 mode. */
|
||||
|
||||
#define GETCHARINC(c, eptr) \
|
||||
c = *((eptr)++);
|
||||
|
||||
/* Get the next character, testing for UTF-32 mode, and advancing the pointer.
|
||||
This is called when we don't know if we are in UTF-32 mode. */
|
||||
|
||||
#define GETCHARINCTEST(c, eptr) \
|
||||
c = *((eptr)++);
|
||||
|
||||
/* Get the next UTF-32 character, not advancing the pointer, not incrementing
|
||||
length (since all UTF-32 is of length 1). This is called when we know we are in
|
||||
UTF-32 mode. */
|
||||
|
||||
#define GETCHARLEN(c, eptr, len) \
|
||||
GETCHAR(c, eptr)
|
||||
|
||||
/* Get the next UTF-32 character, testing for UTF-32 mode, not advancing the
|
||||
pointer, not incrementing the length (since all UTF-32 is of length 1).
|
||||
This is called when we do not know if we are in UTF-32 mode. */
|
||||
|
||||
#define GETCHARLENTEST(c, eptr, len) \
|
||||
GETCHARTEST(c, eptr)
|
||||
|
||||
/* If the pointer is not at the start of a character, move it back until
|
||||
it is. This is called only in UTF-32 mode - we don't put a test within the
|
||||
macro because almost all calls are already within a block of UTF-32 only
|
||||
code.
|
||||
|
||||
These are all no-ops since all UTF-32 characters fit into one PCRE2_UCHAR. */
|
||||
|
||||
#define BACKCHAR(eptr) do { } while (0)
|
||||
|
||||
/* Same as above, just in the other direction. */
|
||||
|
||||
#define FORWARDCHAR(eptr) do { } while (0)
|
||||
#define FORWARDCHARTEST(eptr,end) do { } while (0)
|
||||
|
||||
/* Same as above, but it allows a fully customizable form. */
|
||||
|
||||
#define ACROSSCHAR(condition, eptr, action) do { } while (0)
|
||||
|
||||
/* Deposit a character into memory, returning the number of code units. */
|
||||
|
||||
#define PUTCHAR(c, p) (*p = c, 1)
|
||||
|
||||
#endif /* UTF-32 character handling */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
|
||||
/* Mode-dependent macros that have the same definition in all modes. */
|
||||
|
||||
/* Convert a count of code units into the equivalent number of bytes. */
#define CU2BYTES(x) ((x) * (PCRE2_CODE_UNIT_WIDTH / 8))
|
||||
/* Convert a byte count into code units; for integer x any remainder is
discarded by the integer division. */
#define BYTES2CU(x) ((x) / (PCRE2_CODE_UNIT_WIDTH / 8))
|
||||
#define PUTINC(a,n,d) PUT(a,n,d), a += LINK_SIZE
|
||||
#define PUT2INC(a,n,d) PUT2(a,n,d), a += IMM2_SIZE
|
||||
|
||||
|
||||
/* ----------------------- HIDDEN STRUCTURES ----------------------------- */
|
||||
|
||||
/* NOTE: All these structures *must* start with a pcre2_memctl structure. The
|
||||
code that uses them is simpler because it assumes this. */
|
||||
|
||||
/* The real general context structure. At present it holds only data for custom
|
||||
memory control. */
|
||||
|
||||
/* WARNING: if this is ever changed, code in pcre2_substitute.c will have to be
|
||||
changed because it builds a general context "by hand" in order to avoid the
|
||||
malloc() call in pcre2_general_context_create(). There is also code in
|
||||
pcre2_match.c that makes the same assumption. */
|
||||
|
||||
typedef struct pcre2_real_general_context {
|
||||
pcre2_memctl memctl;
|
||||
} pcre2_real_general_context;
|
||||
|
||||
/* The real compile context structure */
|
||||
|
||||
typedef struct pcre2_real_compile_context {
|
||||
pcre2_memctl memctl;
|
||||
int (*stack_guard)(uint32_t, void *);
|
||||
void *stack_guard_data;
|
||||
const uint8_t *tables;
|
||||
PCRE2_SIZE max_pattern_length;
|
||||
PCRE2_SIZE max_pattern_compiled_length;
|
||||
uint16_t bsr_convention;
|
||||
uint16_t newline_convention;
|
||||
uint32_t parens_nest_limit;
|
||||
uint32_t extra_options;
|
||||
uint32_t max_varlookbehind;
|
||||
uint32_t optimization_flags;
|
||||
} pcre2_real_compile_context;
|
||||
|
||||
/* The real match context structure. */
|
||||
|
||||
typedef struct pcre2_real_match_context {
|
||||
pcre2_memctl memctl;
|
||||
#ifdef SUPPORT_JIT
|
||||
pcre2_jit_callback jit_callback;
|
||||
void *jit_callback_data;
|
||||
#endif
|
||||
int (*callout)(pcre2_callout_block *, void *);
|
||||
void *callout_data;
|
||||
int (*substitute_callout)(pcre2_substitute_callout_block *, void *);
|
||||
void *substitute_callout_data;
|
||||
PCRE2_SIZE (*substitute_case_callout)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *,
|
||||
PCRE2_SIZE, int, void *);
|
||||
void *substitute_case_callout_data;
|
||||
PCRE2_SIZE offset_limit;
|
||||
uint32_t heap_limit;
|
||||
uint32_t match_limit;
|
||||
uint32_t depth_limit;
|
||||
} pcre2_real_match_context;
|
||||
|
||||
/* The real convert context structure. */
|
||||
|
||||
typedef struct pcre2_real_convert_context {
|
||||
pcre2_memctl memctl;
|
||||
uint32_t glob_separator;
|
||||
uint32_t glob_escape;
|
||||
} pcre2_real_convert_context;
|
||||
|
||||
/* The real compiled code structure. The type for the blocksize field is
|
||||
defined specially because it is required in pcre2_serialize_decode() when
|
||||
copying the size from possibly unaligned memory into a variable of the same
|
||||
type. Use a macro rather than a typedef to avoid compiler warnings when this
|
||||
file is included multiple times by pcre2test. LOOKBEHIND_MAX specifies the
|
||||
largest lookbehind that is supported. (OP_REVERSE and OP_VREVERSE in a pattern
|
||||
have 16-bit arguments in 8-bit and 16-bit modes, so we need no more than a
|
||||
16-bit field here.) */
|
||||
|
||||
#undef CODE_BLOCKSIZE_TYPE
|
||||
#define CODE_BLOCKSIZE_TYPE PCRE2_SIZE
|
||||
|
||||
#undef LOOKBEHIND_MAX
|
||||
#define LOOKBEHIND_MAX UINT16_MAX
|
||||
|
||||
typedef struct pcre2_real_code {
|
||||
pcre2_memctl memctl; /* Memory control fields */
|
||||
const uint8_t *tables; /* The character tables */
|
||||
void *executable_jit; /* Pointer to JIT code */
|
||||
uint8_t start_bitmap[32]; /* Bitmap for starting code unit < 256 */
|
||||
CODE_BLOCKSIZE_TYPE blocksize; /* Total (bytes) that was malloc-ed */
|
||||
CODE_BLOCKSIZE_TYPE code_start; /* Byte code start offset */
|
||||
uint32_t magic_number; /* Paranoid and endianness check */
|
||||
uint32_t compile_options; /* Options passed to pcre2_compile() */
|
||||
uint32_t overall_options; /* Options after processing the pattern */
|
||||
uint32_t extra_options; /* Taken from compile_context */
|
||||
uint32_t flags; /* Various state flags */
|
||||
uint32_t limit_heap; /* Limit set in the pattern */
|
||||
uint32_t limit_match; /* Limit set in the pattern */
|
||||
uint32_t limit_depth; /* Limit set in the pattern */
|
||||
uint32_t first_codeunit; /* Starting code unit */
|
||||
uint32_t last_codeunit; /* This codeunit must be seen */
|
||||
uint16_t bsr_convention; /* What \R matches */
|
||||
uint16_t newline_convention; /* What is a newline? */
|
||||
uint16_t max_lookbehind; /* Longest lookbehind (characters) */
|
||||
uint16_t minlength; /* Minimum length of match */
|
||||
uint16_t top_bracket; /* Highest numbered group */
|
||||
uint16_t top_backref; /* Highest numbered back reference */
|
||||
uint16_t name_entry_size; /* Size (code units) of table entries */
|
||||
uint16_t name_count; /* Number of name entries in the table */
|
||||
uint32_t optimization_flags; /* Optimizations enabled at compile time */
|
||||
} pcre2_real_code;
|
||||
|
||||
/* The real match data structure. Define ovector as large as it can ever
|
||||
actually be so that array bound checkers don't grumble. Memory for this
|
||||
structure is obtained by calling pcre2_match_data_create(), which sets the size
|
||||
as the offset of ovector plus a pair of elements for each capturable string, so
|
||||
the size varies from call to call. As the maximum number of capturing
|
||||
subpatterns is 65535 we must allow for 65536 strings to include the overall
|
||||
match. (See also the heapframe structure below.) */
|
||||
|
||||
struct heapframe; /* Forward reference */
|
||||
|
||||
typedef struct pcre2_real_match_data {
|
||||
pcre2_memctl memctl; /* Memory control fields */
|
||||
const pcre2_real_code *code; /* The pattern used for the match */
|
||||
PCRE2_SPTR subject; /* The subject that was matched */
|
||||
PCRE2_SPTR mark; /* Pointer to last mark */
|
||||
struct heapframe *heapframes; /* Backtracking frames heap memory */
|
||||
PCRE2_SIZE heapframes_size; /* Malloc-ed size */
|
||||
PCRE2_SIZE subject_length; /* Subject length */
|
||||
PCRE2_SIZE leftchar; /* Offset to leftmost code unit */
|
||||
PCRE2_SIZE rightchar; /* Offset to rightmost code unit */
|
||||
PCRE2_SIZE startchar; /* Offset to starting code unit */
|
||||
uint8_t matchedby; /* Type of match (normal, JIT, DFA) */
|
||||
uint8_t flags; /* Various flags */
|
||||
uint16_t oveccount; /* Number of pairs */
|
||||
int rc; /* The return code from the match */
|
||||
PCRE2_SIZE ovector[131072]; /* Must be last in the structure */
|
||||
} pcre2_real_match_data;
|
||||
|
||||
|
||||
/* ----------------------- PRIVATE STRUCTURES ----------------------------- */
|
||||
|
||||
/* These structures are not needed for pcre2test. */
|
||||
|
||||
#ifndef PCRE2_PCRE2TEST
|
||||
|
||||
/* Structures for checking for mutual function recursion when scanning compiled
|
||||
or parsed code. */
|
||||
|
||||
typedef struct recurse_check {
|
||||
struct recurse_check *prev;
|
||||
PCRE2_SPTR group;
|
||||
} recurse_check;
|
||||
|
||||
typedef struct parsed_recurse_check {
|
||||
struct parsed_recurse_check *prev;
|
||||
uint32_t *groupptr;
|
||||
} parsed_recurse_check;
|
||||
|
||||
/* Structure for building a cache when filling in pattern recursion offsets. */
|
||||
|
||||
typedef struct recurse_cache {
|
||||
PCRE2_SPTR group;
|
||||
int groupnumber;
|
||||
} recurse_cache;
|
||||
|
||||
/* Structure for maintaining a chain of pointers to the currently incomplete
|
||||
branches, for testing for left recursion while compiling. */
|
||||
|
||||
typedef struct branch_chain {
|
||||
struct branch_chain *outer;
|
||||
PCRE2_UCHAR *current_branch;
|
||||
} branch_chain;
|
||||
|
||||
/* Structure for building a list of named groups during the first pass of
|
||||
compiling. */
|
||||
|
||||
typedef struct named_group {
|
||||
PCRE2_SPTR name; /* Points to the name in the pattern */
|
||||
uint32_t number; /* Group number */
|
||||
uint16_t length; /* Length of the name */
|
||||
uint16_t isdup; /* TRUE if a duplicate */
|
||||
} named_group;
|
||||
|
||||
/* Structure for caching sorted ranges. This improves the performance
|
||||
of translating META code to byte code. */
|
||||
|
||||
typedef struct class_ranges {
|
||||
struct class_ranges *next; /* Next class ranges */
|
||||
size_t char_lists_size; /* Total size of encoded char lists */
|
||||
size_t char_lists_start; /* Start offset of encoded char lists */
|
||||
uint16_t range_list_size; /* Size of ranges array */
|
||||
uint16_t char_lists_types; /* The XCL_LIST header of char lists */
|
||||
/* Followed by the list of ranges (start/end pairs) */
|
||||
} class_ranges;
|
||||
|
||||
typedef union class_bits_storage {
|
||||
uint8_t classbits[32];
|
||||
uint32_t classwords[8];
|
||||
} class_bits_storage;
|
||||
|
||||
/* Structure for passing "static" information around between the functions
|
||||
doing the compiling, so that they are thread-safe. */
|
||||
|
||||
typedef struct compile_block {
|
||||
pcre2_real_compile_context *cx; /* Points to the compile context */
|
||||
const uint8_t *lcc; /* Points to lower casing table */
|
||||
const uint8_t *fcc; /* Points to case-flipping table */
|
||||
const uint8_t *cbits; /* Points to character type table */
|
||||
const uint8_t *ctypes; /* Points to table of type maps */
|
||||
PCRE2_UCHAR *start_workspace; /* The start of working space */
|
||||
PCRE2_UCHAR *start_code; /* The start of the compiled code */
|
||||
PCRE2_SPTR start_pattern; /* The start of the pattern */
|
||||
PCRE2_SPTR end_pattern; /* The end of the pattern */
|
||||
PCRE2_UCHAR *name_table; /* The name/number table */
|
||||
PCRE2_SIZE workspace_size; /* Size of workspace */
|
||||
PCRE2_SIZE small_ref_offset[10]; /* Offsets for \1 to \9 */
|
||||
PCRE2_SIZE erroroffset; /* Offset of error in pattern */
|
||||
class_bits_storage classbits; /* Temporary store for classbits */
|
||||
uint16_t names_found; /* Number of entries so far */
|
||||
uint16_t name_entry_size; /* Size of each entry */
|
||||
uint16_t parens_depth; /* Depth of nested parentheses */
|
||||
uint16_t assert_depth; /* Depth of nested assertions */
|
||||
named_group *named_groups; /* Points to vector in pre-compile */
|
||||
uint32_t named_group_list_size; /* Number of entries in the list */
|
||||
uint32_t external_options; /* External (initial) options */
|
||||
uint32_t external_flags; /* External flag bits to be set */
|
||||
uint32_t bracount; /* Count of capturing parentheses */
|
||||
uint32_t lastcapture; /* Last capture encountered */
|
||||
uint32_t *parsed_pattern; /* Parsed pattern buffer */
|
||||
uint32_t *parsed_pattern_end; /* Parsed pattern should not get here */
|
||||
uint32_t *groupinfo; /* Group info vector */
|
||||
uint32_t top_backref; /* Maximum back reference */
|
||||
uint32_t backref_map; /* Bitmap of low back refs */
|
||||
uint32_t nltype; /* Newline type */
|
||||
uint32_t nllen; /* Newline string length */
|
||||
PCRE2_UCHAR nl[4]; /* Newline string when fixed length */
|
||||
uint8_t class_op_used[ECLASS_NEST_LIMIT]; /* Operation used for
|
||||
extended classes */
|
||||
uint32_t req_varyopt; /* "After variable item" flag for reqbyte */
|
||||
uint32_t max_varlookbehind; /* Limit for variable lookbehinds */
|
||||
int max_lookbehind; /* Maximum lookbehind encountered (characters) */
|
||||
BOOL had_accept; /* (*ACCEPT) encountered */
|
||||
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
|
||||
BOOL had_recurse; /* Had a pattern recursion or subroutine call */
|
||||
BOOL dupnames; /* Duplicate names exist */
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
class_ranges *cranges; /* First class range. */
|
||||
class_ranges *next_cranges; /* Next class range. */
|
||||
size_t char_lists_size; /* Current size of character lists */
|
||||
#endif
|
||||
} compile_block;
|
||||
|
||||
/* Structure for keeping the properties of the in-memory stack used
|
||||
by the JIT matcher. */
|
||||
|
||||
typedef struct pcre2_real_jit_stack {
|
||||
pcre2_memctl memctl;
|
||||
void* stack;
|
||||
} pcre2_real_jit_stack;
|
||||
|
||||
/* Structure for items in a linked list that represents an explicit recursive
|
||||
call within the pattern when running pcre2_dfa_match(). */
|
||||
|
||||
typedef struct dfa_recursion_info {
|
||||
struct dfa_recursion_info *prevrec;
|
||||
PCRE2_SPTR subject_position;
|
||||
PCRE2_SPTR last_used_ptr;
|
||||
uint32_t group_num;
|
||||
} dfa_recursion_info;
|
||||
|
||||
/* Structure for "stack" frames that are used for remembering backtracking
|
||||
positions during matching. As these are used in a vector, with the ovector item
|
||||
being extended, the size of the structure must be a multiple of sizeof(PCRE2_SIZE). The
|
||||
only way to check this at compile time is to force an error by generating an
|
||||
array with a negative size. By putting this in a typedef (which is never used),
|
||||
we don't generate any code when all is well. */
|
||||
|
||||
typedef struct heapframe {
|
||||
|
||||
/* The first set of fields are variables that have to be preserved over calls
|
||||
to RRMATCH(), but which do not need to be copied to new frames. */
|
||||
|
||||
PCRE2_SPTR ecode; /* The current position in the pattern */
|
||||
PCRE2_SPTR temp_sptr[2]; /* Used for short-term PCRE2_SPTR values */
|
||||
PCRE2_SIZE length; /* Used for character, string, or code lengths */
|
||||
PCRE2_SIZE back_frame; /* Amount to subtract on RRETURN */
|
||||
PCRE2_SIZE temp_size; /* Used for short-term PCRE2_SIZE values */
|
||||
uint32_t rdepth; /* Function "recursion" depth within pcre2_match() */
|
||||
uint32_t group_frame_type; /* Type information for group frames */
|
||||
uint32_t temp_32[4]; /* Used for short-term 32-bit or BOOL values */
|
||||
uint8_t return_id; /* Where to go on in internal "return" */
|
||||
uint8_t op; /* Processing opcode */
|
||||
|
||||
/* At this point, the structure is 16-bit aligned. On most architectures
|
||||
the alignment requirement for a pointer will ensure that the eptr field below
|
||||
is 32-bit or 64-bit aligned. However, on m68k it is fine to have a pointer
|
||||
that is 16-bit aligned. We must therefore ensure that what comes between here
|
||||
and eptr is an odd multiple of 16 bits so as to get back into 32-bit
|
||||
alignment. This happens naturally when PCRE2_UCHAR is 8 bits wide, but needs
|
||||
fudges in the other cases. In the 32-bit case the padding comes first so that
|
||||
the occu field itself is 32-bit aligned. Without the padding, this structure
|
||||
is no longer a multiple of PCRE2_SIZE on m68k, and the check below fails. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
PCRE2_UCHAR occu[6]; /* Used for other case code units */
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
PCRE2_UCHAR occu[2]; /* Used for other case code units */
|
||||
uint8_t unused[2]; /* Ensure 32-bit alignment (see above) */
|
||||
#else
|
||||
uint8_t unused[2]; /* Ensure 32-bit alignment (see above) */
|
||||
PCRE2_UCHAR occu[1]; /* Used for other case code units */
|
||||
#endif
|
||||
|
||||
/* The rest have to be copied from the previous frame whenever a new frame
|
||||
becomes current. The final field is specified as a large vector so that
|
||||
runtime array bound checks don't catch references to it. However, for any
|
||||
specific call to pcre2_match() the memory allocated for each frame structure
|
||||
allows for exactly the right size ovector for the number of capturing
|
||||
parentheses. (See also the comment for pcre2_real_match_data above.) */
|
||||
|
||||
PCRE2_SPTR eptr; /* MUST BE FIRST */
|
||||
PCRE2_SPTR start_match; /* Can be adjusted by \K */
|
||||
PCRE2_SPTR mark; /* Most recent mark on the success path */
|
||||
PCRE2_SPTR recurse_last_used; /* Last character used at time of pattern recursion */
|
||||
uint32_t current_recurse; /* Group number of current (deepest) pattern recursion */
|
||||
uint32_t capture_last; /* Most recent capture */
|
||||
PCRE2_SIZE last_group_offset; /* Saved offset to most recent group frame */
|
||||
PCRE2_SIZE offset_top; /* Offset after highest capture */
|
||||
PCRE2_SIZE ovector[131072]; /* Must be last in the structure */
|
||||
} heapframe;
|
||||
|
||||
/* Assert that the size of the heapframe structure is a multiple of PCRE2_SIZE.
|
||||
See various comments above. */
|
||||
|
||||
STATIC_ASSERT((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0, heapframe_size);
|
||||
|
||||
/* Structure for computing the alignment of heapframe. */
|
||||
|
||||
typedef struct heapframe_align {
|
||||
char unalign; /* Completely unalign the current offset */
|
||||
heapframe frame; /* Offset is its alignment */
|
||||
} heapframe_align;
|
||||
|
||||
/* This define is the minimum alignment required for a heapframe, in bytes. */
|
||||
|
||||
#define HEAPFRAME_ALIGNMENT offsetof(heapframe_align, frame)
|
||||
|
||||
/* Structure for passing "static" information around between the functions
|
||||
doing traditional NFA matching (pcre2_match() and friends). */
|
||||
|
||||
typedef struct match_block {
|
||||
pcre2_memctl memctl; /* For general use */
|
||||
uint32_t heap_limit; /* As it says */
|
||||
uint32_t match_limit; /* As it says */
|
||||
uint32_t match_limit_depth; /* As it says */
|
||||
uint32_t match_call_count; /* Number of times a new frame is created */
|
||||
BOOL hitend; /* Hit the end of the subject at some point */
|
||||
BOOL hasthen; /* Pattern contains (*THEN) */
|
||||
BOOL allowemptypartial; /* Allow empty hard partial */
|
||||
const uint8_t *lcc; /* Points to lower casing table */
|
||||
const uint8_t *fcc; /* Points to case-flipping table */
|
||||
const uint8_t *ctypes; /* Points to table of type maps */
|
||||
PCRE2_SIZE start_offset; /* The start offset value */
|
||||
PCRE2_SIZE end_offset_top; /* Highwater mark at end of match */
|
||||
uint16_t partial; /* PARTIAL options */
|
||||
uint16_t bsr_convention; /* \R interpretation */
|
||||
uint16_t name_count; /* Number of names in name table */
|
||||
uint16_t name_entry_size; /* Size of entry in names table */
|
||||
PCRE2_SPTR name_table; /* Table of group names */
|
||||
PCRE2_SPTR start_code; /* For use in pattern recursion */
|
||||
PCRE2_SPTR start_subject; /* Start of the subject string */
|
||||
PCRE2_SPTR check_subject; /* Where UTF-checked from */
|
||||
PCRE2_SPTR end_subject; /* Usable end of the subject string */
|
||||
PCRE2_SPTR true_end_subject; /* Actual end of the subject string */
|
||||
PCRE2_SPTR end_match_ptr; /* Subject position at end match */
|
||||
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
|
||||
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
|
||||
PCRE2_SPTR mark; /* Mark pointer to pass back on success */
|
||||
PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */
|
||||
PCRE2_SPTR verb_ecode_ptr; /* For passing back info */
|
||||
PCRE2_SPTR verb_skip_ptr; /* For passing back a (*SKIP) name */
|
||||
uint32_t verb_current_recurse; /* Current recursion group when (*VERB) happens */
|
||||
uint32_t moptions; /* Match options */
|
||||
uint32_t poptions; /* Pattern options */
|
||||
uint32_t skip_arg_count; /* For counting SKIP_ARGs */
|
||||
uint32_t ignore_skip_arg; /* For re-run when SKIP arg name not found */
|
||||
uint32_t nltype; /* Newline type */
|
||||
uint32_t nllen; /* Newline string length */
|
||||
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
|
||||
pcre2_callout_block *cb; /* Points to a callout block */
|
||||
void *callout_data; /* To pass back to callouts */
|
||||
int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */
|
||||
} match_block;
|
||||
|
||||
/* A similar structure is used for the same purpose by the DFA matching
|
||||
functions. */
|
||||
|
||||
typedef struct dfa_match_block {
|
||||
pcre2_memctl memctl; /* For general use */
|
||||
PCRE2_SPTR start_code; /* Start of the compiled pattern */
|
||||
PCRE2_SPTR start_subject ; /* Start of the subject string */
|
||||
PCRE2_SPTR end_subject; /* End of subject string */
|
||||
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
|
||||
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
|
||||
const uint8_t *tables; /* Character tables */
|
||||
PCRE2_SIZE start_offset; /* The start offset value */
|
||||
uint32_t heap_limit; /* As it says */
|
||||
PCRE2_SIZE heap_used; /* As it says */
|
||||
uint32_t match_limit; /* As it says */
|
||||
uint32_t match_limit_depth; /* As it says */
|
||||
uint32_t match_call_count; /* Number of calls of internal function */
|
||||
uint32_t moptions; /* Match options */
|
||||
uint32_t poptions; /* Pattern options */
|
||||
uint32_t nltype; /* Newline type */
|
||||
uint32_t nllen; /* Newline string length */
|
||||
BOOL allowemptypartial; /* Allow empty hard partial */
|
||||
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
|
||||
uint16_t bsr_convention; /* \R interpretation */
|
||||
pcre2_callout_block *cb; /* Points to a callout block */
|
||||
void *callout_data; /* To pass back to callouts */
|
||||
int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */
|
||||
dfa_recursion_info *recursive; /* Linked list of pattern recursion data */
|
||||
} dfa_match_block;
|
||||
|
||||
#endif /* PCRE2_PCRE2TEST */
|
||||
|
||||
/* End of pcre2_intmodedep.h */
|
||||
@@ -1,2280 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
This module by Zoltan Herczeg
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* XClass matching code. */
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
|
||||
/* Operand aliases used when a class is compiled as part of an extended class
(XCLASS_IS_ECLASS). In this file ECLASS_CHAR_DATA is the operand the saved
character is copied back from into TMP1, and ECLASS_STACK_DATA is the operand
whose lowest bit is set (OR-ed with 1) when the class matches. */
#define ECLASS_CHAR_DATA STACK_TOP
|
||||
#define ECLASS_STACK_DATA STACK_LIMIT
|
||||
|
||||
/* Rebases the value in TMP1 from the current bias "charoffset" to the new bias
"value": emits an ADD of (charoffset - value) when the new bias is smaller, a SUB
of (value - charoffset) when it is larger, and nothing when they are equal. The
local variable "charoffset" (which must be in scope at the call site) is then
updated to "value". Note that "value" is evaluated more than once, so it must
not have side effects.

The body is wrapped in do { } while (0) so that the macro expands to exactly one
statement and can be used safely in an unbraced if / else. Call sites must
terminate the invocation with a semicolon. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
|
||||
|
||||
/* Reads the next item of a character list into "destination" and advances
"next_char" past it. The locals "list_ind" and "next_char" must be in scope at
the call site: items of the lists with index 0 and 1 are 16 bit wide, items of
all later lists are 32 bit wide.

The body is wrapped in do { } while (0) so that the macro expands to exactly one
statement; a bare if / else would capture a following "else" and leave a stray
empty statement. Call sites must terminate the invocation with a semicolon. */
#define READ_FROM_CHAR_LIST(destination) \
  do \
    { \
    if (list_ind <= 1) \
      { \
      destination = *(const uint16_t*)next_char; \
      next_char += 2; \
      } \
    else \
      { \
      destination = *(const uint32_t*)next_char; \
      next_char += 4; \
      } \
    } \
  while (0)
|
||||
|
||||
/* Capacity of the on-stack buffers embedded in xclass_ranges. The range capacity
is derived from its base 2 logarithm so the two constants cannot drift apart. */
#define XCLASS_LOCAL_RANGES_LOG2_SIZE 5
#define XCLASS_LOCAL_RANGES_SIZE (1 << XCLASS_LOCAL_RANGES_LOG2_SIZE)
|
||||
|
||||
typedef struct xclass_stack_item {
|
||||
sljit_u32 first_item;
|
||||
sljit_u32 last_item;
|
||||
struct sljit_jump *jump;
|
||||
} xclass_stack_item;
|
||||
|
||||
typedef struct xclass_ranges {
|
||||
size_t range_count;
|
||||
/* Pointer to ranges. A stack area is provided when a small buffer is enough. */
|
||||
uint32_t *ranges;
|
||||
uint32_t local_ranges[XCLASS_LOCAL_RANGES_SIZE * 2];
|
||||
/* Stack size must be log2(ranges / 2). */
|
||||
xclass_stack_item *stack;
|
||||
xclass_stack_item local_stack[XCLASS_LOCAL_RANGES_LOG2_SIZE];
|
||||
} xclass_ranges;
|
||||
|
||||
/* Decodes the character data of an extended class into a flat array of inclusive
[start, end] pairs.

Arguments:
  common   compiler state (supplies the utf flag, the code start and the allocator)
  cc       points to the first XCL_SINGLE / XCL_RANGE item or to an XCL_LIST header
  ranges   receives the result; on entry ranges->ranges and ranges->stack must
           point to buffers large enough for XCLASS_LOCAL_RANGES_SIZE ranges

On return ranges->range_count holds the number of array entries written (two per
range). When a character list may need more than XCLASS_LOCAL_RANGES_SIZE ranges,
a single block holding both the stack and the range array is allocated with
SLJIT_MALLOC and stored in ranges->stack / ranges->ranges. If that allocation
fails, the compiler memory error is set and both pointers are NULL.
NOTE(review): releasing the allocated block is not done here; presumably the
caller does it - confirm. */
static void xclass_compute_ranges(compiler_common *common, PCRE2_SPTR cc, xclass_ranges *ranges)
{
DEFINE_COMPILER;
const uint32_t no_range = ~(uint32_t)0;
size_t used = 0;
size_t probe_ranges = 0;
size_t stack_depth, remaining;
uint32_t type, list_ind;
uint32_t probe_type;
uint32_t char_list_add, range_start, range_end, item;
const uint8_t *next_char;
const uint8_t *probe_ptr;
BOOL begins_with_range;
#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
BOOL utf = common->utf;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */

if (*cc == XCL_SINGLE || *cc == XCL_RANGE)
  {
  /* Explicit items: a single character becomes [c, c], a range becomes
  [first, last]. Only a few of them are expected. */
  do
    {
    type = *cc++;
    SLJIT_ASSERT(type == XCL_SINGLE || type == XCL_RANGE);

    GETCHARINCTEST(range_start, cc);
    range_end = range_start;

    if (type == XCL_RANGE)
      {
      GETCHARINCTEST(range_end, cc);
      }

    ranges->ranges[used++] = range_start;
    ranges->ranges[used++] = range_end;
    }
  while (*cc != XCL_END);

  SLJIT_ASSERT(used <= XCLASS_LOCAL_RANGES_SIZE);
  ranges->range_count = used;
  return;
  }

/* Character list: read the type word describing the lists. */
SLJIT_ASSERT(cc[0] >= XCL_LIST);
#if PCRE2_CODE_UNIT_WIDTH == 8
type = (uint32_t)(cc[0] << 8) | cc[1];
cc += 2;
#else
type = cc[0];
cc++;
#endif /* CODE_UNIT_WIDTH */

/* The list data is located before the start of the code. */
next_char = (const uint8_t*)common->start - (GET(cc, 0) << 1);
type &= XCL_TYPE_MASK;

/* First pass: compute an upper bound for the number of ranges. */
probe_ptr = next_char;
probe_type = type;

for (list_ind = 0; probe_type > 0; list_ind++)
  {
  /* The first two lists contain 16 bit items, the rest 32 bit items. */
  const uint32_t unit_shift = (list_ind <= 1) ? 1 : 2;
  uint32_t item_count = probe_type & XCL_ITEM_COUNT_MASK;

  if (item_count == XCL_ITEM_COUNT_MASK)
    {
    /* The real item count is stored as the first element of the list. */
    if (unit_shift == 1)
      item_count = *(const uint16_t*)probe_ptr;
    else
      item_count = *(const uint32_t*)probe_ptr;

    probe_ptr += (size_t)1 << unit_shift;
    }

  probe_ptr += (size_t)item_count << unit_shift;
  probe_ranges += item_count + 1;
  probe_type >>= XCL_TYPE_BIT_LEN;
  }

if (probe_ranges > XCLASS_LOCAL_RANGES_SIZE)
  {
  /* The local buffers are too small: compute log2(probe_ranges) for the
  stack depth, then allocate the stack and the ranges as one block. */
  stack_depth = 0;

  for (remaining = probe_ranges - 1; remaining > 0; remaining >>= 1)
    stack_depth++;

  ranges->stack = (xclass_stack_item*)SLJIT_MALLOC((sizeof(xclass_stack_item) * stack_depth)
    + ((sizeof(uint32_t) << 1) * (size_t)probe_ranges), compiler->allocator_data);

  if (ranges->stack == NULL)
    {
    sljit_set_compiler_memory_error(compiler);
    ranges->ranges = NULL;
    return;
    }

  /* The range array follows the stack items inside the block. */
  ranges->ranges = (uint32_t*)(ranges->stack + stack_depth);
  }

/* Second pass: decode the lists. The value no_range in range_start means
that no range is currently open. */
char_list_add = XCL_CHAR_LIST_LOW_16_ADD;
list_ind = 0;

if ((type & XCL_BEGIN_WITH_RANGE) != 0)
  range_start = XCL_CHAR_LIST_LOW_16_START;
else
  range_start = no_range;

while (type > 0)
  {
  uint32_t item_count = type & XCL_ITEM_COUNT_MASK;

  if (item_count == XCL_ITEM_COUNT_MASK)
    {
    READ_FROM_CHAR_LIST(item_count);
    SLJIT_ASSERT(item_count >= XCL_ITEM_COUNT_MASK);
    }

  for (; item_count > 0; item_count--)
    {
    READ_FROM_CHAR_LIST(item);

    if ((item & XCL_CHAR_END) == 0)
      {
      /* The item opens a range which is closed by a later item,
      or at the end of a list. */
      range_start = char_list_add + (item >> XCL_CHAR_SHIFT);
      continue;
      }

    /* The item closes the open range, or it is a single character. */
    range_end = char_list_add + (item >> XCL_CHAR_SHIFT);

    if (range_start == no_range)
      range_start = range_end;

    ranges->ranges[used] = range_start;
    ranges->ranges[used + 1] = range_end;
    used += 2;
    range_start = no_range;
    }

  /* Move to the next list. */
  list_ind++;
  type >>= XCL_TYPE_BIT_LEN;
  begins_with_range = (type & XCL_BEGIN_WITH_RANGE) != 0;

  if (range_start == no_range)
    {
    if (begins_with_range)
      {
      /* The next list starts with a range opened at its first character. */
      switch (list_ind)
        {
        case 1:
        range_start = XCL_CHAR_LIST_HIGH_16_START;
        break;

#if PCRE2_CODE_UNIT_WIDTH == 32
        case 2:
        range_start = XCL_CHAR_LIST_LOW_32_START;
        break;

        default:
        range_start = XCL_CHAR_LIST_HIGH_32_START;
        break;
#else
        default:
        range_start = XCL_CHAR_LIST_LOW_32_START;
        break;
#endif
        }
      }
    }
  else if (!begins_with_range)
    {
    /* A range is still open, and it does not continue in the next list:
    close it at the last character of the list just processed. */
    switch (list_ind)
      {
      case 1:
      range_end = XCL_CHAR_LIST_LOW_16_END;
      break;

      case 2:
      range_end = XCL_CHAR_LIST_HIGH_16_END;
      break;

#if PCRE2_CODE_UNIT_WIDTH == 32
      case 3:
      range_end = XCL_CHAR_LIST_LOW_32_END;
      break;

      default:
      range_end = XCL_CHAR_LIST_HIGH_32_END;
      break;
#else
      default:
      range_end = XCL_CHAR_LIST_LOW_32_END;
      break;
#endif
      }

    ranges->ranges[used] = range_start;
    ranges->ranges[used + 1] = range_end;
    used += 2;
    range_start = no_range;
    }

  /* Base value added to the items of the next list. */
  switch (list_ind)
    {
    case 1:
    char_list_add = XCL_CHAR_LIST_HIGH_16_ADD;
    break;

#if PCRE2_CODE_UNIT_WIDTH == 32
    case 2:
    char_list_add = XCL_CHAR_LIST_LOW_32_ADD;
    break;

    default:
    char_list_add = XCL_CHAR_LIST_HIGH_32_ADD;
    break;
#else
    default:
    char_list_add = XCL_CHAR_LIST_LOW_32_ADD;
    break;
#endif
    }
  }

SLJIT_ASSERT(used > 0 && used <= (probe_ranges << 1));
SLJIT_ASSERT(next_char <= (const uint8_t*)common->start);
ranges->range_count = used;
}
|
||||
|
||||
/* Emits the check of the character in TMP1 against a 256 bit (32 byte) bitmap.

Arguments:
  common       compiler state
  bitset       the 32 byte bitmap
  found        jump list which receives the jumps taken when the bit is set
  backtracks   jump list which receives the jump taken when the character is
               not greater than 255 and was not accepted by the bitmap

Characters greater than 255 skip the generated check and continue after it. */
static void xclass_check_bitset(compiler_common *common, const sljit_u8 *bitset, jump_list **found, jump_list **backtracks)
{
DEFINE_COMPILER;
struct sljit_jump *above_bitmap;
BOOL top_bit_set = (bitset[31] & 0x80) != 0;

/* The bitmap only describes the characters 0..255. */
above_bitmap = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);

if (!optimize_class(common, bitset, top_bit_set, TRUE, found))
  {
  /* Generic bit test: TMP2 = bit index (low three bits), TMP1 = byte index. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  /* Load the byte of the bitmap, and test it against (1 << bit index). */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)bitset);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
  add_jump(compiler, found, JUMP(SLJIT_NOT_ZERO));
  }

/* Not accepted by the bitmap. */
add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
JUMPHERE(above_bitmap);
}
|
||||
|
||||
#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
|
||||
|
||||
/* Widens the interval [*min_ptr, *max_ptr] so that it covers the smallest and
the largest character described by the extended class data at cc.

Arguments:
  common    compiler state (utf mode must be enabled; supplies the code start)
  cc        points to the first XCL_SINGLE / XCL_RANGE item or to an XCL_LIST header
  min_ptr   in: current minimum, out: minimum lowered when a smaller character exists
  max_ptr   in: current maximum, out: maximum raised when a larger character exists

For character lists only the first item of the first used list and the last
item of the last used list are inspected. */
static void xclass_update_min_max(compiler_common *common, PCRE2_SPTR cc, sljit_u32 *min_ptr, sljit_u32 *max_ptr)
{
uint32_t type, list_ind, c;
uint32_t first_item, last_item;
sljit_u32 lowest = *min_ptr;
sljit_u32 highest = *max_ptr;
uint32_t char_list_add;
const uint8_t *next_char;
/* Characters are always decoded as utf here (the flag is presumably read by
GETCHARINCTEST). */
BOOL utf = TRUE;

/* This function is pointless without utf 8/16. */
SLJIT_ASSERT(common->utf);

if (*cc == XCL_SINGLE || *cc == XCL_RANGE)
  {
  /* Explicit items: check the start and the end of each of them. */
  do
    {
    type = *cc++;
    SLJIT_ASSERT(type == XCL_SINGLE || type == XCL_RANGE);

    GETCHARINCTEST(c, cc);
    if (c < lowest)
      lowest = c;

    if (type == XCL_RANGE)
      {
      GETCHARINCTEST(c, cc);
      }

    if (c > highest)
      highest = c;
    }
  while (*cc != XCL_END);

  SLJIT_ASSERT(lowest <= MAX_UTF_CODE_POINT && highest <= MAX_UTF_CODE_POINT && lowest <= highest);
  *min_ptr = lowest;
  *max_ptr = highest;
  return;
  }

/* Character list: read the type word describing the lists. */
SLJIT_ASSERT(cc[0] >= XCL_LIST);
#if PCRE2_CODE_UNIT_WIDTH == 8
type = (uint32_t)(cc[0] << 8) | cc[1];
cc += 2;
#else
type = cc[0];
cc++;
#endif /* CODE_UNIT_WIDTH */

/* The list data is located before the start of the code. */
next_char = (const uint8_t*)common->start - (GET(cc, 0) << 1);
type &= XCL_TYPE_MASK;

SLJIT_ASSERT(type != 0);

/* Minimum: skip the lists which have no items and do not begin with a range. */
for (list_ind = 0; (type & (XCL_BEGIN_WITH_RANGE | XCL_ITEM_COUNT_MASK)) == 0; list_ind++)
  type >>= XCL_TYPE_BIT_LEN;

SLJIT_ASSERT(list_ind <= 2);

if (list_ind == 0)
  {
  char_list_add = XCL_CHAR_LIST_LOW_16_ADD;
  c = XCL_CHAR_LIST_LOW_16_START;
  }
else if (list_ind == 1)
  {
  char_list_add = XCL_CHAR_LIST_HIGH_16_ADD;
  c = XCL_CHAR_LIST_HIGH_16_START;
  }
else
  {
  char_list_add = XCL_CHAR_LIST_LOW_32_ADD;
  c = XCL_CHAR_LIST_LOW_32_START;
  }

if ((type & XCL_BEGIN_WITH_RANGE) == 0)
  {
  /* The list does not begin with a range, so its first item is the smallest
  character. One element is skipped when the item count is stored in-line. */
  const size_t skip = ((type & XCL_ITEM_COUNT_MASK) == XCL_ITEM_COUNT_MASK) ? 1 : 0;

  if (list_ind <= 1)
    first_item = ((const uint16_t*)next_char)[skip];
  else
    first_item = ((const uint32_t*)next_char)[skip];

  c = char_list_add + (first_item >> XCL_CHAR_SHIFT);
  }

/* Otherwise c is the first character of the list. */
if (c < lowest)
  lowest = c;

/* Maximum: advance next_char to the end of the last used list. */
for (;;)
  {
  /* The first two lists contain 16 bit items, the rest 32 bit items. */
  const uint32_t unit_shift = (list_ind <= 1) ? 1 : 2;
  uint32_t units = type & XCL_ITEM_COUNT_MASK;

  if (units == XCL_ITEM_COUNT_MASK)
    {
    /* The item count is stored in-line; the count element is skipped as well. */
    if (unit_shift == 1)
      units = *(const uint16_t*)next_char;
    else
      units = *(const uint32_t*)next_char;

    units++;
    }

  next_char += units << unit_shift;

  if ((type >> XCL_TYPE_BIT_LEN) == 0)
    break;

  type >>= XCL_TYPE_BIT_LEN;
  list_ind++;
  }

SLJIT_ASSERT(list_ind <= 2 && type != 0);

if (list_ind == 0)
  {
  char_list_add = XCL_CHAR_LIST_LOW_16_ADD;
  c = XCL_CHAR_LIST_LOW_16_END;
  }
else if (list_ind == 1)
  {
  char_list_add = XCL_CHAR_LIST_HIGH_16_ADD;
  c = XCL_CHAR_LIST_HIGH_16_END;
  }
else
  {
  char_list_add = XCL_CHAR_LIST_LOW_32_ADD;
  c = XCL_CHAR_LIST_LOW_32_END;
  }

if ((type & XCL_ITEM_COUNT_MASK) != 0)
  {
  /* Inspect the last item of the list. When it does not close a range,
  c remains the last character of the list. */
  if (list_ind <= 1)
    last_item = *(const uint16_t*)(next_char - 2);
  else
    last_item = *(const uint32_t*)(next_char - 4);

  if (last_item & XCL_CHAR_END)
    c = char_list_add + (last_item >> XCL_CHAR_SHIFT);
  }

if (c > highest)
  highest = c;

SLJIT_ASSERT(lowest <= MAX_UTF_CODE_POINT && highest <= MAX_UTF_CODE_POINT && lowest <= highest);
*min_ptr = lowest;
*max_ptr = highest;
}
|
||||
|
||||
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
|
||||
|
||||
/* Status bits of compile_xclass_matchingpath. */

/* The class is compiled as part of an extended class: a match is recorded by
setting the lowest bit of ECLASS_STACK_DATA. */
#define XCLASS_IS_ECLASS                              0x001

#ifdef SUPPORT_UNICODE

/* The character is restored into TMP1 after the property data is fetched. */
#define XCLASS_SAVE_CHAR                              0x002
/* The character type (general category) is needed. */
#define XCLASS_HAS_TYPE                               0x004
/* A script (PT_SC, or a non-negated PT_SCX) check is present. */
#define XCLASS_HAS_SCRIPT                             0x008
/* A script extension (PT_SCX) check is present. */
#define XCLASS_HAS_SCRIPT_EXTENSION                   0x010
/* A boolean property (PT_BOOL) check is present. */
#define XCLASS_HAS_BOOL                               0x020
/* A bidi class (PT_BIDICL) check is present. */
#define XCLASS_HAS_BIDICL                             0x040

/* Any of these requires loading the Unicode character database record. */
#define XCLASS_NEEDS_UCD \
  (XCLASS_HAS_TYPE | XCLASS_HAS_SCRIPT | XCLASS_HAS_SCRIPT_EXTENSION | XCLASS_HAS_BOOL | XCLASS_HAS_BIDICL)

/* A negated script extension check is present. */
#define XCLASS_SCRIPT_EXTENSION_NOTPROP               0x080
/* TMP2 was saved during the script extension checks, and must be restored
from RETURN_ADDR / LOCAL0 respectively. */
#define XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR   0x100
#define XCLASS_SCRIPT_EXTENSION_RESTORE_LOCAL0        0x200

#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Forward declaration: compile_xclass_matchingpath calls this function (with
OP_ALLANY) before its definition appears. */
static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
|
||||
|
||||
/* TMP3 must be preserved because it is used by compile_iterator_matchingpath. */
|
||||
static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, sljit_u32 status)
|
||||
{
|
||||
DEFINE_COMPILER;
|
||||
jump_list *found = NULL;
|
||||
jump_list *check_result = NULL;
|
||||
jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
|
||||
sljit_uw c, charoffset;
|
||||
sljit_u32 max = READ_CHAR_MAX, min = 0;
|
||||
struct sljit_jump *jump = NULL;
|
||||
PCRE2_UCHAR flags;
|
||||
PCRE2_SPTR ccbegin;
|
||||
sljit_u32 compares, invertcmp, depth;
|
||||
sljit_u32 first_item, last_item, mid_item;
|
||||
sljit_u32 range_start, range_end;
|
||||
xclass_ranges ranges;
|
||||
BOOL has_cmov, last_range_set;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
sljit_u32 category_list = 0;
|
||||
sljit_u32 items;
|
||||
int typereg = TMP1;
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
SLJIT_ASSERT(common->locals_size >= SSIZE_OF(sw));
|
||||
/* Scanning the necessary info. */
|
||||
flags = *cc++;
|
||||
ccbegin = cc;
|
||||
compares = 0;
|
||||
|
||||
if (flags & XCL_MAP)
|
||||
cc += 32 / sizeof(PCRE2_UCHAR);
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
while (*cc == XCL_PROP || *cc == XCL_NOTPROP)
|
||||
{
|
||||
compares++;
|
||||
cc++;
|
||||
|
||||
items = 0;
|
||||
|
||||
switch(*cc)
|
||||
{
|
||||
case PT_LAMP:
|
||||
items = UCPCAT3(ucp_Lu, ucp_Ll, ucp_Lt);
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
items = UCPCAT_RANGE(PRIV(ucp_typerange)[(int)cc[1] * 2], PRIV(ucp_typerange)[(int)cc[1] * 2 + 1]);
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
items = UCPCAT(cc[1]);
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
items = UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N;
|
||||
break;
|
||||
|
||||
case PT_ALNUM:
|
||||
items = UCPCAT_L | UCPCAT_N;
|
||||
break;
|
||||
|
||||
case PT_SCX:
|
||||
status |= XCLASS_HAS_SCRIPT_EXTENSION;
|
||||
if (cc[-1] == XCL_NOTPROP)
|
||||
{
|
||||
status |= XCLASS_SCRIPT_EXTENSION_NOTPROP;
|
||||
break;
|
||||
}
|
||||
compares++;
|
||||
/* Fall through */
|
||||
|
||||
case PT_SC:
|
||||
status |= XCLASS_HAS_SCRIPT;
|
||||
break;
|
||||
|
||||
case PT_SPACE:
|
||||
case PT_PXSPACE:
|
||||
case PT_PXGRAPH:
|
||||
case PT_PXPRINT:
|
||||
case PT_PXPUNCT:
|
||||
status |= XCLASS_SAVE_CHAR | XCLASS_HAS_TYPE;
|
||||
break;
|
||||
|
||||
case PT_UCNC:
|
||||
case PT_PXXDIGIT:
|
||||
status |= XCLASS_SAVE_CHAR;
|
||||
break;
|
||||
|
||||
case PT_BOOL:
|
||||
status |= XCLASS_HAS_BOOL;
|
||||
break;
|
||||
|
||||
case PT_BIDICL:
|
||||
status |= XCLASS_HAS_BIDICL;
|
||||
break;
|
||||
|
||||
default:
|
||||
SLJIT_UNREACHABLE();
|
||||
break;
|
||||
}
|
||||
|
||||
if (items > 0)
|
||||
{
|
||||
if (cc[-1] == XCL_NOTPROP)
|
||||
items ^= UCPCAT_ALL;
|
||||
category_list |= items;
|
||||
status |= XCLASS_HAS_TYPE;
|
||||
compares--;
|
||||
}
|
||||
|
||||
cc += 2;
|
||||
}
|
||||
|
||||
if (category_list == UCPCAT_ALL)
|
||||
{
|
||||
/* All or no characters are accepted, same as dotall. */
|
||||
if (status & XCLASS_IS_ECLASS)
|
||||
{
|
||||
if (list != backtracks)
|
||||
OP2(SLJIT_OR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
|
||||
if (list == backtracks)
|
||||
add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
|
||||
return;
|
||||
}
|
||||
|
||||
if (category_list != 0)
|
||||
compares++;
|
||||
#endif
|
||||
|
||||
if (*cc != XCL_END)
|
||||
{
|
||||
#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
|
||||
if (common->utf && compares == 0 && !(status & XCLASS_IS_ECLASS))
|
||||
{
|
||||
SLJIT_ASSERT(category_list == 0);
|
||||
max = 0;
|
||||
min = (flags & XCL_MAP) != 0 ? 0 : READ_CHAR_MAX;
|
||||
xclass_update_min_max(common, cc, &min, &max);
|
||||
}
|
||||
#endif
|
||||
compares++;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
status |= XCLASS_SAVE_CHAR;
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
SLJIT_ASSERT(compares > 0 || category_list != 0);
|
||||
#else /* !SUPPORT_UNICODE */
|
||||
SLJIT_ASSERT(compares > 0);
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* We are not necessarily in utf mode even in 8 bit mode. */
|
||||
cc = ccbegin;
|
||||
if (!(status & XCLASS_IS_ECLASS))
|
||||
{
|
||||
if ((flags & XCL_NOT) != 0)
|
||||
read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||
else
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
read_char(common, min, max, (status & XCLASS_NEEDS_UCD) ? backtracks : NULL, 0);
|
||||
#else /* !SUPPORT_UNICODE */
|
||||
read_char(common, min, max, NULL, 0);
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
}
|
||||
|
||||
if ((flags & XCL_MAP) != 0)
|
||||
{
|
||||
SLJIT_ASSERT(!(status & XCLASS_IS_ECLASS));
|
||||
xclass_check_bitset(common, (const sljit_u8 *)cc, &found, backtracks);
|
||||
cc += 32 / sizeof(PCRE2_UCHAR);
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (status & XCLASS_NEEDS_UCD)
|
||||
{
|
||||
if ((status & (XCLASS_SAVE_CHAR | XCLASS_IS_ECLASS)) == XCLASS_SAVE_CHAR)
|
||||
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
if (!common->utf)
|
||||
{
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
|
||||
SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, UNASSIGNED_UTF_CHAR, TMP1);
|
||||
}
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
|
||||
|
||||
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
|
||||
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
|
||||
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
|
||||
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
|
||||
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
|
||||
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
|
||||
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
|
||||
OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
|
||||
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
|
||||
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
|
||||
|
||||
ccbegin = cc;
|
||||
|
||||
if (status & XCLASS_HAS_BIDICL)
|
||||
{
|
||||
OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
|
||||
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_SHIFT);
|
||||
|
||||
while (*cc == XCL_PROP || *cc == XCL_NOTPROP)
|
||||
{
|
||||
cc++;
|
||||
|
||||
if (*cc == PT_BIDICL)
|
||||
{
|
||||
compares--;
|
||||
invertcmp = (compares == 0 && list != backtracks);
|
||||
if (cc[-1] == XCL_NOTPROP)
|
||||
invertcmp ^= 0x1;
|
||||
jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
|
||||
add_jump(compiler, compares > 0 ? list : backtracks, jump);
|
||||
}
|
||||
cc += 2;
|
||||
}
|
||||
|
||||
cc = ccbegin;
|
||||
}
|
||||
|
||||
if (status & XCLASS_HAS_BOOL)
|
||||
{
|
||||
OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops));
|
||||
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BPROPS_MASK);
|
||||
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
|
||||
|
||||
while (*cc == XCL_PROP || *cc == XCL_NOTPROP)
|
||||
{
|
||||
cc++;
|
||||
if (*cc == PT_BOOL)
|
||||
{
|
||||
compares--;
|
||||
invertcmp = (compares == 0 && list != backtracks);
|
||||
if (cc[-1] == XCL_NOTPROP)
|
||||
invertcmp ^= 0x1;
|
||||
|
||||
OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_boolprop_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)(1u << (cc[1] & 0x1f)));
|
||||
add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
|
||||
}
|
||||
cc += 2;
|
||||
}
|
||||
|
||||
cc = ccbegin;
|
||||
}
|
||||
|
||||
if (status & XCLASS_HAS_SCRIPT)
|
||||
{
|
||||
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
|
||||
|
||||
while (*cc == XCL_PROP || *cc == XCL_NOTPROP)
|
||||
{
|
||||
cc++;
|
||||
|
||||
switch (*cc)
|
||||
{
|
||||
case PT_SCX:
|
||||
if (cc[-1] == XCL_NOTPROP)
|
||||
break;
|
||||
/* Fall through */
|
||||
|
||||
case PT_SC:
|
||||
compares--;
|
||||
invertcmp = (compares == 0 && list != backtracks);
|
||||
if (cc[-1] == XCL_NOTPROP)
|
||||
invertcmp ^= 0x1;
|
||||
|
||||
add_jump(compiler, compares > 0 ? list : backtracks, CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]));
|
||||
}
|
||||
cc += 2;
|
||||
}
|
||||
|
||||
cc = ccbegin;
|
||||
}
|
||||
|
||||
if (status & XCLASS_HAS_SCRIPT_EXTENSION)
|
||||
{
|
||||
OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
|
||||
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_SCRIPTX_MASK);
|
||||
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
|
||||
|
||||
if (status & XCLASS_SCRIPT_EXTENSION_NOTPROP)
|
||||
{
|
||||
if (status & XCLASS_HAS_TYPE)
|
||||
{
|
||||
if ((status & (XCLASS_SAVE_CHAR | XCLASS_IS_ECLASS)) == XCLASS_SAVE_CHAR)
|
||||
{
|
||||
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, TMP2, 0);
|
||||
status |= XCLASS_SCRIPT_EXTENSION_RESTORE_LOCAL0;
|
||||
}
|
||||
else
|
||||
{
|
||||
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
|
||||
status |= XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR;
|
||||
}
|
||||
}
|
||||
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
|
||||
}
|
||||
|
||||
while (*cc == XCL_PROP || *cc == XCL_NOTPROP)
|
||||
{
|
||||
cc++;
|
||||
|
||||
if (*cc == PT_SCX)
|
||||
{
|
||||
compares--;
|
||||
invertcmp = (compares == 0 && list != backtracks);
|
||||
|
||||
jump = NULL;
|
||||
if (cc[-1] == XCL_NOTPROP)
|
||||
{
|
||||
jump = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, (int)cc[1]);
|
||||
if (invertcmp)
|
||||
{
|
||||
add_jump(compiler, backtracks, jump);
|
||||
jump = NULL;
|
||||
}
|
||||
invertcmp ^= 0x1;
|
||||
}
|
||||
|
||||
OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_script_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)(1u << (cc[1] & 0x1f)));
|
||||
add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
|
||||
|
||||
if (jump != NULL)
|
||||
JUMPHERE(jump);
|
||||
}
|
||||
cc += 2;
|
||||
}
|
||||
|
||||
if (status & XCLASS_SCRIPT_EXTENSION_RESTORE_LOCAL0)
|
||||
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
|
||||
else if (status & XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR)
|
||||
OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
|
||||
cc = ccbegin;
|
||||
}
|
||||
|
||||
if (status & XCLASS_SAVE_CHAR)
|
||||
OP1(SLJIT_MOV, TMP1, 0, (status & XCLASS_IS_ECLASS) ? ECLASS_CHAR_DATA : RETURN_ADDR, 0);
|
||||
|
||||
if (status & XCLASS_HAS_TYPE)
|
||||
{
|
||||
if (status & XCLASS_SAVE_CHAR)
|
||||
typereg = RETURN_ADDR;
|
||||
|
||||
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
|
||||
OP2(SLJIT_SHL, typereg, 0, SLJIT_IMM, 1, TMP2, 0);
|
||||
|
||||
if (category_list > 0)
|
||||
{
|
||||
compares--;
|
||||
invertcmp = (compares == 0 && list != backtracks);
|
||||
OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, category_list);
|
||||
add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Generating code. */
|
||||
charoffset = 0;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
while (*cc == XCL_PROP || *cc == XCL_NOTPROP)
|
||||
{
|
||||
compares--;
|
||||
invertcmp = (compares == 0 && list != backtracks);
|
||||
jump = NULL;
|
||||
|
||||
if (*cc == XCL_NOTPROP)
|
||||
invertcmp ^= 0x1;
|
||||
cc++;
|
||||
switch(*cc)
|
||||
{
|
||||
case PT_LAMP:
|
||||
case PT_GC:
|
||||
case PT_PC:
|
||||
case PT_SC:
|
||||
case PT_SCX:
|
||||
case PT_BOOL:
|
||||
case PT_BIDICL:
|
||||
case PT_WORD:
|
||||
case PT_ALNUM:
|
||||
compares++;
|
||||
/* Already handled. */
|
||||
break;
|
||||
|
||||
case PT_SPACE:
|
||||
case PT_PXSPACE:
|
||||
SET_CHAR_OFFSET(9);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
|
||||
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
|
||||
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
|
||||
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
|
||||
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
|
||||
|
||||
OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Zl, ucp_Zs));
|
||||
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO);
|
||||
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
|
||||
break;
|
||||
|
||||
case PT_UCNC:
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
|
||||
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
|
||||
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
|
||||
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
|
||||
|
||||
SET_CHAR_OFFSET(0xa0);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
|
||||
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||
SET_CHAR_OFFSET(0);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
|
||||
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
|
||||
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
|
||||
break;
|
||||
|
||||
case PT_PXGRAPH:
|
||||
OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT_RANGE(ucp_Zl, ucp_Zs));
|
||||
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
|
||||
|
||||
OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf));
|
||||
jump = JUMP(SLJIT_ZERO);
|
||||
|
||||
c = charoffset;
|
||||
/* In case of ucp_Cf, we overwrite the result. */
|
||||
SET_CHAR_OFFSET(0x2066);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
|
||||
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
|
||||
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
|
||||
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
|
||||
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
|
||||
|
||||
/* Restore charoffset. */
|
||||
SET_CHAR_OFFSET(c);
|
||||
|
||||
JUMPHERE(jump);
|
||||
jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
|
||||
break;
|
||||
|
||||
case PT_PXPRINT:
|
||||
OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT2(ucp_Zl, ucp_Zp));
|
||||
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
|
||||
|
||||
OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf));
|
||||
jump = JUMP(SLJIT_ZERO);
|
||||
|
||||
c = charoffset;
|
||||
/* In case of ucp_Cf, we overwrite the result. */
|
||||
SET_CHAR_OFFSET(0x2066);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
|
||||
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
|
||||
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
|
||||
|
||||
/* Restore charoffset. */
|
||||
SET_CHAR_OFFSET(c);
|
||||
|
||||
JUMPHERE(jump);
|
||||
jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
|
||||
break;
|
||||
|
||||
case PT_PXPUNCT:
|
||||
OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Sc, ucp_So));
|
||||
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
|
||||
|
||||
SET_CHAR_OFFSET(0);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x7f);
|
||||
OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||
|
||||
OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Pc, ucp_Ps));
|
||||
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO);
|
||||
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
|
||||
break;
|
||||
|
||||
case PT_PXXDIGIT:
|
||||
SET_CHAR_OFFSET(CHAR_A);
|
||||
OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, ~0x20);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP2, 0, SLJIT_IMM, CHAR_F - CHAR_A);
|
||||
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||
|
||||
SET_CHAR_OFFSET(CHAR_0);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_9 - CHAR_0);
|
||||
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||
|
||||
SET_CHAR_OFFSET(0xff10);
|
||||
jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff10);
|
||||
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff19 - 0xff10);
|
||||
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||
|
||||
SET_CHAR_OFFSET(0xff21);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff26 - 0xff21);
|
||||
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||
|
||||
SET_CHAR_OFFSET(0xff41);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff41);
|
||||
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||
|
||||
SET_CHAR_OFFSET(0xff10);
|
||||
|
||||
JUMPHERE(jump);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0);
|
||||
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
|
||||
break;
|
||||
|
||||
default:
|
||||
SLJIT_UNREACHABLE();
|
||||
break;
|
||||
}
|
||||
|
||||
cc += 2;
|
||||
|
||||
if (jump != NULL)
|
||||
add_jump(compiler, compares > 0 ? list : backtracks, jump);
|
||||
}
|
||||
|
||||
if (compares == 0)
|
||||
{
|
||||
if (found != NULL)
|
||||
set_jumps(found, LABEL());
|
||||
|
||||
if (status & XCLASS_IS_ECLASS)
|
||||
OP2(SLJIT_OR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
|
||||
return;
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
SLJIT_ASSERT(compares == 1);
|
||||
ranges.range_count = 0;
|
||||
ranges.ranges = ranges.local_ranges;
|
||||
ranges.stack = ranges.local_stack;
|
||||
|
||||
xclass_compute_ranges(common, cc, &ranges);
|
||||
|
||||
/* Memory error is set for the compiler. */
|
||||
if (ranges.stack == NULL)
|
||||
return;
|
||||
|
||||
#if (defined SLJIT_DEBUG && SLJIT_DEBUG) && \
|
||||
defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
|
||||
if (common->utf)
|
||||
{
|
||||
min = READ_CHAR_MAX;
|
||||
max = 0;
|
||||
xclass_update_min_max(common, cc, &min, &max);
|
||||
SLJIT_ASSERT(ranges.ranges[0] == min && ranges.ranges[ranges.range_count - 1] == max);
|
||||
}
|
||||
#endif /* SLJIT_DEBUG && SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
|
||||
|
||||
invertcmp = (list != backtracks);
|
||||
|
||||
if (ranges.range_count == 2)
|
||||
{
|
||||
range_start = ranges.ranges[0];
|
||||
range_end = ranges.ranges[1];
|
||||
|
||||
if (range_start < range_end)
|
||||
{
|
||||
SET_CHAR_OFFSET(range_start);
|
||||
jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start));
|
||||
}
|
||||
else
|
||||
jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_start - charoffset));
|
||||
|
||||
add_jump(compiler, backtracks, jump);
|
||||
|
||||
SLJIT_ASSERT(ranges.stack == ranges.local_stack);
|
||||
if (found != NULL)
|
||||
set_jumps(found, LABEL());
|
||||
|
||||
if (status & XCLASS_IS_ECLASS)
|
||||
OP2(SLJIT_OR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
range_start = ranges.ranges[0];
|
||||
SET_CHAR_OFFSET(range_start);
|
||||
if (ranges.range_count >= 6)
|
||||
{
|
||||
/* Early fail. */
|
||||
range_end = ranges.ranges[ranges.range_count - 1];
|
||||
add_jump(compiler, (flags & XCL_NOT) == 0 ? backtracks : &found,
|
||||
CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start)));
|
||||
}
|
||||
|
||||
depth = 0;
|
||||
first_item = 0;
|
||||
last_item = ranges.range_count - 2;
|
||||
has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV) != 0;
|
||||
|
||||
while (TRUE)
|
||||
{
|
||||
/* At least two items are present. */
|
||||
SLJIT_ASSERT(first_item < last_item && charoffset == ranges.ranges[0]);
|
||||
last_range_set = FALSE;
|
||||
|
||||
if (first_item + 6 <= last_item)
|
||||
{
|
||||
mid_item = ((first_item + last_item) >> 1) & ~(sljit_u32)1;
|
||||
SLJIT_ASSERT(last_item >= mid_item + 4);
|
||||
|
||||
range_end = ranges.ranges[mid_item + 1];
|
||||
if (first_item + 6 > mid_item && ranges.ranges[mid_item] == range_end)
|
||||
{
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_GREATER | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - charoffset));
|
||||
ranges.stack[depth].jump = JUMP(SLJIT_GREATER);
|
||||
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
|
||||
last_range_set = TRUE;
|
||||
}
|
||||
else
|
||||
ranges.stack[depth].jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - charoffset));
|
||||
|
||||
ranges.stack[depth].first_item = (sljit_u32)(mid_item + 2);
|
||||
ranges.stack[depth].last_item = (sljit_u32)last_item;
|
||||
|
||||
depth++;
|
||||
SLJIT_ASSERT(ranges.stack == ranges.local_stack ?
|
||||
depth <= XCLASS_LOCAL_RANGES_LOG2_SIZE : (ranges.stack + depth) <= (xclass_stack_item*)ranges.ranges);
|
||||
|
||||
last_item = mid_item;
|
||||
if (!last_range_set)
|
||||
continue;
|
||||
|
||||
last_item -= 2;
|
||||
}
|
||||
|
||||
if (!last_range_set)
|
||||
{
|
||||
range_start = ranges.ranges[first_item];
|
||||
range_end = ranges.ranges[first_item + 1];
|
||||
|
||||
if (range_start < range_end)
|
||||
{
|
||||
SET_CHAR_OFFSET(range_start);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start));
|
||||
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||
}
|
||||
else
|
||||
{
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_start - charoffset));
|
||||
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
|
||||
}
|
||||
first_item += 2;
|
||||
}
|
||||
|
||||
SLJIT_ASSERT(first_item <= last_item);
|
||||
|
||||
do
|
||||
{
|
||||
range_start = ranges.ranges[first_item];
|
||||
range_end = ranges.ranges[first_item + 1];
|
||||
|
||||
if (range_start < range_end)
|
||||
{
|
||||
SET_CHAR_OFFSET(range_start);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start));
|
||||
|
||||
if (has_cmov)
|
||||
SELECT(SLJIT_LESS_EQUAL, TMP2, STR_END, 0, TMP2);
|
||||
else
|
||||
OP_FLAGS(SLJIT_OR | ((first_item == last_item) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_LESS_EQUAL);
|
||||
}
|
||||
else
|
||||
{
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_start - charoffset));
|
||||
|
||||
if (has_cmov)
|
||||
SELECT(SLJIT_EQUAL, TMP2, STR_END, 0, TMP2);
|
||||
else
|
||||
OP_FLAGS(SLJIT_OR | ((first_item == last_item) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
|
||||
}
|
||||
|
||||
first_item += 2;
|
||||
}
|
||||
while (first_item <= last_item);
|
||||
|
||||
if (depth == 0) break;
|
||||
|
||||
add_jump(compiler, &check_result, JUMP(SLJIT_JUMP));
|
||||
|
||||
/* The charoffset resets after the end of a branch is reached. */
|
||||
charoffset = ranges.ranges[0];
|
||||
depth--;
|
||||
first_item = ranges.stack[depth].first_item;
|
||||
last_item = ranges.stack[depth].last_item;
|
||||
JUMPHERE(ranges.stack[depth].jump);
|
||||
}
|
||||
|
||||
if (check_result != NULL)
|
||||
set_jumps(check_result, LABEL());
|
||||
|
||||
if (has_cmov)
|
||||
jump = CMP(SLJIT_NOT_EQUAL ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
|
||||
else
|
||||
{
|
||||
sljit_set_current_flags(compiler, SLJIT_SET_Z);
|
||||
jump = JUMP(SLJIT_NOT_EQUAL ^ invertcmp);
|
||||
}
|
||||
|
||||
add_jump(compiler, backtracks, jump);
|
||||
|
||||
if (found != NULL)
|
||||
set_jumps(found, LABEL());
|
||||
|
||||
if (status & XCLASS_IS_ECLASS)
|
||||
OP2(SLJIT_OR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
|
||||
|
||||
if (ranges.stack != ranges.local_stack)
|
||||
SLJIT_FREE(ranges.stack, compiler->allocator_data);
|
||||
}
|
||||
|
||||
/* Emits the matching path for an extended character class (eclass).

One character is read into TMP1, then the items stored between cc and the end
of the eclass are processed in order. Each ECL_XCLASS item is compiled with
compile_xclass_matchingpath() in XCLASS_IS_ECLASS mode, which ORs 1 into
ECLASS_STACK_DATA when the character belongs to that class. Before every
xclass except the first one, ECLASS_STACK_DATA is shifted left by one, so the
register is used as a stack of one-bit results whose top is bit 0. ECL_AND,
ECL_OR and ECL_XOR combine the two topmost bits into one, and ECL_NOT flips the
topmost bit.

Arguments:
  common      the compiler state
  cc          points to the length field of the eclass (GET(cc, 0))
  backtracks  receives the jumps taken when the character does not match

Returns:      pointer to the end of the eclass (cc + GET(cc, 0) - 1) */

static PCRE2_SPTR compile_eclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
PCRE2_SPTR end = cc + GET(cc, 0) - 1;
PCRE2_SPTR begin;
jump_list *not_found;
jump_list *found = NULL;

cc += LINK_SIZE;

/* Should be optimized later. */
read_char(common, 0, READ_CHAR_MAX, backtracks, 0);

/* An optional 32 byte bitmap follows the flag unit. Jumps collected in
'found' are resolved at the very end of this function. */
if (((*cc++) & ECL_MAP) != 0)
  {
  xclass_check_bitset(common, (const sljit_u8 *)cc, &found, backtracks);
  cc += 32 / sizeof(PCRE2_UCHAR);
  }

begin = cc;

/* Save the two work registers in LOCAL0 / LOCAL1, clear the result stack, and
keep a copy of the character because TMP1 is reloaded before each later
xclass. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, ECLASS_CHAR_DATA, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, ECLASS_STACK_DATA, 0);
OP1(SLJIT_MOV, ECLASS_STACK_DATA, 0, SLJIT_IMM, 0);
OP1(SLJIT_MOV, ECLASS_CHAR_DATA, 0, TMP1, 0);

/* All eclass must start with an xclass. */
SLJIT_ASSERT(*cc == ECL_XCLASS);

while (cc < end)
  {
  switch (*cc)
    {
    case ECL_AND:
    ++cc;
    /* TMP2 has every bit set except bit 0, which keeps the top-of-stack value.
    After the shift, the AND therefore only affects the new bit 0. */
    OP2(SLJIT_OR, TMP2, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, ~(sljit_sw)1);
    OP2(SLJIT_LSHR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
    OP2(SLJIT_AND, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, TMP2, 0);
    break;

    case ECL_OR:
    ++cc;
    /* TMP2 holds only the top-of-stack bit, so OR changes only bit 0. */
    OP2(SLJIT_AND, TMP2, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
    OP2(SLJIT_LSHR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
    OP2(SLJIT_OR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, TMP2, 0);
    break;

    case ECL_XOR:
    ++cc;
    /* Same scheme as ECL_OR, with XOR as the combining operation. */
    OP2(SLJIT_AND, TMP2, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
    OP2(SLJIT_LSHR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
    OP2(SLJIT_XOR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, TMP2, 0);
    break;

    case ECL_NOT:
    ++cc;
    /* Flip the top-of-stack bit. */
    OP2(SLJIT_XOR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
    break;

    default:
    SLJIT_ASSERT(*cc == ECL_XCLASS);
    if (cc != begin)
      {
      /* Restore the character and push a new zero bit for this operand. */
      OP1(SLJIT_MOV, TMP1, 0, ECLASS_CHAR_DATA, 0);
      OP2(SLJIT_SHL, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
      }

    /* A failing operand must not backtrack: its failure jumps are collected
    in a local list and resolved right after the operand, leaving bit 0 clear. */
    not_found = NULL;
    compile_xclass_matchingpath(common, cc + 1 + LINK_SIZE, &not_found, XCLASS_IS_ECLASS);
    set_jumps(not_found, LABEL());

    cc += GET(cc, 1);
    break;
    }
  }

/* Test the final result first, then restore the saved registers. The jump
below uses the flags produced by the SUB (NOTE(review): this relies on the two
register restoring moves leaving the flags intact - confirm for SLJIT_MOV). */
OP2U(SLJIT_SUB | SLJIT_SET_Z, ECLASS_STACK_DATA, 0, SLJIT_IMM, 0);
OP1(SLJIT_MOV, ECLASS_CHAR_DATA, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
OP1(SLJIT_MOV, ECLASS_STACK_DATA, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);
add_jump(compiler, backtracks, JUMP(SLJIT_EQUAL));
set_jumps(found, LABEL());
return end;
}
|
||||
|
||||
/* Generic character matching code. */
|
||||
|
||||
#undef SET_CHAR_OFFSET
|
||||
#undef READ_FROM_CHAR_LIST
|
||||
#undef XCLASS_LOCAL_RANGES_SIZE
|
||||
#undef XCLASS_LOCAL_RANGES_LOG2_SIZE
|
||||
|
||||
#endif /* SUPPORT_WIDE_CHARS */
|
||||
|
||||
static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
|
||||
compare_context *context, jump_list **backtracks)
|
||||
{
|
||||
DEFINE_COMPILER;
|
||||
unsigned int othercasebit = 0;
|
||||
PCRE2_SPTR othercasechar = NULL;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
int utflength;
|
||||
#endif
|
||||
|
||||
if (caseless && char_has_othercase(common, cc))
|
||||
{
|
||||
othercasebit = char_get_othercase_bit(common, cc);
|
||||
SLJIT_ASSERT(othercasebit);
|
||||
/* Extracting bit difference info. */
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
othercasechar = cc + (othercasebit >> 8);
|
||||
othercasebit &= 0xff;
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
|
||||
/* Note that this code only handles characters in the BMP. If there
|
||||
ever are characters outside the BMP whose othercase differs in only one
|
||||
bit from itself (there currently are none), this code will need to be
|
||||
revised for PCRE2_CODE_UNIT_WIDTH == 32. */
|
||||
othercasechar = cc + (othercasebit >> 9);
|
||||
if ((othercasebit & 0x100) != 0)
|
||||
othercasebit = (othercasebit & 0xff) << 8;
|
||||
else
|
||||
othercasebit &= 0xff;
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
|
||||
}
|
||||
|
||||
if (context->sourcereg == -1)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
|
||||
if (context->length >= 4)
|
||||
OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||
else if (context->length >= 2)
|
||||
OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||
else
|
||||
#endif
|
||||
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
|
||||
if (context->length >= 4)
|
||||
OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||
else
|
||||
#endif
|
||||
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 32
|
||||
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
|
||||
context->sourcereg = TMP2;
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
utflength = 1;
|
||||
if (common->utf && HAS_EXTRALEN(*cc))
|
||||
utflength += GET_EXTRALEN(*cc);
|
||||
|
||||
do
|
||||
{
|
||||
#endif
|
||||
|
||||
context->length -= IN_UCHARS(1);
|
||||
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
|
||||
|
||||
/* Unaligned read is supported. */
|
||||
if (othercasebit != 0 && othercasechar == cc)
|
||||
{
|
||||
context->c.asuchars[context->ucharptr] = *cc | othercasebit;
|
||||
context->oc.asuchars[context->ucharptr] = othercasebit;
|
||||
}
|
||||
else
|
||||
{
|
||||
context->c.asuchars[context->ucharptr] = *cc;
|
||||
context->oc.asuchars[context->ucharptr] = 0;
|
||||
}
|
||||
context->ucharptr++;
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
|
||||
#else
|
||||
if (context->ucharptr >= 2 || context->length == 0)
|
||||
#endif
|
||||
{
|
||||
if (context->length >= 4)
|
||||
OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||
else if (context->length >= 2)
|
||||
OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
else if (context->length >= 1)
|
||||
OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||
context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
|
||||
|
||||
switch(context->ucharptr)
|
||||
{
|
||||
case 4 / sizeof(PCRE2_UCHAR):
|
||||
if (context->oc.asint != 0)
|
||||
OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
|
||||
break;
|
||||
|
||||
case 2 / sizeof(PCRE2_UCHAR):
|
||||
if (context->oc.asushort != 0)
|
||||
OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
|
||||
break;
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
case 1:
|
||||
if (context->oc.asbyte != 0)
|
||||
OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
SLJIT_UNREACHABLE();
|
||||
break;
|
||||
}
|
||||
context->ucharptr = 0;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Unaligned read is unsupported or in 32 bit mode. */
|
||||
if (context->length >= 1)
|
||||
OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||
|
||||
context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
|
||||
|
||||
if (othercasebit != 0 && othercasechar == cc)
|
||||
{
|
||||
OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
|
||||
}
|
||||
else
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
|
||||
|
||||
#endif
|
||||
|
||||
cc++;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
utflength--;
|
||||
}
|
||||
while (utflength > 0);
|
||||
#endif
|
||||
|
||||
return cc;
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
|
||||
/* The code in this function copies the logic of the interpreter function that
|
||||
is defined in the pcre2_extuni.c source. If that code is updated, this
|
||||
function, and those below it, must be kept in step (note by PH, June 2024). */
|
||||
|
||||
/* Finds the end of the extended grapheme cluster that starts at cc, for
subjects that are decoded with GETCHARINC / BACKCHAR / GETCHAR.

Arguments:
  args   the JIT arguments; args->begin and args->end delimit the subject
  cc     points to the first code unit of the cluster

Returns: pointer just after the last character that belongs to the cluster */

static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
{
PCRE2_SPTR subject_start = args->begin;
PCRE2_SPTR subject_end = args->end;
PCRE2_SPTR prev_start = cc;
PCRE2_SPTR cluster_end = NULL;
PCRE2_SPTR scan;
int left_gb, right_gb, ri_count;
BOOL at_first = TRUE;
BOOL zwj_after_pictographic = FALSE;
uint32_t ch;

do
  {
  GETCHARINC(ch, cc);
  right_gb = UCD_GRAPHBREAK(ch);

  /* The first character is always part of the cluster. */
  if (at_first)
    {
    at_first = FALSE;
    left_gb = right_gb;
    cluster_end = cc;
    continue;
    }

  /* Stop when the break table forbids joining the two characters. */
  if ((PRIV(ucp_gbtable)[left_gb] & (1 << right_gb)) == 0)
    break;

  /* An Extended Pictographic may follow a ZWJ only when that ZWJ itself came
  after an Extended Pictographic. */
  if (!zwj_after_pictographic && left_gb == ucp_gbZWJ && right_gb == ucp_gbExtended_Pictographic)
    break;

  /* Two Regional Indicators stay together only when an even number of
  Regional Indicators precede them. */
  if (left_gb == ucp_gbRegional_Indicator && right_gb == ucp_gbRegional_Indicator)
    {
    ri_count = 0;

    /* Walk backwards from the left-hand character and count the RIs. */
    for (scan = prev_start; scan > subject_start; ri_count++)
      {
      scan--;
      BACKCHAR(scan);
      GETCHAR(ch, scan);

      if (UCD_GRAPHBREAK(ch) != ucp_gbRegional_Indicator)
        break;
      }

    /* An odd count requires a grapheme break here. */
    if (ri_count & 1)
      break;
    }

  /* Remember whether a ZWJ has just followed an Extended Pictographic
  (Extend characters in between are permitted by the next statement). */
  zwj_after_pictographic = (left_gb == ucp_gbExtended_Pictographic && right_gb == ucp_gbZWJ);

  /* Extend after Extended_Pictographic leaves the left-hand property as it
  is, so any number of them may precede a ZWJ. */
  if (!(right_gb == ucp_gbExtend && left_gb == ucp_gbExtended_Pictographic))
    left_gb = right_gb;

  prev_start = cluster_end;
  cluster_end = cc;
  }
while (cc < subject_end);

return cluster_end;
}
|
||||
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||
|
||||
/* The code in this function copies the logic of the interpreter function that
|
||||
is defined in the pcre2_extuni.c source. If that code is updated, this
|
||||
function, and the one below it, must be kept in step (note by PH, June 2024). */
|
||||
|
||||
/* Finds the end of the extended grapheme cluster that starts at cc, reading
characters with the *_INVALID decoding macros. Whenever such a macro takes its
invalid action, scanning stops at that point.

Arguments:
  args   the JIT arguments; args->begin and args->end delimit the subject
  cc     points to the first code unit of the cluster

Returns: pointer just after the last character that belongs to the cluster,
         or NULL when the scan stops before the first character is accepted */

static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
{
PCRE2_SPTR subject_start = args->begin;
PCRE2_SPTR subject_end = args->end;
PCRE2_SPTR prev_start = cc;
PCRE2_SPTR cluster_end = NULL;
PCRE2_SPTR scan;
int left_gb, right_gb, ri_count;
BOOL at_first = TRUE;
BOOL zwj_after_pictographic = FALSE;
uint32_t ch;

do
  {
  /* The 'break' action leaves the enclosing do-while loop. */
  GETCHARINC_INVALID(ch, cc, subject_end, break);
  right_gb = UCD_GRAPHBREAK(ch);

  /* The first character is always part of the cluster. */
  if (at_first)
    {
    at_first = FALSE;
    left_gb = right_gb;
    cluster_end = cc;
    continue;
    }

  /* Stop when the break table forbids joining the two characters. */
  if ((PRIV(ucp_gbtable)[left_gb] & (1 << right_gb)) == 0)
    break;

  /* An Extended Pictographic may follow a ZWJ only when that ZWJ itself came
  after an Extended Pictographic. */
  if (!zwj_after_pictographic && left_gb == ucp_gbZWJ && right_gb == ucp_gbExtended_Pictographic)
    break;

  /* Two Regional Indicators stay together only when an even number of
  Regional Indicators precede them. */
  if (left_gb == ucp_gbRegional_Indicator && right_gb == ucp_gbRegional_Indicator)
    {
    ri_count = 0;

    /* Walk backwards from the left-hand character and count the RIs. Here the
    'break' action only leaves this counting loop. */
    for (scan = prev_start; scan > subject_start; ri_count++)
      {
      GETCHARBACK_INVALID(ch, scan, subject_start, break);

      if (UCD_GRAPHBREAK(ch) != ucp_gbRegional_Indicator)
        break;
      }

    /* An odd count requires a grapheme break here. */
    if (ri_count & 1)
      break;
    }

  /* Remember whether a ZWJ has just followed an Extended Pictographic
  (Extend characters in between are permitted by the next statement). */
  zwj_after_pictographic = (left_gb == ucp_gbExtended_Pictographic && right_gb == ucp_gbZWJ);

  /* Extend after Extended_Pictographic leaves the left-hand property as it
  is, so any number of them may precede a ZWJ. */
  if (!(right_gb == ucp_gbExtend && left_gb == ucp_gbExtended_Pictographic))
    left_gb = right_gb;

  prev_start = cluster_end;
  cluster_end = cc;
  }
while (cc < subject_end);

return cluster_end;
}
|
||||
|
||||
/* The code in this function copies the logic of the interpreter function that
|
||||
is defined in the pcre2_extuni.c source. If that code is updated, this
|
||||
function must be kept in step (note by PH, June 2024). */
|
||||
|
||||
/* Finds the end of the extended grapheme cluster that starts at cc when every
code unit is a whole character. In the 32 bit library a value of 0x110000 or
above ends the scan.

Arguments:
  args   the JIT arguments; args->begin and args->end delimit the subject
  cc     points to the first code unit of the cluster

Returns: pointer just after the last code unit that belongs to the cluster */

static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
{
PCRE2_SPTR subject_start = args->begin;
PCRE2_SPTR subject_end = args->end;
PCRE2_SPTR scan;
int left_gb, right_gb, ri_count;
BOOL zwj_after_pictographic = FALSE;
uint32_t ch;

/* The code unit is read directly instead of using GETCHARINC(). */
ch = *cc++;

#if PCRE2_CODE_UNIT_WIDTH == 32
if (ch >= 0x110000)
  return cc;
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
left_gb = UCD_GRAPHBREAK(ch);

/* cc is advanced only after a character has been accepted; every 'break'
leaves it pointing at the first character outside the cluster. */
for (; cc < subject_end; cc++)
  {
  ch = *cc;
#if PCRE2_CODE_UNIT_WIDTH == 32
  if (ch >= 0x110000)
    break;
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
  right_gb = UCD_GRAPHBREAK(ch);

  /* Stop when the break table forbids joining the two characters. */
  if ((PRIV(ucp_gbtable)[left_gb] & (1 << right_gb)) == 0)
    break;

  /* An Extended Pictographic may follow a ZWJ only when that ZWJ itself came
  after an Extended Pictographic. */
  if (!zwj_after_pictographic && left_gb == ucp_gbZWJ && right_gb == ucp_gbExtended_Pictographic)
    break;

  /* Two Regional Indicators stay together only when an even number of
  Regional Indicators precede them. */
  if (left_gb == ucp_gbRegional_Indicator && right_gb == ucp_gbRegional_Indicator)
    {
    ri_count = 0;

    /* Walk backwards from the left-hand character and count the RIs. */
    for (scan = cc - 1; scan > subject_start; ri_count++)
      {
      scan--;
      ch = *scan;
#if PCRE2_CODE_UNIT_WIDTH == 32
      if (ch >= 0x110000)
        break;
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */

      if (UCD_GRAPHBREAK(ch) != ucp_gbRegional_Indicator)
        break;
      }

    /* An odd count requires a grapheme break here. */
    if (ri_count & 1)
      break;
    }

  /* Remember whether a ZWJ has just followed an Extended Pictographic
  (Extend characters in between are permitted by the next statement). */
  zwj_after_pictographic = (left_gb == ucp_gbExtended_Pictographic && right_gb == ucp_gbZWJ);

  /* Extend after Extended_Pictographic leaves the left-hand property as it
  is, so any number of them may precede a ZWJ. */
  if (!(right_gb == ucp_gbExtend && left_gb == ucp_gbExtended_Pictographic))
    left_gb = right_gb;
  }

return cc;
}
|
||||
|
||||
/* Emits the code for a PT_CLIST property item: the character read from the
subject is compared with every member of a NOTACHAR terminated caseless set
taken from PRIV(ucd_caseless_sets). TMP2 collects the result (non-zero when a
member matched), and the final jump to 'backtracks' is taken on "no member
matched" for OP_PROP and on "a member matched" otherwise.

Arguments:
  common      the compiler state
  cc          points to the opcode; cc[1] is PT_CLIST and cc[2] is the offset
              of the set inside PRIV(ucd_caseless_sets)
  backtracks  receives the failure jump */

static void compile_clist(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
const sljit_u32 *member;
struct sljit_jump *fail;
sljit_u32 lowest = 0, highest = READ_CHAR_MAX;
BOOL use_select = sljit_has_cpu_feature(SLJIT_HAS_CMOV) != 0;
BOOL last_member;

SLJIT_ASSERT(cc[1] == PT_CLIST);

/* For the positive case the read can be limited to the range of the set. */
if (cc[0] == OP_PROP)
  {
  member = PRIV(ucd_caseless_sets) + cc[2];
  lowest = highest = *member;

  for (member++; *member != NOTACHAR; member++)
    {
    if (*member > highest) highest = *member;
    if (*member < lowest) lowest = *member;
    }
  }

member = PRIV(ucd_caseless_sets) + cc[2];
SLJIT_ASSERT(member[0] != NOTACHAR && member[1] != NOTACHAR);
/* NOTACHAR is greater than any character, so a terminator in member[2] also
satisfies the ordering check. */
SLJIT_ASSERT(member[0] < member[1] && member[1] < member[2]);

read_char(common, lowest, highest, backtracks, READ_CHAR_UPDATE_STR_PTR);

/* The set has at least two members, otherwise the normal code path would
have been used. NOTACHAR is the unsigned maximum. */

/* Two members which differ in a single bit are checked with one compare. */
if (is_powerof2(member[1] ^ member[0]))
  {
  OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(member[1] ^ member[0]));
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, member[1]);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
  member += 2;
  }
else if (is_powerof2(member[2] ^ member[1]))
  {
  SLJIT_ASSERT(member[2] != NOTACHAR);

  OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(member[2] ^ member[1]));
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, member[2]);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);

  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)member[0]);

  /* Without conditional select, the zero flag is requested only on the very
  last member, because the closing jump tests it. */
  if (use_select)
    SELECT(SLJIT_EQUAL, TMP2, STR_END, 0, TMP2);
  else
    OP_FLAGS(SLJIT_OR | ((member[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);

  member += 3;
  }
else
  {
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)member[0]);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
  member++;
  }

/* The remaining members are checked one by one. */
while (*member != NOTACHAR)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)member[0]);
  last_member = (member[1] == NOTACHAR);
  member++;

  if (use_select)
    SELECT(SLJIT_EQUAL, TMP2, STR_END, 0, TMP2);
  else
    OP_FLAGS(SLJIT_OR | (last_member ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
  }

if (use_select)
  fail = CMP(cc[0] == OP_PROP ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
else
  fail = JUMP(cc[0] == OP_PROP ? SLJIT_ZERO : SLJIT_NOT_ZERO);

add_jump(compiler, backtracks, fail);
}
|
||||
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
|
||||
{
|
||||
DEFINE_COMPILER;
|
||||
int length;
|
||||
unsigned int c, oc, bit;
|
||||
compare_context context;
|
||||
struct sljit_jump *jump[3];
|
||||
jump_list *end_list;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
PCRE2_UCHAR propdata[5];
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
switch(type)
|
||||
{
|
||||
case OP_NOT_DIGIT:
|
||||
case OP_DIGIT:
|
||||
/* Digits are usually 0-9, so it is worth to optimize them. */
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
|
||||
read_char7_type(common, backtracks, type == OP_NOT_DIGIT);
|
||||
else
|
||||
#endif
|
||||
read_char8_type(common, backtracks, type == OP_NOT_DIGIT);
|
||||
/* Flip the starting bit in the negative case. */
|
||||
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_digit);
|
||||
add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
|
||||
return cc;
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
case OP_WHITESPACE:
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
|
||||
read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE);
|
||||
else
|
||||
#endif
|
||||
read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE);
|
||||
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_space);
|
||||
add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
|
||||
return cc;
|
||||
|
||||
case OP_NOT_WORDCHAR:
|
||||
case OP_WORDCHAR:
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
|
||||
read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR);
|
||||
else
|
||||
#endif
|
||||
read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR);
|
||||
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_word);
|
||||
add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
|
||||
return cc;
|
||||
|
||||
case OP_ANY:
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
|
||||
{
|
||||
jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
|
||||
end_list = NULL;
|
||||
if (common->mode != PCRE2_JIT_PARTIAL_HARD)
|
||||
add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||
else
|
||||
check_str_end(common, &end_list);
|
||||
|
||||
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
|
||||
set_jumps(end_list, LABEL());
|
||||
JUMPHERE(jump[0]);
|
||||
}
|
||||
else
|
||||
check_newlinechar(common, common->nltype, backtracks, TRUE);
|
||||
return cc;
|
||||
|
||||
case OP_ALLANY:
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (common->utf && common->invalid_utf)
|
||||
{
|
||||
read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||
return cc;
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
skip_valid_char(common);
|
||||
return cc;
|
||||
|
||||
case OP_ANYBYTE:
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||
return cc;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
case OP_NOTPROP:
|
||||
case OP_PROP:
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
if (cc[0] == PT_CLIST)
|
||||
{
|
||||
compile_clist(common, cc - 1, backtracks);
|
||||
return cc + 2;
|
||||
}
|
||||
|
||||
propdata[0] = 0;
|
||||
propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
|
||||
propdata[2] = cc[0];
|
||||
propdata[3] = cc[1];
|
||||
propdata[4] = XCL_END;
|
||||
compile_xclass_matchingpath(common, propdata, backtracks, 0);
|
||||
return cc + 2;
|
||||
#endif
|
||||
|
||||
case OP_ANYNL:
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
|
||||
jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
|
||||
/* We don't need to handle soft partial matching case. */
|
||||
end_list = NULL;
|
||||
if (common->mode != PCRE2_JIT_PARTIAL_HARD)
|
||||
add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||
else
|
||||
check_str_end(common, &end_list);
|
||||
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NL);
|
||||
OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
|
||||
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
|
||||
#endif
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
|
||||
jump[1] = JUMP(SLJIT_JUMP);
|
||||
JUMPHERE(jump[0]);
|
||||
check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
|
||||
set_jumps(end_list, LABEL());
|
||||
JUMPHERE(jump[1]);
|
||||
return cc;
|
||||
|
||||
case OP_NOT_HSPACE:
|
||||
case OP_HSPACE:
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
|
||||
if (type == OP_NOT_HSPACE)
|
||||
read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||
else
|
||||
read_char(common, 0x9, 0x3000, NULL, 0);
|
||||
|
||||
add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
|
||||
sljit_set_current_flags(compiler, SLJIT_SET_Z);
|
||||
add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
|
||||
return cc;
|
||||
|
||||
case OP_NOT_VSPACE:
|
||||
case OP_VSPACE:
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
|
||||
if (type == OP_NOT_VSPACE)
|
||||
read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||
else
|
||||
read_char(common, 0xa, 0x2029, NULL, 0);
|
||||
|
||||
add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
|
||||
sljit_set_current_flags(compiler, SLJIT_SET_Z);
|
||||
add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
|
||||
return cc;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
case OP_EXTUNI:
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
|
||||
SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
|
||||
OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
|
||||
common->utf ? (common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_utf)) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
|
||||
if (common->invalid_utf)
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
|
||||
#else
|
||||
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
|
||||
common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
|
||||
if (common->invalid_utf)
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
|
||||
#endif
|
||||
|
||||
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
|
||||
|
||||
if (common->mode == PCRE2_JIT_PARTIAL_HARD)
|
||||
{
|
||||
jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
|
||||
/* Since we successfully read a char above, partial matching must occur. */
|
||||
check_partial(common, TRUE);
|
||||
JUMPHERE(jump[0]);
|
||||
}
|
||||
return cc;
|
||||
#endif
|
||||
|
||||
case OP_CHAR:
|
||||
case OP_CHARI:
|
||||
length = 1;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
|
||||
#endif
|
||||
|
||||
if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
|
||||
detect_partial_match(common, backtracks);
|
||||
|
||||
if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
|
||||
{
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
|
||||
if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
|
||||
|
||||
context.length = IN_UCHARS(length);
|
||||
context.sourcereg = -1;
|
||||
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
|
||||
context.ucharptr = 0;
|
||||
#endif
|
||||
return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (common->utf)
|
||||
{
|
||||
GETCHAR(c, cc);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
c = *cc;
|
||||
|
||||
SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));
|
||||
|
||||
if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||
|
||||
oc = char_othercase(common, c);
|
||||
read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0);
|
||||
|
||||
SLJIT_ASSERT(!is_powerof2(c ^ oc));
|
||||
|
||||
if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
|
||||
{
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, oc);
|
||||
SELECT(SLJIT_EQUAL, TMP1, SLJIT_IMM, c, TMP1);
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
|
||||
}
|
||||
else
|
||||
{
|
||||
jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
|
||||
JUMPHERE(jump[0]);
|
||||
}
|
||||
return cc + length;
|
||||
|
||||
case OP_NOT:
|
||||
case OP_NOTI:
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
|
||||
length = 1;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (common->utf)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
c = *cc;
|
||||
if (c < 128 && !common->invalid_utf)
|
||||
{
|
||||
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
|
||||
if (type == OP_NOT || !char_has_othercase(common, cc))
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
|
||||
else
|
||||
{
|
||||
/* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
|
||||
OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
|
||||
}
|
||||
/* Skip the variable-length character. */
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||
jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
|
||||
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
|
||||
JUMPHERE(jump[0]);
|
||||
return cc + 1;
|
||||
}
|
||||
else
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||
{
|
||||
GETCHARLEN(c, cc, length);
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
c = *cc;
|
||||
|
||||
if (type == OP_NOT || !char_has_othercase(common, cc))
|
||||
{
|
||||
read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
|
||||
}
|
||||
else
|
||||
{
|
||||
oc = char_othercase(common, c);
|
||||
read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||
bit = c ^ oc;
|
||||
if (is_powerof2(bit))
|
||||
{
|
||||
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
|
||||
}
|
||||
else
|
||||
{
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
|
||||
}
|
||||
}
|
||||
return cc + length;
|
||||
|
||||
case OP_CLASS:
|
||||
case OP_NCLASS:
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
|
||||
if (type == OP_NCLASS)
|
||||
read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||
else
|
||||
read_char(common, 0, bit, NULL, 0);
|
||||
#else
|
||||
if (type == OP_NCLASS)
|
||||
read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||
else
|
||||
read_char(common, 0, 255, NULL, 0);
|
||||
#endif
|
||||
|
||||
if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
|
||||
return cc + 32 / sizeof(PCRE2_UCHAR);
|
||||
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
jump[0] = NULL;
|
||||
if (common->utf)
|
||||
{
|
||||
jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
|
||||
if (type == OP_CLASS)
|
||||
{
|
||||
add_jump(compiler, backtracks, jump[0]);
|
||||
jump[0] = NULL;
|
||||
}
|
||||
}
|
||||
#elif PCRE2_CODE_UNIT_WIDTH != 8
|
||||
jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
|
||||
if (type == OP_CLASS)
|
||||
{
|
||||
add_jump(compiler, backtracks, jump[0]);
|
||||
jump[0] = NULL;
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||
|
||||
OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
|
||||
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
|
||||
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
|
||||
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
|
||||
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
|
||||
add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
|
||||
|
||||
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (jump[0] != NULL)
|
||||
JUMPHERE(jump[0]);
|
||||
#endif
|
||||
return cc + 32 / sizeof(PCRE2_UCHAR);
|
||||
|
||||
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
|
||||
case OP_XCLASS:
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks, 0);
|
||||
return cc + GET(cc, 0) - 1;
|
||||
|
||||
case OP_ECLASS:
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
return compile_eclass_matchingpath(common, cc, backtracks);
|
||||
#endif
|
||||
}
|
||||
SLJIT_UNREACHABLE();
|
||||
return cc;
|
||||
}
|
||||
|
||||
/* Emits matching code for the opcode at cc, consuming at least one input
character. To decrease the number of length checks, consecutive OP_CHAR /
OP_CHARI items whose encoded length is fixed are concatenated and compared
as one sequence by byte_sequence_compare(); anything else is delegated to
compile_char1_matchingpath().

Arguments:
  common      the JIT compiler state
  cc          current position in the compiled pattern
  ccend       end of the compiled pattern range that may be scanned
  backtracks  jump list receiving the "no match" jumps

Returns:      the pattern position after the item(s) that were compiled
*/
static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
{
DEFINE_COMPILER;
PCRE2_SPTR ccbegin = cc;
compare_context context;
int size;

/* First pass: measure how many code units of fixed-length characters follow. */
context.length = 0;
for (;;)
  {
  if (cc >= ccend)
    break;

  size = 0;
  if (*cc == OP_CHAR || *cc == OP_CHARI)
    {
    /* A caseless character that has an other case, but for which
    char_get_othercase_bit() yields 0, is not treated as fixed length here;
    it ends the run (size stays 0). */
    if (*cc == OP_CHARI && char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
      size = 0;
    else
      {
      size = 1;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[1]))
        size += GET_EXTRALEN(cc[1]);
#endif
      }
    }

  cc += 1 + size;
  context.length += IN_UCHARS(size);

  /* Stop at the first non-fixed item or once the sequence exceeds 128. */
  if (size <= 0 || context.length > 128)
    break;
  }

/* Second pass starts again from the first opcode. */
cc = ccbegin;

/* A non-fixed length character will be checked if length == 0. */
if (context.length <= 0)
  return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);

/* We have a fixed-length byte sequence: advance STR_PTR over the whole
sequence once and backtrack if that runs past STR_END. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

context.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
context.ucharptr = 0;
#endif

/* byte_sequence_compare() is called once per character until
context.length has dropped to zero. */
do
  {
  cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks);
  }
while (context.length > 0);

return cc;
}
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,200 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2023 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_FROM_PCRE2_JIT_COMPILE
|
||||
#error This file must be included from pcre2_jit_compile.c.
|
||||
#endif
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer)
|
||||
#include <sanitizer/msan_interface.h>
|
||||
#endif /* __has_feature(memory_sanitizer) */
|
||||
#endif /* defined(__has_feature) */
|
||||
|
||||
#ifdef SUPPORT_JIT
|
||||
|
||||
/* Runs a compiled match function with a temporary sljit stack that lives in
a local array of MACHINE_STACK_SIZE bytes inside this function's own frame.
Used by pcre2_jit_match() when no JIT stack has been supplied.

Arguments:
  arguments        the JIT argument block; its stack field is overwritten
  executable_func  the compiled function to call

Returns:           whatever executable_func returns
*/
static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, jit_function executable_func)
{
sljit_u8 local_space[MACHINE_STACK_SIZE];
struct sljit_stack local_stack;
sljit_u8 *space_end = local_space + MACHINE_STACK_SIZE;

/* start/min_start point at the beginning of the buffer, end/top at its end. */
local_stack.min_start = local_space;
local_stack.start = local_space;
local_stack.end = space_end;
local_stack.top = space_end;

arguments->stack = &local_stack;
return executable_func(arguments);
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Do a JIT pattern match *
|
||||
*************************************************/
|
||||
|
||||
/* This function runs a JIT pattern match.
|
||||
|
||||
Arguments:
|
||||
code points to the compiled expression
|
||||
subject points to the subject string
|
||||
length length of subject string (may contain binary zeros)
|
||||
start_offset where to start in the subject string
|
||||
options option bits
|
||||
match_data points to a match_data block
|
||||
mcontext points to a match context
|
||||
|
||||
Returns: > 0 => success; value is the number of ovector pairs filled
|
||||
= 0 => success, but ovector is not big enough
|
||||
-1 => failed to match (PCRE2_ERROR_NOMATCH)
|
||||
< -1 => some kind of unexpected problem
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
|
||||
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
|
||||
pcre2_match_context *mcontext)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
|
||||
(void)code;
|
||||
(void)subject;
|
||||
(void)length;
|
||||
(void)start_offset;
|
||||
(void)options;
|
||||
(void)match_data;
|
||||
(void)mcontext;
|
||||
return PCRE2_ERROR_JIT_BADOPTION;
|
||||
|
||||
#else /* SUPPORT_JIT */
|
||||
|
||||
pcre2_real_code *re = (pcre2_real_code *)code;
|
||||
executable_functions *functions = (executable_functions *)re->executable_jit;
|
||||
pcre2_jit_stack *jit_stack;
|
||||
uint32_t oveccount = match_data->oveccount;
|
||||
uint32_t max_oveccount;
|
||||
union {
|
||||
void *executable_func;
|
||||
jit_function call_executable_func;
|
||||
} convert_executable_func;
|
||||
jit_arguments arguments;
|
||||
int rc;
|
||||
int index = 0;
|
||||
|
||||
if ((options & PCRE2_PARTIAL_HARD) != 0)
|
||||
index = 2;
|
||||
else if ((options & PCRE2_PARTIAL_SOFT) != 0)
|
||||
index = 1;
|
||||
|
||||
if (functions == NULL || functions->executable_funcs[index] == NULL)
|
||||
return PCRE2_ERROR_JIT_BADOPTION;
|
||||
|
||||
/* Sanity checks should be handled by pcre2_match. */
|
||||
arguments.str = subject + start_offset;
|
||||
arguments.begin = subject;
|
||||
arguments.end = subject + length;
|
||||
arguments.match_data = match_data;
|
||||
arguments.startchar_ptr = subject;
|
||||
arguments.mark_ptr = NULL;
|
||||
arguments.options = options;
|
||||
|
||||
if (mcontext != NULL)
|
||||
{
|
||||
arguments.callout = mcontext->callout;
|
||||
arguments.callout_data = mcontext->callout_data;
|
||||
arguments.offset_limit = mcontext->offset_limit;
|
||||
arguments.limit_match = (mcontext->match_limit < re->limit_match)?
|
||||
mcontext->match_limit : re->limit_match;
|
||||
if (mcontext->jit_callback != NULL)
|
||||
jit_stack = mcontext->jit_callback(mcontext->jit_callback_data);
|
||||
else
|
||||
jit_stack = (pcre2_jit_stack *)mcontext->jit_callback_data;
|
||||
}
|
||||
else
|
||||
{
|
||||
arguments.callout = NULL;
|
||||
arguments.callout_data = NULL;
|
||||
arguments.offset_limit = PCRE2_UNSET;
|
||||
arguments.limit_match = (MATCH_LIMIT < re->limit_match)?
|
||||
MATCH_LIMIT : re->limit_match;
|
||||
jit_stack = NULL;
|
||||
}
|
||||
|
||||
|
||||
max_oveccount = functions->top_bracket;
|
||||
if (oveccount > max_oveccount)
|
||||
oveccount = max_oveccount;
|
||||
arguments.oveccount = oveccount << 1;
|
||||
|
||||
|
||||
convert_executable_func.executable_func = functions->executable_funcs[index];
|
||||
if (jit_stack != NULL)
|
||||
{
|
||||
arguments.stack = (struct sljit_stack *)(jit_stack->stack);
|
||||
rc = convert_executable_func.call_executable_func(&arguments);
|
||||
}
|
||||
else
|
||||
rc = jit_machine_stack_exec(&arguments, convert_executable_func.call_executable_func);
|
||||
|
||||
if (rc > (int)oveccount)
|
||||
rc = 0;
|
||||
match_data->code = re;
|
||||
match_data->subject = (rc >= 0 || rc == PCRE2_ERROR_PARTIAL)? subject : NULL;
|
||||
match_data->subject_length = length;
|
||||
match_data->rc = rc;
|
||||
match_data->startchar = arguments.startchar_ptr - subject;
|
||||
match_data->leftchar = 0;
|
||||
match_data->rightchar = 0;
|
||||
match_data->mark = arguments.mark_ptr;
|
||||
match_data->matchedby = PCRE2_MATCHEDBY_JIT;
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer)
|
||||
if (rc > 0)
|
||||
__msan_unpoison(match_data->ovector, 2 * rc * sizeof(match_data->ovector[0]));
|
||||
#endif /* __has_feature(memory_sanitizer) */
|
||||
#endif /* defined(__has_feature) */
|
||||
|
||||
return match_data->rc;
|
||||
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
/* End of pcre2_jit_match.c */
|
||||
@@ -1,234 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifndef INCLUDED_FROM_PCRE2_JIT_COMPILE
|
||||
#error This file must be included from pcre2_jit_compile.c.
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free JIT read-only data *
|
||||
*************************************************/
|
||||
|
||||
void
|
||||
PRIV(jit_free_rodata)(void *current, void *allocator_data)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
(void)current;
|
||||
(void)allocator_data;
|
||||
#else /* SUPPORT_JIT */
|
||||
void *next;
|
||||
|
||||
SLJIT_UNUSED_ARG(allocator_data);
|
||||
|
||||
while (current != NULL)
|
||||
{
|
||||
next = *(void**)current;
|
||||
SLJIT_FREE(current, allocator_data);
|
||||
current = next;
|
||||
}
|
||||
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
/*************************************************
|
||||
* Free JIT compiled code *
|
||||
*************************************************/
|
||||
|
||||
void
|
||||
PRIV(jit_free)(void *executable_jit, pcre2_memctl *memctl)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
(void)executable_jit;
|
||||
(void)memctl;
|
||||
#else /* SUPPORT_JIT */
|
||||
|
||||
executable_functions *functions = (executable_functions *)executable_jit;
|
||||
void *allocator_data = memctl;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
|
||||
{
|
||||
if (functions->executable_funcs[i] != NULL)
|
||||
sljit_free_code(functions->executable_funcs[i], NULL);
|
||||
PRIV(jit_free_rodata)(functions->read_only_data_heads[i], allocator_data);
|
||||
}
|
||||
|
||||
SLJIT_FREE(functions, allocator_data);
|
||||
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free unused JIT memory *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_jit_free_unused_memory(pcre2_general_context *gcontext)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
(void)gcontext; /* Suppress warning */
|
||||
#else /* SUPPORT_JIT */
|
||||
SLJIT_UNUSED_ARG(gcontext);
|
||||
#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
|
||||
sljit_free_unused_memory_exec();
|
||||
#endif /* SLJIT_EXECUTABLE_ALLOCATOR */
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Allocate a JIT stack *
|
||||
*************************************************/
|
||||
|
||||
/* Creates a JIT stack object.

Arguments:
  startsize  initial size of the stack; clamped to maxsize
  maxsize    maximum size of the stack
  gcontext   general context, passed to the allocator as its memory control

Both sizes are rounded up to a multiple of STACK_GROWTH_RATE.

Returns:     the new stack, or NULL when a size is zero, maxsize is too
             large to be rounded up, an allocation fails, or JIT support
             is not compiled in
*/
PCRE2_EXP_DEFN pcre2_jit_stack * PCRE2_CALL_CONVENTION
pcre2_jit_stack_create(size_t startsize, size_t maxsize,
  pcre2_general_context *gcontext)
{
#ifndef SUPPORT_JIT

(void)gcontext;
(void)startsize;
(void)maxsize;
return NULL;

#else /* SUPPORT_JIT */

pcre2_jit_stack *jit_stack;
/* Clears the low bits when rounding to a multiple of STACK_GROWTH_RATE. */
const size_t round_mask = (size_t)(~(STACK_GROWTH_RATE - 1));

if (startsize == 0 || maxsize == 0)
  return NULL;

/* Rounding maxsize up below must not wrap around. */
if (maxsize > SIZE_MAX - STACK_GROWTH_RATE)
  return NULL;

if (startsize > maxsize)
  startsize = maxsize;

startsize = (startsize + STACK_GROWTH_RATE - 1) & round_mask;
maxsize = (maxsize + STACK_GROWTH_RATE - 1) & round_mask;

jit_stack = PRIV(memctl_malloc)(sizeof(pcre2_real_jit_stack), (pcre2_memctl *)gcontext);
if (jit_stack == NULL)
  return NULL;

jit_stack->stack = sljit_allocate_stack(startsize, maxsize, &jit_stack->memctl);
if (jit_stack->stack != NULL)
  return jit_stack;

/* The sljit stack could not be allocated: release the wrapper again. */
jit_stack->memctl.free(jit_stack, jit_stack->memctl.memory_data);
return NULL;

#endif
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Assign a JIT stack to a pattern *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_jit_stack_assign(pcre2_match_context *mcontext, pcre2_jit_callback callback,
|
||||
void *callback_data)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
(void)mcontext;
|
||||
(void)callback;
|
||||
(void)callback_data;
|
||||
#else /* SUPPORT_JIT */
|
||||
|
||||
if (mcontext == NULL) return;
|
||||
mcontext->jit_callback = callback;
|
||||
mcontext->jit_callback_data = callback_data;
|
||||
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free a JIT stack *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_jit_stack_free(pcre2_jit_stack *jit_stack)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
(void)jit_stack;
|
||||
#else /* SUPPORT_JIT */
|
||||
if (jit_stack != NULL)
|
||||
{
|
||||
sljit_free_stack((struct sljit_stack *)(jit_stack->stack), &jit_stack->memctl);
|
||||
jit_stack->memctl.free(jit_stack, jit_stack->memctl.memory_data);
|
||||
}
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get target CPU type *
|
||||
*************************************************/
|
||||
|
||||
const char*
|
||||
PRIV(jit_get_target)(void)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
return "JIT is not supported";
|
||||
#else /* SUPPORT_JIT */
|
||||
return sljit_get_platform_name();
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get size of JIT code *
|
||||
*************************************************/
|
||||
|
||||
size_t
|
||||
PRIV(jit_get_size)(void *executable_jit)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
(void)executable_jit;
|
||||
return 0;
|
||||
#else /* SUPPORT_JIT */
|
||||
sljit_uw *executable_sizes = ((executable_functions *)executable_jit)->executable_sizes;
|
||||
SLJIT_COMPILE_ASSERT(JIT_NUMBER_OF_COMPILE_MODES == 3, number_of_compile_modes_changed);
|
||||
return executable_sizes[0] + executable_sizes[1] + executable_sizes[2];
|
||||
#endif
|
||||
}
|
||||
|
||||
/* End of pcre2_jit_misc.c */
|
||||
@@ -1,354 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
This module by Zoltan Herczeg and Sebastian Pop
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2019 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
# if defined(FFCS)
|
||||
# if defined(FF_UTF)
|
||||
# define FF_FUN ffcs_utf
|
||||
# else
|
||||
# define FF_FUN ffcs
|
||||
# endif
|
||||
|
||||
# elif defined(FFCS_2)
|
||||
# if defined(FF_UTF)
|
||||
# define FF_FUN ffcs_2_utf
|
||||
# else
|
||||
# define FF_FUN ffcs_2
|
||||
# endif
|
||||
|
||||
# elif defined(FFCS_MASK)
|
||||
# if defined(FF_UTF)
|
||||
# define FF_FUN ffcs_mask_utf
|
||||
# else
|
||||
# define FF_FUN ffcs_mask
|
||||
# endif
|
||||
|
||||
# elif defined(FFCPS_0)
|
||||
# if defined (FF_UTF)
|
||||
# define FF_FUN ffcps_0_utf
|
||||
# else
|
||||
# define FF_FUN ffcps_0
|
||||
# endif
|
||||
|
||||
# elif defined (FFCPS_1)
|
||||
# if defined (FF_UTF)
|
||||
# define FF_FUN ffcps_1_utf
|
||||
# else
|
||||
# define FF_FUN ffcps_1
|
||||
# endif
|
||||
|
||||
# elif defined (FFCPS_DEFAULT)
|
||||
# if defined (FF_UTF)
|
||||
# define FF_FUN ffcps_default_utf
|
||||
# else
|
||||
# define FF_FUN ffcps_default
|
||||
# endif
|
||||
# endif
|
||||
|
||||
#if (defined(__GNUC__) && defined(__SANITIZE_ADDRESS__) && __SANITIZE_ADDRESS__ ) \
|
||||
|| (defined(__clang__) \
|
||||
&& ((__clang_major__ == 3 && __clang_minor__ >= 3) || (__clang_major__ > 3)))
|
||||
__attribute__((no_sanitize_address))
|
||||
#endif
|
||||
static sljit_u8* SLJIT_FUNC FF_FUN(sljit_u8 *str_end, sljit_u8 **str_ptr, sljit_uw offs1, sljit_uw offs2, sljit_uw chars)
|
||||
#undef FF_FUN
|
||||
{
|
||||
quad_word qw;
|
||||
int_char ic;
|
||||
|
||||
SLJIT_UNUSED_ARG(offs1);
|
||||
SLJIT_UNUSED_ARG(offs2);
|
||||
|
||||
ic.x = chars;
|
||||
|
||||
#if defined(FFCS)
|
||||
sljit_u8 c1 = ic.c.c1;
|
||||
vect_t vc1 = VDUPQ(c1);
|
||||
|
||||
#elif defined(FFCS_2)
|
||||
sljit_u8 c1 = ic.c.c1;
|
||||
vect_t vc1 = VDUPQ(c1);
|
||||
sljit_u8 c2 = ic.c.c2;
|
||||
vect_t vc2 = VDUPQ(c2);
|
||||
|
||||
#elif defined(FFCS_MASK)
|
||||
sljit_u8 c1 = ic.c.c1;
|
||||
vect_t vc1 = VDUPQ(c1);
|
||||
sljit_u8 mask = ic.c.c2;
|
||||
vect_t vmask = VDUPQ(mask);
|
||||
#endif
|
||||
|
||||
#if defined(FFCPS)
|
||||
compare_type compare1_type = compare_match1;
|
||||
compare_type compare2_type = compare_match1;
|
||||
vect_t cmp1a, cmp1b, cmp2a, cmp2b;
|
||||
const sljit_u32 diff = IN_UCHARS(offs1 - offs2);
|
||||
PCRE2_UCHAR char1a = ic.c.c1;
|
||||
PCRE2_UCHAR char2a = ic.c.c3;
|
||||
|
||||
# ifdef FFCPS_CHAR1A2A
|
||||
cmp1a = VDUPQ(char1a);
|
||||
cmp2a = VDUPQ(char2a);
|
||||
cmp1b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
|
||||
cmp2b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
|
||||
# else
|
||||
PCRE2_UCHAR char1b = ic.c.c2;
|
||||
PCRE2_UCHAR char2b = ic.c.c4;
|
||||
if (char1a == char1b)
|
||||
{
|
||||
cmp1a = VDUPQ(char1a);
|
||||
cmp1b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
|
||||
}
|
||||
else
|
||||
{
|
||||
sljit_u32 bit1 = char1a ^ char1b;
|
||||
if (is_powerof2(bit1))
|
||||
{
|
||||
compare1_type = compare_match1i;
|
||||
cmp1a = VDUPQ(char1a | bit1);
|
||||
cmp1b = VDUPQ(bit1);
|
||||
}
|
||||
else
|
||||
{
|
||||
compare1_type = compare_match2;
|
||||
cmp1a = VDUPQ(char1a);
|
||||
cmp1b = VDUPQ(char1b);
|
||||
}
|
||||
}
|
||||
|
||||
if (char2a == char2b)
|
||||
{
|
||||
cmp2a = VDUPQ(char2a);
|
||||
cmp2b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
|
||||
}
|
||||
else
|
||||
{
|
||||
sljit_u32 bit2 = char2a ^ char2b;
|
||||
if (is_powerof2(bit2))
|
||||
{
|
||||
compare2_type = compare_match1i;
|
||||
cmp2a = VDUPQ(char2a | bit2);
|
||||
cmp2b = VDUPQ(bit2);
|
||||
}
|
||||
else
|
||||
{
|
||||
compare2_type = compare_match2;
|
||||
cmp2a = VDUPQ(char2a);
|
||||
cmp2b = VDUPQ(char2b);
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
*str_ptr += IN_UCHARS(offs1);
|
||||
#endif
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
vect_t char_mask = VDUPQ(0xff);
|
||||
#endif
|
||||
|
||||
#if defined(FF_UTF)
|
||||
restart:;
|
||||
#endif
|
||||
|
||||
#if defined(FFCPS)
|
||||
if (*str_ptr >= str_end)
|
||||
return NULL;
|
||||
sljit_u8 *p1 = *str_ptr - diff;
|
||||
#endif
|
||||
sljit_s32 align_offset = ((uint64_t)*str_ptr & 0xf);
|
||||
*str_ptr = (sljit_u8 *) ((uint64_t)*str_ptr & ~0xf);
|
||||
vect_t data = VLD1Q(*str_ptr);
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
data = VANDQ(data, char_mask);
|
||||
#endif
|
||||
|
||||
#if defined(FFCS)
|
||||
vect_t eq = VCEQQ(data, vc1);
|
||||
|
||||
#elif defined(FFCS_2)
|
||||
vect_t eq1 = VCEQQ(data, vc1);
|
||||
vect_t eq2 = VCEQQ(data, vc2);
|
||||
vect_t eq = VORRQ(eq1, eq2);
|
||||
|
||||
#elif defined(FFCS_MASK)
|
||||
vect_t eq = VORRQ(data, vmask);
|
||||
eq = VCEQQ(eq, vc1);
|
||||
|
||||
#elif defined(FFCPS)
|
||||
# if defined(FFCPS_DIFF1)
|
||||
vect_t prev_data = data;
|
||||
# endif
|
||||
|
||||
vect_t data2;
|
||||
if (p1 < *str_ptr)
|
||||
{
|
||||
data2 = VLD1Q(*str_ptr - diff);
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
data2 = VANDQ(data2, char_mask);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
data2 = shift_left_n_lanes(data, offs1 - offs2);
|
||||
|
||||
if (compare1_type == compare_match1)
|
||||
data = VCEQQ(data, cmp1a);
|
||||
else
|
||||
data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b);
|
||||
|
||||
if (compare2_type == compare_match1)
|
||||
data2 = VCEQQ(data2, cmp2a);
|
||||
else
|
||||
data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b);
|
||||
|
||||
vect_t eq = VANDQ(data, data2);
|
||||
#endif
|
||||
|
||||
VST1Q(qw.mem, eq);
|
||||
/* Ignore matches before the first STR_PTR. */
|
||||
if (align_offset < 8)
|
||||
{
|
||||
qw.dw[0] >>= align_offset * 8;
|
||||
if (qw.dw[0])
|
||||
{
|
||||
*str_ptr += align_offset + __builtin_ctzll(qw.dw[0]) / 8;
|
||||
goto match;
|
||||
}
|
||||
if (qw.dw[1])
|
||||
{
|
||||
*str_ptr += 8 + __builtin_ctzll(qw.dw[1]) / 8;
|
||||
goto match;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
qw.dw[1] >>= (align_offset - 8) * 8;
|
||||
if (qw.dw[1])
|
||||
{
|
||||
*str_ptr += align_offset + __builtin_ctzll(qw.dw[1]) / 8;
|
||||
goto match;
|
||||
}
|
||||
}
|
||||
*str_ptr += 16;
|
||||
|
||||
while (*str_ptr < str_end)
|
||||
{
|
||||
vect_t orig_data = VLD1Q(*str_ptr);
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
orig_data = VANDQ(orig_data, char_mask);
|
||||
#endif
|
||||
data = orig_data;
|
||||
|
||||
#if defined(FFCS)
|
||||
eq = VCEQQ(data, vc1);
|
||||
|
||||
#elif defined(FFCS_2)
|
||||
eq1 = VCEQQ(data, vc1);
|
||||
eq2 = VCEQQ(data, vc2);
|
||||
eq = VORRQ(eq1, eq2);
|
||||
|
||||
#elif defined(FFCS_MASK)
|
||||
eq = VORRQ(data, vmask);
|
||||
eq = VCEQQ(eq, vc1);
|
||||
#endif
|
||||
|
||||
#if defined(FFCPS)
|
||||
# if defined (FFCPS_DIFF1)
|
||||
data2 = VEXTQ(prev_data, data, VECTOR_FACTOR - 1);
|
||||
# else
|
||||
data2 = VLD1Q(*str_ptr - diff);
|
||||
# if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
data2 = VANDQ(data2, char_mask);
|
||||
# endif
|
||||
# endif
|
||||
|
||||
# ifdef FFCPS_CHAR1A2A
|
||||
data = VCEQQ(data, cmp1a);
|
||||
data2 = VCEQQ(data2, cmp2a);
|
||||
# else
|
||||
if (compare1_type == compare_match1)
|
||||
data = VCEQQ(data, cmp1a);
|
||||
else
|
||||
data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b);
|
||||
if (compare2_type == compare_match1)
|
||||
data2 = VCEQQ(data2, cmp2a);
|
||||
else
|
||||
data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b);
|
||||
# endif
|
||||
|
||||
eq = VANDQ(data, data2);
|
||||
#endif
|
||||
|
||||
VST1Q(qw.mem, eq);
|
||||
if (qw.dw[0])
|
||||
*str_ptr += __builtin_ctzll(qw.dw[0]) / 8;
|
||||
else if (qw.dw[1])
|
||||
*str_ptr += 8 + __builtin_ctzll(qw.dw[1]) / 8;
|
||||
else {
|
||||
*str_ptr += 16;
|
||||
#if defined (FFCPS_DIFF1)
|
||||
prev_data = orig_data;
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
|
||||
match:;
|
||||
if (*str_ptr >= str_end)
|
||||
/* Failed match. */
|
||||
return NULL;
|
||||
|
||||
#if defined(FF_UTF)
|
||||
if (utf_continue((PCRE2_SPTR)*str_ptr - offs1))
|
||||
{
|
||||
/* Not a match. */
|
||||
*str_ptr += IN_UCHARS(1);
|
||||
goto restart;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Match. */
|
||||
#if defined (FFCPS)
|
||||
*str_ptr -= IN_UCHARS(offs1);
|
||||
#endif
|
||||
return *str_ptr;
|
||||
}
|
||||
|
||||
/* Failed match. */
|
||||
return NULL;
|
||||
}
|
||||
@@ -1,2356 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
This module by Zoltan Herczeg
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2019 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#if !(defined SUPPORT_VALGRIND)
|
||||
|
||||
#if ((defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
|
||||
|| (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \
|
||||
|| (defined SLJIT_CONFIG_LOONGARCH_64 && SLJIT_CONFIG_LOONGARCH_64))
|
||||
|
||||
/* Selects how a vector of subject code units is tested against the
   wanted character(s) by fast_forward_char_pair_sse2_compare(). */
typedef enum {
  /* One equality compare against a single character. */
  vector_compare_match1 = 0,
  /* OR the data with a one bit mask first, then do one equality compare. */
  vector_compare_match1i = 1,
  /* Two equality compares whose results are ORed together. */
  vector_compare_match2 = 2
} vector_compare_type;
|
||||
|
||||
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
|
||||
/* Returns the largest supported distance, in code units, between the two
   characters of a fast-forward character pair.

   The value is the number of code units in one 128 bit vector minus one:
   15, 7 or 3 for 8, 16 or 32 bit code units. The AVX2 code path is
   currently disabled; with 256 bit vectors the limits would be 31, 15
   and 7 (selected by sljit_has_cpu_feature(SLJIT_HAS_AVX2)). */
static SLJIT_INLINE sljit_s32 max_fast_forward_char_pair_offset(void)
{
#if PCRE2_CODE_UNIT_WIDTH != 8 && PCRE2_CODE_UNIT_WIDTH != 16 && PCRE2_CODE_UNIT_WIDTH != 32
#error "Unsupported unit width"
#endif
return (sljit_s32)(128 / PCRE2_CODE_UNIT_WIDTH) - 1;
}
|
||||
#else /* !SLJIT_CONFIG_X86 */
|
||||
/* Returns the largest supported distance, in code units, between the two
   characters of a fast-forward character pair: the number of code units
   in one 128 bit vector minus one (15, 7 or 3 for 8, 16 or 32 bit code
   units). */
static SLJIT_INLINE sljit_s32 max_fast_forward_char_pair_offset(void)
{
#if PCRE2_CODE_UNIT_WIDTH != 8 && PCRE2_CODE_UNIT_WIDTH != 16 && PCRE2_CODE_UNIT_WIDTH != 32
#error "Unsupported unit width"
#endif
return (sljit_s32)(128 / PCRE2_CODE_UNIT_WIDTH) - 1;
}
|
||||
#endif /* SLJIT_CONFIG_X86 */
|
||||
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
/* Emits code that masks the code unit held in reg (reg is overwritten) and
   returns a jump which is taken when the masked value differs from the
   trailing-unit pattern, i.e. when the code unit is not a UTF-8
   continuation byte (8 bit) or not a low surrogate (16 bit). */
static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
/* Continuation bytes look like 10xxxxxx. */
const sljit_sw kind_mask = 0xc0;
const sljit_sw trailing_unit = 0x80;
#elif PCRE2_CODE_UNIT_WIDTH == 16
/* Low surrogates are in the range 0xdc00 - 0xdfff. */
const sljit_sw kind_mask = 0xfc00;
const sljit_sw trailing_unit = 0xdc00;
#else
#error "Unknown code width"
#endif

OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, kind_mask);
return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, trailing_unit);
}
|
||||
#endif
|
||||
|
||||
#endif /* SLJIT_CONFIG_X86 || SLJIT_CONFIG_S390X || SLJIT_CONFIG_LOONGARCH_64 */
|
||||
|
||||
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
|
||||
|
||||
/* Replicates chr into every code unit sized slot of a 32 bit value, so the
   result can be loaded into lane zero of a vector register and splatted.

   As a side effect of preprocessing, this function also defines
   SIMD_COMPARE_TYPE_INDEX (0, 1 or 2 for 8, 16 or 32 bit code units),
   which is added to the PCMPEQB opcode to select PCMPEQB/W/D. */
static sljit_s32 character_to_int32(PCRE2_UCHAR chr)
{
sljit_u32 splat = chr;

#if PCRE2_CODE_UNIT_WIDTH == 8
#define SIMD_COMPARE_TYPE_INDEX 0
/* Double the filled width twice: 1 -> 2 -> 4 bytes. */
splat |= splat << 8;
splat |= splat << 16;
#elif PCRE2_CODE_UNIT_WIDTH == 16
#define SIMD_COMPARE_TYPE_INDEX 1
splat |= splat << 16;
#elif PCRE2_CODE_UNIT_WIDTH == 32
#define SIMD_COMPARE_TYPE_INDEX 2
#else
#error "Unsupported unit width"
#endif

return (sljit_s32)splat;
}
|
||||
|
||||
/* Emits (at most) one instruction of the vector compare sequence.

   Callers invoke this function with step = 0, 1, 2, 3 in order; depending
   on compare_type only some of the steps emit an instruction:

     vector_compare_match1:  step 2: PCMPEQ dst, cmp1
     vector_compare_match1i: step 0: POR dst, cmp2
                             step 2: PCMPEQ dst, cmp1
     vector_compare_match2 (128 bit registers):
                             step 0: MOVDQA tmp, dst
                             step 1: PCMPEQ dst, cmp1
                             step 2: PCMPEQ tmp, cmp2
                             step 3: POR dst, tmp
     vector_compare_match2 (256 bit registers):
                             step 0 emits the "step 2" compare with the
                             VEX prefix patched by dst_ind, step 2 emits
                             nothing.

   dst_ind, cmp1_ind, cmp2_ind and tmp_ind are machine register indexes.
   The PCMPEQ width (B/W/D) is chosen by SIMD_COMPARE_TYPE_INDEX. */
static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, vector_compare_type compare_type,
  sljit_s32 reg_type, int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
{
sljit_u8 insn[4];
sljit_u8 opcode;
sljit_s32 modrm_reg;
sljit_s32 modrm_rm;

SLJIT_ASSERT(step >= 0 && step <= 3);

if (compare_type != vector_compare_match2)
  {
  if (step == 0 && compare_type == vector_compare_match1i)
    {
    /* POR xmm1, xmm2/m128 */
    opcode = 0xeb;
    modrm_reg = dst_ind;
    modrm_rm = cmp2_ind;
    }
  else if (step == 2)
    {
    /* PCMPEQB/W/D xmm1, xmm2/m128 */
    opcode = 0x74 + SIMD_COMPARE_TYPE_INDEX;
    modrm_reg = dst_ind;
    modrm_rm = cmp1_ind;
    }
  else
    return;
  }
else
  {
  if (reg_type == SLJIT_SIMD_REG_256)
    {
    if (step == 2)
      return;

    /* The first step performs the second compare instead of a move. */
    if (step == 0)
      step = 2;
    }

  switch (step)
    {
    case 0:
    SLJIT_ASSERT(reg_type == SLJIT_SIMD_REG_128);
    /* MOVDQA xmm1, xmm2/m128 */
    opcode = 0x6f;
    modrm_reg = tmp_ind;
    modrm_rm = dst_ind;
    break;

    case 1:
    /* PCMPEQB/W/D xmm1, xmm2/m128 */
    opcode = 0x74 + SIMD_COMPARE_TYPE_INDEX;
    modrm_reg = dst_ind;
    modrm_rm = cmp1_ind;
    break;

    case 2:
    /* PCMPEQB/W/D xmm1, xmm2/m128 */
    opcode = 0x74 + SIMD_COMPARE_TYPE_INDEX;
    modrm_reg = tmp_ind;
    modrm_rm = cmp2_ind;
    break;

    case 3:
    /* POR xmm1, xmm2/m128 */
    opcode = 0xeb;
    modrm_reg = dst_ind;
    modrm_rm = tmp_ind;
    break;

    default:
    return;
    }
  }

/* Prefix bytes. */
if (reg_type == SLJIT_SIMD_REG_128)
  {
  insn[0] = 0x66;
  insn[1] = 0x0f;
  }
else
  {
  /* Two byte VEX prefix. */
  insn[0] = 0xc5;
  insn[1] = 0xfd;
  }

/* Every 256 bit instruction emitted here has dst_ind folded into the
   second VEX byte. */
if (reg_type == SLJIT_SIMD_REG_256)
  insn[1] ^= (dst_ind << 3);

insn[2] = opcode;
insn[3] = 0xc0 | (modrm_reg << 3) | modrm_rm;
sljit_emit_op_custom(compiler, insn, 4);
}
|
||||
|
||||
#define JIT_HAS_FAST_FORWARD_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SIMD))
|
||||
|
||||
/* Generates code which advances STR_PTR to the next code unit equal to
   char1 or char2 (the two may be the same character).

   The generated code loads whole aligned vectors: the first one contains
   the starting position (matches before it are shifted out), the rest are
   scanned by a loop. In PCRE2_JIT_COMPLETE mode reaching STR_END jumps to
   common->failed_match; in the other modes STR_PTR is clamped to STR_END.
   In UTF mode with offset > 0 a hit is rejected, and the scan restarted
   one code unit later, when the code unit at -offset is not the start of
   a character. TMP1 and TMP2 are overwritten. */
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
/* The AVX2 (SLJIT_SIMD_REG_256) code path is currently disabled. */
sljit_s32 reg_type = SLJIT_SIMD_REG_128;
sljit_s32 is_wide = (reg_type == SLJIT_SIMD_REG_256);
sljit_s32 lane0_type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_LANE_ZERO;
sljit_s32 splat_type = reg_type | SLJIT_SIMD_ELEM_32;
sljit_s32 load_type = reg_type | (is_wide ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128);
sljit_s32 sign_type = SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8;
sljit_s32 align_mask = is_wide ? 0x1f : 0xf;
sljit_s32 vector_size = is_wide ? 32 : 16;
sljit_s32 tmp1_gpr = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
sljit_s32 data_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR0);
sljit_s32 cmp1_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR1);
sljit_s32 cmp2_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR2);
sljit_s32 tmp_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR3);
vector_compare_type compare_type = vector_compare_match1;
sljit_u32 case_bit = 0;
int two_chars = (char1 != char2);
sljit_u8 bsf_insn[8];
struct sljit_label *loop_top;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
#endif
struct sljit_jump *found;
struct sljit_jump *at_end[2];
int step;

SLJIT_UNUSED_ARG(offset);

/* Characters differing in one bit need only a single masked compare. */
if (two_chars)
  {
  case_bit = char1 ^ char2;
  if (is_powerof2(case_bit))
    compare_type = vector_compare_match1i;
  else
    {
    case_bit = 0;
    compare_type = vector_compare_match2;
    }
  }

at_end[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, at_end[0]);

/* First part (unaligned start) */
sljit_emit_simd_lane_mov(compiler, lane0_type, SLJIT_VR1, 0, SLJIT_IMM, character_to_int32(char1 | case_bit));

if (two_chars)
  sljit_emit_simd_lane_mov(compiler, lane0_type, SLJIT_VR2, 0, SLJIT_IMM, character_to_int32(case_bit != 0 ? case_bit : char2));

OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);

sljit_emit_simd_lane_replicate(compiler, splat_type, SLJIT_VR1, SLJIT_VR1, 0);

if (two_chars)
  sljit_emit_simd_lane_replicate(compiler, splat_type, SLJIT_VR2, SLJIT_VR2, 0);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
restart = LABEL();
#endif

/* STR_PTR: aligned vector address, TMP2: byte offset inside the vector. */
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~align_mask);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, align_mask);

sljit_emit_simd_mov(compiler, load_type, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);

for (step = 0; step < 4; step++)
  fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, step, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

/* Discard the result bits which belong to bytes before the start. */
sljit_emit_simd_sign(compiler, sign_type, SLJIT_VR0, TMP1, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);

found = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* Second part (aligned) */
loop_top = LABEL();

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, vector_size);

at_end[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, at_end[1]);

sljit_emit_simd_mov(compiler, load_type, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);

for (step = 0; step < 4; step++)
  fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, step, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

sljit_emit_simd_sign(compiler, sign_type, SLJIT_VR0, TMP1, 0);
CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, loop_top);

JUMPHERE(found);

/* The index of the lowest set bit is the byte offset of the first hit. */
SLJIT_ASSERT(tmp1_gpr < 8);
/* BSF r32, r/m32 */
bsf_insn[0] = 0x0f;
bsf_insn[1] = 0xbc;
bsf_insn[2] = 0xc0 | (tmp1_gpr << 3) | tmp1_gpr;
sljit_emit_op_custom(compiler, bsf_insn, 3);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
else
  {
  /* Partial matching: never leave STR_PTR beyond STR_END. */
  JUMPHERE(at_end[0]);
  JUMPHERE(at_end[1]);
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
  SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);

  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));

  found = jump_if_utf_char_start(compiler, TMP1);

  /* Not a character start: continue from the next code unit. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, restart);

  JUMPHERE(found);
  }
#endif
}
|
||||
|
||||
#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SIMD))
|
||||
|
||||
/* Generates code which searches for char1 or char2 (the two may be the
   same character) starting at the address held in TMP1.

   On the found path TMP1 holds the address of the first matching code
   unit and STR_PTR has its original value again. The returned jump list
   collects the jumps taken when no match is found before STR_END; on
   those paths STR_PTR may have been overwritten, its original value is
   still available in TMP3. TMP2 is also overwritten. */
static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
{
DEFINE_COMPILER;
sljit_u8 instruction[8];
/* The AVX2 code path is currently disabled. */
/* sljit_s32 reg_type = sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? SLJIT_SIMD_REG_256 : SLJIT_SIMD_REG_128; */
sljit_s32 reg_type = SLJIT_SIMD_REG_128;
sljit_s32 value;
struct sljit_label *start;
struct sljit_jump *quit;
jump_list *not_found = NULL;
vector_compare_type compare_type = vector_compare_match1;
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
sljit_s32 data_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR0);
sljit_s32 cmp1_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR1);
sljit_s32 cmp2_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR2);
sljit_s32 tmp_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR3);
sljit_u32 bit = 0;
int i;

/* Characters differing in one bit need only a single masked compare. */
if (char1 != char2)
  {
  bit = char1 ^ char2;
  compare_type = vector_compare_match1i;

  if (!is_powerof2(bit))
    {
    bit = 0;
    compare_type = vector_compare_match2;
    }
  }

add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
/* STR_PTR is used as the scan pointer, its value is saved in TMP3. */
OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);

/* First part (unaligned start) */

value = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_LANE_ZERO;
sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR1, 0, SLJIT_IMM, character_to_int32(char1 | bit));

if (char1 != char2)
  sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR2, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));

OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);

sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR1, SLJIT_VR1, 0);

if (char1 != char2)
  sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR2, SLJIT_VR2, 0);

/* STR_PTR: aligned vector address, TMP2: byte offset inside the vector. */
value = (reg_type == SLJIT_SIMD_REG_256) ? 0x1f : 0xf;
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~value);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, value);

value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);

for (i = 0; i < 4; i++)
  fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

/* Discard the result bits which belong to bytes before the start. */
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);

quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* Second part (aligned) */
start = LABEL();

value = (reg_type == SLJIT_SIMD_REG_256) ? 32 : 16;
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, value);

add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);

for (i = 0; i < 4; i++)
  fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0);
CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);

JUMPHERE(quit);

/* The index of the lowest set bit is the byte offset of the first hit. */
SLJIT_ASSERT(tmp1_reg_ind < 8);
/* BSF r32, r/m32 */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
sljit_emit_op_custom(compiler, instruction, 3);

/* TMP1 becomes the address of the hit, which must be before STR_END. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, STR_PTR, 0);
add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));

OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
return not_found;
}
|
||||
|
||||
#ifndef _WIN64
|
||||
|
||||
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SIMD))
|
||||
|
||||
/* Generates code which advances STR_PTR to the next position where the
   code unit at offs1 is char1a or char1b and the code unit at offs2 is
   char2a or char2b (offs1 > offs2 >= 0).

   The generated code scans with STR_PTR moved forward by offs1: each
   aligned vector is compared against the first character pair, the vector
   located (offs1 - offs2) code units earlier against the second one, and
   the two results are ANDed. Reaching STR_END jumps to
   common->failed_match. When common->match_end_ptr is set, STR_END is
   temporarily lowered (its value is kept in TMP3 and restored at the end).
   TMP1 and TMP2 are overwritten. Only PCRE2_JIT_COMPLETE mode is
   supported. */
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
  PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
{
DEFINE_COMPILER;
sljit_u8 insn[8];
/* The AVX2 (SLJIT_SIMD_REG_256) code path is currently disabled. */
sljit_s32 reg_type = SLJIT_SIMD_REG_128;
sljit_s32 is_wide = (reg_type == SLJIT_SIMD_REG_256);
sljit_s32 lane0_type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_LANE_ZERO;
sljit_s32 splat_type = reg_type | SLJIT_SIMD_ELEM_32;
sljit_s32 aligned_load = reg_type | (is_wide ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128);
sljit_s32 sign_type = SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8;
sljit_s32 align_mask = is_wide ? 0x1f : 0xf;
sljit_s32 vector_size = is_wide ? 32 : 16;
sljit_s32 perm_src;
vector_compare_type compare1_type = vector_compare_match1;
vector_compare_type compare2_type = vector_compare_match1;
sljit_u32 bit1 = 0;
sljit_u32 bit2 = 0;
/* Distance of the two characters in bytes. */
sljit_u32 gap = IN_UCHARS(offs1 - offs2);
sljit_s32 tmp1_gpr = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
sljit_s32 data1_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR0);
sljit_s32 data2_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR1);
sljit_s32 cmp1a_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR2);
sljit_s32 cmp2a_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR3);
sljit_s32 cmp1b_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR4);
sljit_s32 cmp2b_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR5);
sljit_s32 tmp1_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR6);
sljit_s32 tmp2_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_TMP_DEST_VREG);
struct sljit_label *loop_top;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
#endif
struct sljit_jump *fixup[2];
int step;

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2 && offs2 >= 0);
SLJIT_ASSERT(gap <= (unsigned)IN_UCHARS(max_fast_forward_char_pair_offset()));

/* Initialize. */
if (common->match_end_ptr != 0)
  {
  /* STR_END = min(STR_END, match end + offs1 + 1), old value in TMP3. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));

  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0);
  SELECT(SLJIT_LESS, STR_END, TMP1, 0, STR_END);
  }

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

/* Constants of the first character: TMP1 (and TMP2 when they differ). */
if (char1a == char1b)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
else
  {
  bit1 = char1a ^ char1b;
  if (is_powerof2(bit1))
    {
    compare1_type = vector_compare_match1i;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1));
    }
  else
    {
    compare1_type = vector_compare_match2;
    bit1 = 0;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char1b));
    }
  }

sljit_emit_simd_lane_mov(compiler, lane0_type, SLJIT_VR2, 0, TMP1, 0);

if (char1a != char1b)
  sljit_emit_simd_lane_mov(compiler, lane0_type, SLJIT_VR4, 0, TMP2, 0);

/* Constants of the second character. */
if (char2a == char2b)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
else
  {
  bit2 = char2a ^ char2b;
  if (is_powerof2(bit2))
    {
    compare2_type = vector_compare_match1i;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2));
    }
  else
    {
    compare2_type = vector_compare_match2;
    bit2 = 0;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char2b));
    }
  }

sljit_emit_simd_lane_mov(compiler, lane0_type, SLJIT_VR3, 0, TMP1, 0);

if (char2a != char2b)
  sljit_emit_simd_lane_mov(compiler, lane0_type, SLJIT_VR5, 0, TMP2, 0);

/* Splat the constants. */
sljit_emit_simd_lane_replicate(compiler, splat_type, SLJIT_VR2, SLJIT_VR2, 0);
if (char1a != char1b)
  sljit_emit_simd_lane_replicate(compiler, splat_type, SLJIT_VR4, SLJIT_VR4, 0);

sljit_emit_simd_lane_replicate(compiler, splat_type, SLJIT_VR3, SLJIT_VR3, 0);
if (char2a != char2b)
  sljit_emit_simd_lane_replicate(compiler, splat_type, SLJIT_VR5, SLJIT_VR5, 0);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
restart = LABEL();
#endif

/* TMP1: address of the second character, TMP2: unaligned position,
   STR_PTR: aligned vector address. */
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, gap);
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~align_mask);

sljit_emit_simd_mov(compiler, aligned_load, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);

fixup[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_PTR, 0);

/* The second character is before the aligned address: load its vector. */
sljit_emit_simd_mov(compiler, reg_type, SLJIT_VR1, SLJIT_MEM1(STR_PTR), -(sljit_sw)gap);
fixup[1] = JUMP(SLJIT_JUMP);

JUMPHERE(fixup[0]);

/* Otherwise the second vector is derived from the first one by a shift. */
if (reg_type == SLJIT_SIMD_REG_256)
  {
  if (gap != 16)
    {
    /* PSLLDQ ymm1, ymm2, imm8 */
    insn[0] = 0xc5;
    insn[1] = (sljit_u8)(0xf9 ^ (data2_ind << 3));
    insn[2] = 0x73;
    insn[3] = 0xc0 | (7 << 3) | data1_ind;
    insn[4] = gap & 0xf;
    sljit_emit_op_custom(compiler, insn, 5);
    }

  insn[0] = 0xc4;
  insn[1] = 0xe3;
  if (gap < 16)
    {
    /* VINSERTI128 xmm1, xmm2, xmm3/m128 */
    insn[2] = (sljit_u8)(0x7d ^ (data2_ind << 3));
    insn[3] = 0x38;
    SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR) <= 7);
    insn[4] = 0x40 | (data2_ind << 3) | sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
    insn[5] = (sljit_u8)(16 - gap);
    insn[6] = 1;
    sljit_emit_op_custom(compiler, insn, 7);
    }
  else
    {
    /* VPERM2I128 xmm1, xmm2, xmm3/m128 */
    perm_src = (gap == 16) ? data1_ind : data2_ind;
    insn[2] = (sljit_u8)(0x7d ^ (perm_src << 3));
    insn[3] = 0x46;
    insn[4] = 0xc0 | (data2_ind << 3) | perm_src;
    insn[5] = 0x08;
    sljit_emit_op_custom(compiler, insn, 6);
    }
  }
else
  {
  /* MOVDQA xmm1, xmm2/m128 */
  insn[0] = 0x66;
  insn[1] = 0x0f;
  insn[2] = 0x6f;
  insn[3] = 0xc0 | (data2_ind << 3) | data1_ind;
  sljit_emit_op_custom(compiler, insn, 4);

  /* PSLLDQ xmm1, imm8 (the 0x66 0x0f prefix is still in place). */
  insn[2] = 0x73;
  insn[3] = 0xc0 | (7 << 3) | data2_ind;
  insn[4] = gap;
  sljit_emit_op_custom(compiler, insn, 5);
  }

JUMPHERE(fixup[1]);

/* TMP2: byte offset of the start position inside the vector. */
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, align_mask);

for (step = 0; step < 4; step++)
  {
  fast_forward_char_pair_sse2_compare(compiler, compare2_type, reg_type, step, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
  fast_forward_char_pair_sse2_compare(compiler, compare1_type, reg_type, step, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
  }

sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | reg_type, SLJIT_VR0, SLJIT_VR0, SLJIT_VR1, 0);
sljit_emit_simd_sign(compiler, sign_type, SLJIT_VR0, TMP1, 0);

/* Ignore matches before the first STR_PTR. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);

fixup[0] = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* Main loop. */
loop_top = LABEL();

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, vector_size);
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

sljit_emit_simd_mov(compiler, aligned_load, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);
sljit_emit_simd_mov(compiler, reg_type, SLJIT_VR1, SLJIT_MEM1(STR_PTR), -(sljit_sw)gap);

for (step = 0; step < 4; step++)
  {
  fast_forward_char_pair_sse2_compare(compiler, compare1_type, reg_type, step, data1_ind, cmp1a_ind, cmp1b_ind, tmp2_ind);
  fast_forward_char_pair_sse2_compare(compiler, compare2_type, reg_type, step, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind);
  }

sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | reg_type, SLJIT_VR0, SLJIT_VR0, SLJIT_VR1, 0);
sljit_emit_simd_sign(compiler, sign_type, SLJIT_VR0, TMP1, 0);

CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, loop_top);

JUMPHERE(fixup[0]);

/* The index of the lowest set bit is the byte offset of the first hit. */
SLJIT_ASSERT(tmp1_gpr < 8);
/* BSF r32, r/m32 */
insn[0] = 0x0f;
insn[1] = 0xbc;
insn[2] = 0xc0 | (tmp1_gpr << 3) | tmp1_gpr;
sljit_emit_op_custom(compiler, insn, 3);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));

  fixup[0] = jump_if_utf_char_start(compiler, TMP1);

  /* Not a character start: retry from the next code unit. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart);

  add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP));

  JUMPHERE(fixup[0]);
  }
#endif

/* Undo the initial offs1 adjustment. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
|
||||
|
||||
#endif /* !_WIN64 */
|
||||
|
||||
#undef SIMD_COMPARE_TYPE_INDEX
|
||||
|
||||
#endif /* SLJIT_CONFIG_X86 */
|
||||
|
||||
#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 && (defined __ARM_NEON || defined __ARM_NEON__))
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
/* Packs up to four single-byte character values (c.c1 .. c.c4) into one
   unsigned int (x) so that they can be handed to a helper routine as a
   single immediate argument. NOTE(review): this assumes unsigned int is at
   least four bytes wide - confirm for every supported target. */
typedef union {
  unsigned int x;
  struct {
    unsigned char c1;
    unsigned char c2;
    unsigned char c3;
    unsigned char c4;
  } c;
} int_char;
|
||||
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
/* Reports whether the code unit at s is a trailing unit of a multi-unit
   character: a unit of the form 10xxxxxx in 8-bit mode, or a unit in the
   range 0xdc00..0xdfff in 16-bit mode. Returns non-zero when it is. */
static SLJIT_INLINE int utf_continue(PCRE2_SPTR s)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
const unsigned int top_two_bits = *s & 0xc0;
return top_two_bits == 0x80;
#elif PCRE2_CODE_UNIT_WIDTH == 16
const unsigned int top_six_bits = *s & 0xfc00;
return top_six_bits == 0xdc00;
#else
#error "Unknown code width"
#endif
}
|
||||
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||
|
||||
/* Code unit width dependent aliases for the NEON types and intrinsics.
   VECTOR_FACTOR is the number of code units held by one vect_t, and
   quad_word gives access to the same 128 bits either per code unit (mem)
   or as two 64 bit halves (dw). */
#if PCRE2_CODE_UNIT_WIDTH == 8

typedef union {
  uint8_t mem[16];
  uint64_t dw[2];
} quad_word;

/* Vector type and lane count. */
# define vect_t uint8x16_t
# define VECTOR_FACTOR 16
/* Load, store and broadcast. */
# define VLD1Q(X) vld1q_u8((sljit_u8 *)(X))
# define VST1Q vst1q_u8
# define VDUPQ vdupq_n_u8
/* Compare and bitwise operations. */
# define VCEQQ vceqq_u8
# define VORRQ vorrq_u8
# define VANDQ vandq_u8
/* Lane extraction across two vectors. */
# define VEXTQ vextq_u8

#elif PCRE2_CODE_UNIT_WIDTH == 16

typedef union {
  uint16_t mem[8];
  uint64_t dw[2];
} quad_word;

/* Vector type and lane count. */
# define vect_t uint16x8_t
# define VECTOR_FACTOR 8
/* Load, store and broadcast. */
# define VLD1Q(X) vld1q_u16((sljit_u16 *)(X))
# define VST1Q vst1q_u16
# define VDUPQ vdupq_n_u16
/* Compare and bitwise operations. */
# define VCEQQ vceqq_u16
# define VORRQ vorrq_u16
# define VANDQ vandq_u16
/* Lane extraction across two vectors. */
# define VEXTQ vextq_u16

#else

typedef union {
  uint32_t mem[4];
  uint64_t dw[2];
} quad_word;

/* Vector type and lane count. */
# define vect_t uint32x4_t
# define VECTOR_FACTOR 4
/* Load, store and broadcast. */
# define VLD1Q(X) vld1q_u32((sljit_u32 *)(X))
# define VST1Q vst1q_u32
# define VDUPQ vdupq_n_u32
/* Compare and bitwise operations. */
# define VCEQQ vceqq_u32
# define VORRQ vorrq_u32
# define VANDQ vandq_u32
/* Lane extraction across two vectors. */
# define VEXTQ vextq_u32

#endif
|
||||
|
||||
#define FFCS
|
||||
#include "pcre2_jit_neon_inc.h"
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
# define FF_UTF
|
||||
# include "pcre2_jit_neon_inc.h"
|
||||
# undef FF_UTF
|
||||
#endif
|
||||
#undef FFCS
|
||||
|
||||
#define FFCS_2
|
||||
#include "pcre2_jit_neon_inc.h"
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
# define FF_UTF
|
||||
# include "pcre2_jit_neon_inc.h"
|
||||
# undef FF_UTF
|
||||
#endif
|
||||
#undef FFCS_2
|
||||
|
||||
#define FFCS_MASK
|
||||
#include "pcre2_jit_neon_inc.h"
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
# define FF_UTF
|
||||
# include "pcre2_jit_neon_inc.h"
|
||||
# undef FF_UTF
|
||||
#endif
|
||||
#undef FFCS_MASK
|
||||
|
||||
#define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1
|
||||
|
||||
/* Emits JIT code that advances STR_PTR by calling one of the NEON search
   helpers (ffcs, ffcs_2, ffcs_mask, or their _utf variants).

   common  compiler state; supplies the sljit compiler, the match mode and
           the failed_match jump list.
   char1   first character to look for.
   char2   alternative character; equal to char1 when only one is wanted.
   offset  passed to the helper in SLJIT_R2; the _utf helpers are used only
           when common->utf is set and offset > 0.

   The helper receives STR_END in SLJIT_R0, the address of the LOCAL0 slot
   (holding the saved STR_PTR) in SLJIT_R1 and the packed characters in
   SLJIT_R4. A zero return value means "not found": in PCRE2_JIT_COMPLETE
   mode this jumps to failed_match, otherwise STR_PTR is limited to STR_END.
   NOTE(review): SLJIT_R3 is not loaded although the call is declared with
   four arguments and the characters travel in SLJIT_R4 - confirm this
   against the helper prototypes in pcre2_jit_neon_inc.h. */
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
int_char ic;
sljit_sw func_addr;
struct sljit_jump *partial_quit, *quit;

/* Only c1 and c2 are assigned below. Clear the whole word first so that the
   immediate embedded into the generated code never contains indeterminate
   bytes. */
ic.x = 0;

/* Save temporary registers. */
SLJIT_ASSERT(common->locals_size >= 2 * (int)sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, TMP3, 0);

/* Prepare function arguments */
OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0);
GET_LOCAL_BASE(SLJIT_R1, 0, LOCAL0);
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, offset);

/* Select the helper and the character encoding it expects. */
if (char1 == char2)
  {
  /* Single character search. */
  ic.c.c1 = char1;
  ic.c.c2 = char2;
  func_addr = SLJIT_FUNC_ADDR(ffcs);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  if (common->utf && offset > 0)
    func_addr = SLJIT_FUNC_ADDR(ffcs_utf);
#endif
  }
else
  {
  PCRE2_UCHAR mask = char1 ^ char2;

  if (is_powerof2(mask))
    {
    /* The characters differ in exactly one bit: pass the character with
       that bit set together with the bit itself. */
    ic.c.c1 = char1 | mask;
    ic.c.c2 = mask;
    func_addr = SLJIT_FUNC_ADDR(ffcs_mask);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf && offset > 0)
      func_addr = SLJIT_FUNC_ADDR(ffcs_mask_utf);
#endif
    }
  else
    {
    /* Two unrelated characters. */
    ic.c.c1 = char1;
    ic.c.c2 = char2;
    func_addr = SLJIT_FUNC_ADDR(ffcs_2);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf && offset > 0)
      func_addr = SLJIT_FUNC_ADDR(ffcs_2_utf);
#endif
    }
  }

OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x);
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
  SLJIT_IMM, func_addr);

/* Restore registers. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);

/* Check return value. */
partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit);

/* Fast forward STR_PTR to the position returned by the helper. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
if (common->mode != PCRE2_JIT_COMPLETE)
  {
  quit = CMP(SLJIT_NOT_ZERO, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
  JUMPHERE(partial_quit);
  /* Not found: do not let STR_PTR run past STR_END. */
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
  SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
  JUMPHERE(quit);
  }
}
|
||||
|
||||
/* Selects how fast_forward_char_pair_compare() matches a vector of input
   against its two comparison vectors. */
typedef enum {
  compare_match1 = 0,   /* equality test against cmp1 only */
  compare_match1i = 1,  /* OR the input with cmp2, then test against cmp1 */
  compare_match2 = 2    /* equal to cmp1 or equal to cmp2 */
} compare_type;
|
||||
|
||||
/* Compares every lane of dst according to ctype and returns the lane mask
   produced by VCEQQ:
     compare_match2   lane equals cmp1 or lane equals cmp2
     compare_match1i  (lane | cmp2) equals cmp1
     anything else    lane equals cmp1 */
static inline vect_t fast_forward_char_pair_compare(compare_type ctype, vect_t dst, vect_t cmp1, vect_t cmp2)
{
switch (ctype)
  {
  case compare_match2:
    {
    vect_t hit_first = VCEQQ(dst, cmp1);
    vect_t hit_second = VCEQQ(dst, cmp2);
    return VORRQ(hit_first, hit_second);
    }

  case compare_match1i:
    return VCEQQ(VORRQ(dst, cmp2), cmp1);

  default:
    return VCEQQ(dst, cmp1);
  }
}
|
||||
|
||||
/* Returns the largest distance, in code units, supported between the two
   characters of a pair search: 15, 7 or 3 for 8, 16 or 32 bit code units. */
static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void)
{
sljit_u32 max_offset;

#if PCRE2_CODE_UNIT_WIDTH == 8
max_offset = 15;
#elif PCRE2_CODE_UNIT_WIDTH == 16
max_offset = 7;
#elif PCRE2_CODE_UNIT_WIDTH == 32
max_offset = 3;
#else
#error "Unsupported unit width"
#endif

return max_offset;
}
|
||||
|
||||
/* ARM doesn't have a shift left across lanes. */
|
||||
static SLJIT_INLINE vect_t shift_left_n_lanes(vect_t a, sljit_u8 n)
|
||||
{
|
||||
vect_t zero = VDUPQ(0);
|
||||
SLJIT_ASSERT(0 < n && n < VECTOR_FACTOR);
|
||||
/* VEXTQ takes an immediate as last argument. */
|
||||
#define C(X) case X: return VEXTQ(zero, a, VECTOR_FACTOR - X);
|
||||
switch (n)
|
||||
{
|
||||
C(1); C(2); C(3);
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
C(4); C(5); C(6); C(7);
|
||||
# if PCRE2_CODE_UNIT_WIDTH != 16
|
||||
C(8); C(9); C(10); C(11); C(12); C(13); C(14); C(15);
|
||||
# endif
|
||||
#endif
|
||||
default:
|
||||
/* Based on the ASSERT(0 < n && n < VECTOR_FACTOR) above, this won't
|
||||
happen. The return is still here for compilers to not warn. */
|
||||
return a;
|
||||
}
|
||||
}
|
||||
|
||||
#define FFCPS
|
||||
#define FFCPS_DIFF1
|
||||
#define FFCPS_CHAR1A2A
|
||||
|
||||
#define FFCPS_0
|
||||
#include "pcre2_jit_neon_inc.h"
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
# define FF_UTF
|
||||
# include "pcre2_jit_neon_inc.h"
|
||||
# undef FF_UTF
|
||||
#endif
|
||||
#undef FFCPS_0
|
||||
|
||||
#undef FFCPS_CHAR1A2A
|
||||
|
||||
#define FFCPS_1
|
||||
#include "pcre2_jit_neon_inc.h"
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
# define FF_UTF
|
||||
# include "pcre2_jit_neon_inc.h"
|
||||
# undef FF_UTF
|
||||
#endif
|
||||
#undef FFCPS_1
|
||||
|
||||
#undef FFCPS_DIFF1
|
||||
|
||||
#define FFCPS_DEFAULT
|
||||
#include "pcre2_jit_neon_inc.h"
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
# define FF_UTF
|
||||
# include "pcre2_jit_neon_inc.h"
|
||||
# undef FF_UTF
|
||||
#endif
|
||||
#undef FFCPS
|
||||
|
||||
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1
|
||||
|
||||
/* Emits JIT code that advances STR_PTR by calling one of the NEON pair
   search helpers. Only valid in PCRE2_JIT_COMPLETE mode with offs1 > offs2.

   The helper gets: SLJIT_R0 = end of the searched area, SLJIT_R1 = address
   of the LOCAL0 slot holding the saved STR_PTR, SLJIT_R2 = offs1,
   SLJIT_R3 = offs2, SLJIT_R4 = the four characters packed into an int_char.
   ffcps_0 is used when the offsets differ by one code unit and neither
   position has an alternative character, ffcps_1 when they differ by one
   but an alternative exists, and ffcps_default otherwise; the _utf variants
   are chosen when common->utf is set. A zero return value jumps to
   failed_match, any other value becomes the new STR_PTR. */
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
  PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
{
DEFINE_COMPILER;
sljit_u32 diff = IN_UCHARS(offs1 - offs2);
struct sljit_jump *no_match;
int_char packed;
sljit_sw helper_addr;

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset()));
SLJIT_ASSERT(compiler->scratches == 5);

/* Pick the helper first; this emits no code. */
if (diff != 1)
  helper_addr = SLJIT_FUNC_ADDR(ffcps_default);
else if (char1a == char1b && char2a == char2b)
  helper_addr = SLJIT_FUNC_ADDR(ffcps_0);
else
  helper_addr = SLJIT_FUNC_ADDR(ffcps_1);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
  {
  if (diff != 1)
    helper_addr = SLJIT_FUNC_ADDR(ffcps_default_utf);
  else if (char1a == char1b && char2a == char2b)
    helper_addr = SLJIT_FUNC_ADDR(ffcps_0_utf);
  else
    helper_addr = SLJIT_FUNC_ADDR(ffcps_1_utf);
  }
#endif

/* Save temporary register STR_PTR. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, STR_PTR, 0);

/* First argument: the end of the searched area. */
if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP2(SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));

  /* Use STR_END instead when it is the lower of the two limits. */
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, SLJIT_R0, 0);
  SELECT(SLJIT_LESS, SLJIT_R0, STR_END, 0, SLJIT_R0);
  }
else
  OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0);

/* Remaining arguments. */
GET_LOCAL_BASE(SLJIT_R1, 0, LOCAL0);
OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_IMM, offs1);
OP1(SLJIT_MOV_S32, SLJIT_R3, 0, SLJIT_IMM, offs2);

packed.c.c1 = char1a;
packed.c.c2 = char1b;
packed.c.c3 = char2a;
packed.c.c4 = char2b;
OP1(SLJIT_MOV_U32, SLJIT_R4, 0, SLJIT_IMM, packed.x);

sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
  SLJIT_IMM, helper_addr);

/* Restore STR_PTR register. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);

/* A zero return value means that nothing was found. */
no_match = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
add_jump(compiler, &common->failed_match, no_match);

/* Fast forward STR_PTR to the position returned by the helper. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

JUMPHERE(no_match);
}
|
||||
|
||||
#endif /* SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 */
|
||||
|
||||
#if (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define VECTOR_ELEMENT_SIZE 0
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
#define VECTOR_ELEMENT_SIZE 1
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 32
|
||||
#define VECTOR_ELEMENT_SIZE 2
|
||||
#else
|
||||
#error "Unsupported unit width"
|
||||
#endif
|
||||
|
||||
/* Emits one six byte vector load instruction.

   vlbb       selects the final opcode byte: 0x07 when TRUE, 0x06 otherwise
              (presumably VLBB versus VL - confirm against the ISA manual).
   dst_vreg   index of the destination vector register.
   base_reg   hardware index of the base address register.
   index_reg  hardware index of the index register (0 at every call site in
              this file). */
static void load_from_mem_vector(struct sljit_compiler *compiler, BOOL vlbb, sljit_s32 dst_vreg,
  sljit_s32 base_reg, sljit_s32 index_reg)
{
const int opcode_low = vlbb ? 0x07 : 0x06;
sljit_u16 insn[3];

insn[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | index_reg);
insn[1] = (sljit_u16)(base_reg << 12);
insn[2] = (sljit_u16)((0x8 << 8) | opcode_low);

sljit_emit_op_custom(compiler, insn, 6);
}
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
|
||||
static void replicate_imm_vector(struct sljit_compiler *compiler, int step, sljit_s32 dst_vreg,
|
||||
PCRE2_UCHAR chr, sljit_s32 tmp_general_reg)
|
||||
{
|
||||
sljit_u16 instruction[3];
|
||||
|
||||
SLJIT_ASSERT(step >= 0 && step <= 1);
|
||||
|
||||
if (chr < 0x7fff)
|
||||
{
|
||||
if (step == 1)
|
||||
return;
|
||||
|
||||
/* VREPI */
|
||||
instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4));
|
||||
instruction[1] = (sljit_u16)chr;
|
||||
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
|
||||
sljit_emit_op_custom(compiler, instruction, 6);
|
||||
return;
|
||||
}
|
||||
|
||||
if (step == 0)
|
||||
{
|
||||
OP1(SLJIT_MOV, tmp_general_reg, 0, SLJIT_IMM, chr);
|
||||
|
||||
/* VLVG */
|
||||
instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | sljit_get_register_index(SLJIT_GP_REGISTER, tmp_general_reg));
|
||||
instruction[1] = 0;
|
||||
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x22);
|
||||
sljit_emit_op_custom(compiler, instruction, 6);
|
||||
return;
|
||||
}
|
||||
|
||||
/* VREP */
|
||||
instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | dst_vreg);
|
||||
instruction[1] = 0;
|
||||
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xc << 8) | 0x4d);
|
||||
sljit_emit_op_custom(compiler, instruction, 6);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* Emits at most one vector instruction of a compare sequence that the caller
   drives with step = 0, 1, 2.

   step 1 (every compare type):       VCEQ dst = (dst == cmp1)
   step 0, vector_compare_match1i:    VO   dst = dst | cmp2
   step 0, vector_compare_match2:     VCEQ tmp = (dst == cmp2)
   step 2, vector_compare_match2:     VO   dst = dst | tmp
   Every other combination emits nothing. */
static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, vector_compare_type compare_type,
  int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
{
sljit_u16 insn[3];
sljit_s32 result_ind;
sljit_s32 operand_ind;
BOOL is_vceq;

SLJIT_ASSERT(step >= 0 && step <= 2);

if (step == 1)
  {
  result_ind = dst_ind;
  operand_ind = cmp1_ind;
  is_vceq = TRUE;
  }
else if (compare_type != vector_compare_match2)
  {
  if (step != 0 || compare_type != vector_compare_match1i)
    return;

  result_ind = dst_ind;
  operand_ind = cmp2_ind;
  is_vceq = FALSE;
  }
else if (step == 0)
  {
  result_ind = tmp_ind;
  operand_ind = cmp2_ind;
  is_vceq = TRUE;
  }
else if (step == 2)
  {
  result_ind = dst_ind;
  operand_ind = tmp_ind;
  is_vceq = FALSE;
  }
else
  return;

insn[0] = (sljit_u16)(0xe700 | (result_ind << 4) | dst_ind);
insn[1] = (sljit_u16)(operand_ind << 12);

if (is_vceq)
  /* VCEQ */
  insn[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0xf8);
else
  /* VO */
  insn[2] = (sljit_u16)((0xe << 8) | 0x6a);

sljit_emit_op_custom(compiler, insn, 6);
}
|
||||
|
||||
#define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1
|
||||
|
||||
/* Emits s390x vector code that advances STR_PTR to the next code unit equal
   to char1 or char2.

   common  compiler state (sljit compiler, match mode, failed_match list).
   char1   first character to look for.
   char2   alternative character; equal to char1 when only one is wanted.
   offset  only used in UTF mode: when greater than zero, the code unit at
           STR_PTR - offset must start a character, otherwise the search is
           restarted one code unit further.

   Compare strategy: when the characters differ in exactly one bit, the
   input is OR-ed with that bit and compared with (char1 | bit); when they
   differ in more bits, the input is compared with both characters and the
   results are combined; otherwise a single compare is used.

   The first vector is loaded from STR_PTR as it is, later ones from 16 byte
   aligned addresses. When STR_END is reached, PCRE2_JIT_COMPLETE mode jumps
   to failed_match while the other modes limit STR_PTR to STR_END. */
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
sljit_u16 instruction[3];
struct sljit_label *start;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
#endif
struct sljit_jump *quit;
struct sljit_jump *partial_quit[2];
vector_compare_type compare_type = vector_compare_match1;
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
/* Vector register indexes used by this function. */
sljit_s32 data_ind = 0;
sljit_s32 tmp_ind = 1;
sljit_s32 cmp1_ind = 2;
sljit_s32 cmp2_ind = 3;
sljit_s32 zero_ind = 4;
sljit_u32 bit = 0;
int i;

SLJIT_UNUSED_ARG(offset);

if (char1 != char2)
  {
  bit = char1 ^ char2;
  compare_type = vector_compare_match1i;

  if (!is_powerof2(bit))
    {
    bit = 0;
    compare_type = vector_compare_match2;
    }
  }

partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit[0]);

/* First part (unaligned start) */

OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 16);

#if PCRE2_CODE_UNIT_WIDTH != 32

/* VREPI */
instruction[0] = (sljit_u16)(0xe700 | (cmp1_ind << 4));
instruction[1] = (sljit_u16)(char1 | bit);
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
sljit_emit_op_custom(compiler, instruction, 6);

if (char1 != char2)
  {
  /* VREPI */
  instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4));
  instruction[1] = (sljit_u16)(bit != 0 ? bit : char2);
  /* instruction[2] still holds the VREPI encoding set above. */
  sljit_emit_op_custom(compiler, instruction, 6);
  }

#else /* PCRE2_CODE_UNIT_WIDTH == 32 */

/* Reuse the function level counter instead of declaring a second,
   shadowing one. */
for (i = 0; i < 2; i++)
  {
  replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP1);

  if (char1 != char2)
    replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP1);
  }

#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */

if (compare_type == vector_compare_match2)
  {
  /* VREPI: all zero vector used by the VFENE instructions below. */
  instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4));
  instruction[1] = 0;
  instruction[2] = (sljit_u16)((0x8 << 8) | 0x45);
  sljit_emit_op_custom(compiler, instruction, 6);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
restart = LABEL();
#endif

load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0);
/* TMP2 = start of the 16 byte aligned block following STR_PTR. */
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15);

if (compare_type != vector_compare_match2)
  {
  if (compare_type == vector_compare_match1i)
    fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

  /* VFEE */
  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
  instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
  instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
  sljit_emit_op_custom(compiler, instruction, 6);
  }
else
  {
  for (i = 0; i < 3; i++)
    fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

  /* VFENE */
  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
  instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
  instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
  sljit_emit_op_custom(compiler, instruction, 6);
  }

/* VLGVB: copy the search result into TMP1. */
instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind);
instruction[1] = 7;
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
sljit_emit_op_custom(compiler, instruction, 6);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
/* A position below TMP2 is taken as the result; otherwise the aligned loop
   takes over. */
quit = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16);

/* Second part (aligned) */
start = LABEL();

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);

partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit[1]);

load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0);

if (compare_type != vector_compare_match2)
  {
  if (compare_type == vector_compare_match1i)
    fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

  /* VFEE */
  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
  instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
  instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
  sljit_emit_op_custom(compiler, instruction, 6);
  }
else
  {
  for (i = 0; i < 3; i++)
    fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

  /* VFENE */
  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
  instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
  instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
  sljit_emit_op_custom(compiler, instruction, 6);
  }

/* Continue with the next block while the instruction above leaves the
   condition that sljit tests as SLJIT_OVERFLOW. */
sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW);
JUMPTO(SLJIT_OVERFLOW, start);

/* VLGVB */
instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind);
instruction[1] = 7;
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
sljit_emit_op_custom(compiler, instruction, 6);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

JUMPHERE(quit);

if (common->mode != PCRE2_JIT_COMPLETE)
  {
  JUMPHERE(partial_quit[0]);
  JUMPHERE(partial_quit[1]);
  /* Do not let STR_PTR run past STR_END. */
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
  SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
  }
else
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);

  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));

  quit = jump_if_utf_char_start(compiler, TMP1);

  /* Not a character start: retry from the next code unit. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 16);
  JUMPTO(SLJIT_JUMP, restart);

  JUMPHERE(quit);
  }
#endif
}
|
||||
|
||||
#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD 1
|
||||
|
||||
/* Emits s390x vector code that searches forward from the address in TMP1
   for a code unit equal to char1 or char2.

   common  compiler state (supplies the sljit compiler).
   char1   first character to look for.
   char2   alternative character; equal to char1 when only one is wanted.

   Returns the list of jumps taken when TMP1 reaches or passes STR_END, that
   is, when nothing is found. On the fall-through path TMP1 holds the
   computed position. TMP2 and TMP3 are used as scratch registers.

   The compare strategy is chosen as in fast_forward_char_simd(): one
   compare, an OR followed by a compare when the characters differ in a
   single bit, or two compares combined otherwise. */
static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
{
DEFINE_COMPILER;
sljit_u16 instruction[3];
struct sljit_label *start;
struct sljit_jump *quit;
jump_list *not_found = NULL;
vector_compare_type compare_type = vector_compare_match1;
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
sljit_s32 tmp3_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP3);
/* Vector register indexes used by this function. */
sljit_s32 data_ind = 0;
sljit_s32 tmp_ind = 1;
sljit_s32 cmp1_ind = 2;
sljit_s32 cmp2_ind = 3;
sljit_s32 zero_ind = 4;
sljit_u32 bit = 0;
int i;

if (char1 != char2)
  {
  bit = char1 ^ char2;
  compare_type = vector_compare_match1i;

  if (!is_powerof2(bit))
    {
    bit = 0;
    compare_type = vector_compare_match2;
    }
  }

add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));

/* First part (unaligned start) */

OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, 16);

#if PCRE2_CODE_UNIT_WIDTH != 32

/* VREPI */
instruction[0] = (sljit_u16)(0xe700 | (cmp1_ind << 4));
instruction[1] = (sljit_u16)(char1 | bit);
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
sljit_emit_op_custom(compiler, instruction, 6);

if (char1 != char2)
  {
  /* VREPI */
  instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4));
  instruction[1] = (sljit_u16)(bit != 0 ? bit : char2);
  /* instruction[2] still holds the VREPI encoding set above. */
  sljit_emit_op_custom(compiler, instruction, 6);
  }

#else /* PCRE2_CODE_UNIT_WIDTH == 32 */

/* Reuse the function level counter instead of declaring a second,
   shadowing one. */
for (i = 0; i < 2; i++)
  {
  replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP3);

  if (char1 != char2)
    replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP3);
  }

#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */

if (compare_type == vector_compare_match2)
  {
  /* VREPI: all zero vector used by the VFENE instructions below. */
  instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4));
  instruction[1] = 0;
  instruction[2] = (sljit_u16)((0x8 << 8) | 0x45);
  sljit_emit_op_custom(compiler, instruction, 6);
  }

load_from_mem_vector(compiler, TRUE, data_ind, tmp1_reg_ind, 0);
/* TMP2 = start of the 16 byte aligned block following TMP1. */
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15);

if (compare_type != vector_compare_match2)
  {
  if (compare_type == vector_compare_match1i)
    fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

  /* VFEE */
  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
  instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
  instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
  sljit_emit_op_custom(compiler, instruction, 6);
  }
else
  {
  for (i = 0; i < 3; i++)
    fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

  /* VFENE */
  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
  instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
  instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
  sljit_emit_op_custom(compiler, instruction, 6);
  }

/* VLGVB: copy the search result into TMP3. */
instruction[0] = (sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind);
instruction[1] = 7;
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
sljit_emit_op_custom(compiler, instruction, 6);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
/* A position below TMP2 is taken as the result; otherwise the aligned loop
   takes over. */
quit = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0);

OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 16);

/* Second part (aligned) */
start = LABEL();

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 16);

add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));

load_from_mem_vector(compiler, TRUE, data_ind, tmp1_reg_ind, 0);

if (compare_type != vector_compare_match2)
  {
  if (compare_type == vector_compare_match1i)
    fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

  /* VFEE */
  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
  instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
  instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
  sljit_emit_op_custom(compiler, instruction, 6);
  }
else
  {
  for (i = 0; i < 3; i++)
    fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

  /* VFENE */
  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
  instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
  instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
  sljit_emit_op_custom(compiler, instruction, 6);
  }

/* Continue with the next block while the instruction above leaves the
   condition that sljit tests as SLJIT_OVERFLOW. */
sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW);
JUMPTO(SLJIT_OVERFLOW, start);

/* VLGVB */
instruction[0] = (sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind);
instruction[1] = 7;
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
sljit_emit_op_custom(compiler, instruction, 6);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);

JUMPHERE(quit);
/* A position at or after STR_END does not count as a match. */
add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));

return not_found;
}
|
||||
|
||||
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1
|
||||
|
||||
/* Emits s390x vector code that scans forward for a pair of code units.

   common          JIT compiler state; must be in PCRE2_JIT_COMPLETE mode.
   offs1, offs2    offsets (in code units) of the two characters; offs1 must
                   be greater than offs2 (asserted below).
   char1a, char1b  the accepted values at offs1 (equal when only one value).
   char2a, char2b  the accepted values at offs2 (equal when only one value).

   The generated code adds a jump to common->failed_match whenever STR_PTR
   reaches STR_END. On the fall-through path STR_PTR has IN_UCHARS(offs1)
   subtracted again, and STR_END is restored from TMP3 when
   common->match_end_ptr is set. TMP1, TMP2 and TMP3 are overwritten. */
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
  PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
{
DEFINE_COMPILER;
sljit_u16 instruction[3];
struct sljit_label *start;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
#endif
struct sljit_jump *quit;
struct sljit_jump *jump[2];
vector_compare_type compare1_type = vector_compare_match1;
vector_compare_type compare2_type = vector_compare_match1;
sljit_u32 bit1 = 0;
sljit_u32 bit2 = 0;
/* Negative byte distance from the offs1 character to the offs2 character. */
sljit_s32 diff = IN_UCHARS(offs2 - offs1);
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
sljit_s32 tmp2_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP2);
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
/* Vector register numbers used by the emitted instructions. */
sljit_s32 data1_ind = 0;
sljit_s32 data2_ind = 1;
sljit_s32 tmp1_ind = 2;
sljit_s32 tmp2_ind = 3;
sljit_s32 cmp1a_ind = 4;
sljit_s32 cmp1b_ind = 5;
sljit_s32 cmp2a_ind = 6;
sljit_s32 cmp2b_ind = 7;
sljit_s32 zero_ind = 8;
int i;

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
SLJIT_ASSERT(-diff <= (sljit_s32)IN_UCHARS(max_fast_forward_char_pair_offset()));
SLJIT_ASSERT(tmp1_reg_ind != 0 && tmp2_reg_ind != 0);

/* Pick the comparison strategy for each character: a single value, two values
   differing in exactly one bit, or two unrelated values. */
if (char1a != char1b)
  {
  bit1 = char1a ^ char1b;
  compare1_type = vector_compare_match1i;

  if (!is_powerof2(bit1))
    {
    bit1 = 0;
    compare1_type = vector_compare_match2;
    }
  }

if (char2a != char2b)
  {
  bit2 = char2a ^ char2b;
  compare2_type = vector_compare_match1i;

  if (!is_powerof2(bit2))
    {
    bit2 = 0;
    compare2_type = vector_compare_match2;
    }
  }

/* Initialize. */
if (common->match_end_ptr != 0)
  {
  /* Save STR_END in TMP3 and clamp it to match_end_ptr + offs1 + 1. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));

  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0);
  SELECT(SLJIT_LESS, STR_END, TMP1, 0, STR_END);
  }

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
/* TMP2 = STR_PTR rounded down to a 16 byte boundary. */
OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15);

#if PCRE2_CODE_UNIT_WIDTH != 32

/* TMP1 = address of the second character (diff is negative). */
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff);

/* VREPI */
instruction[0] = (sljit_u16)(0xe700 | (cmp1a_ind << 4));
instruction[1] = (sljit_u16)(char1a | bit1);
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
sljit_emit_op_custom(compiler, instruction, 6);

if (char1a != char1b)
  {
  /* VREPI (instruction[2] is reused unchanged from the previous VREPI) */
  instruction[0] = (sljit_u16)(0xe700 | (cmp1b_ind << 4));
  instruction[1] = (sljit_u16)(bit1 != 0 ? bit1 : char1b);
  sljit_emit_op_custom(compiler, instruction, 6);
  }

/* VREPI (instruction[2] is reused unchanged) */
instruction[0] = (sljit_u16)(0xe700 | (cmp2a_ind << 4));
instruction[1] = (sljit_u16)(char2a | bit2);
sljit_emit_op_custom(compiler, instruction, 6);

if (char2a != char2b)
  {
  /* VREPI (instruction[2] is reused unchanged) */
  instruction[0] = (sljit_u16)(0xe700 | (cmp2b_ind << 4));
  instruction[1] = (sljit_u16)(bit2 != 0 ? bit2 : char2b);
  sljit_emit_op_custom(compiler, instruction, 6);
  }

#else /* PCRE2_CODE_UNIT_WIDTH == 32 */

/* Reuse the function-scope counter instead of declaring a second,
   shadowing 'i' in the loop header. */
for (i = 0; i < 2; i++)
  {
  replicate_imm_vector(compiler, i, cmp1a_ind, char1a | bit1, TMP1);

  if (char1a != char1b)
    replicate_imm_vector(compiler, i, cmp1b_ind, bit1 != 0 ? bit1 : char1b, TMP1);

  replicate_imm_vector(compiler, i, cmp2a_ind, char2a | bit2, TMP1);

  if (char2a != char2b)
    replicate_imm_vector(compiler, i, cmp2b_ind, bit2 != 0 ? bit2 : char2b, TMP1);
  }

/* TMP1 = address of the second character (diff is negative). */
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff);

#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */

/* VREPI */
instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4));
instruction[1] = 0;
instruction[2] = (sljit_u16)((0x8 << 8) | 0x45);
sljit_emit_op_custom(compiler, instruction, 6);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
restart = LABEL();
#endif

/* Load the second data vector; the load variant depends on whether TMP1
   lies below the aligned base held in TMP2. */
jump[0] = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0);
load_from_mem_vector(compiler, TRUE, data2_ind, tmp1_reg_ind, 0);
jump[1] = JUMP(SLJIT_JUMP);
JUMPHERE(jump[0]);
load_from_mem_vector(compiler, FALSE, data2_ind, tmp1_reg_ind, 0);
JUMPHERE(jump[1]);

load_from_mem_vector(compiler, TRUE, data1_ind, str_ptr_reg_ind, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 16);

for (i = 0; i < 3; i++)
  {
  fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
  fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
  }

/* VN */
instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
instruction[1] = (sljit_u16)(data2_ind << 12);
instruction[2] = (sljit_u16)((0xe << 8) | 0x68);
sljit_emit_op_custom(compiler, instruction, 6);

/* VFENE */
instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
sljit_emit_op_custom(compiler, instruction, 6);

/* VLGVB */
instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data1_ind);
instruction[1] = 7;
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
sljit_emit_op_custom(compiler, instruction, 6);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
quit = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

/* Nothing found in the first chunk: rewind STR_PTR to the aligned base and
   keep diff in TMP1 as the index register for the second load in the loop. */
OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, diff);

/* Main loop. */
start = LABEL();

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

load_from_mem_vector(compiler, FALSE, data1_ind, str_ptr_reg_ind, 0);
load_from_mem_vector(compiler, FALSE, data2_ind, str_ptr_reg_ind, tmp1_reg_ind);

for (i = 0; i < 3; i++)
  {
  fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
  fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
  }

/* VN */
instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
instruction[1] = (sljit_u16)(data2_ind << 12);
instruction[2] = (sljit_u16)((0xe << 8) | 0x68);
sljit_emit_op_custom(compiler, instruction, 6);

/* VFENE */
instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
sljit_emit_op_custom(compiler, instruction, 6);

/* The custom instruction set the flags; tell sljit before branching on them. */
sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW);
JUMPTO(SLJIT_OVERFLOW, start);

/* VLGVB */
instruction[0] = (sljit_u16)(0xe700 | (tmp2_reg_ind << 4) | data1_ind);
instruction[1] = 7;
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
sljit_emit_op_custom(compiler, instruction, 6);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

JUMPHERE(quit);

add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
  {
  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);

  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));

  quit = jump_if_utf_char_start(compiler, TMP1);

  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  /* TMP1 was overwritten by the code unit load above: recompute the aligned
     base (TMP2) and the second character address (TMP1) before restarting. */
  OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15);
  OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff);
  JUMPTO(SLJIT_JUMP, restart);

  JUMPHERE(quit);
  }
#endif

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
|
||||
|
||||
#endif /* SLJIT_CONFIG_S390X */
|
||||
|
||||
#if (defined SLJIT_CONFIG_LOONGARCH_64 && SLJIT_CONFIG_LOONGARCH_64)
|
||||
|
||||
#ifdef __linux__
|
||||
/* Using getauxval(AT_HWCAP) under Linux for detecting whether LSX is available */
|
||||
#include <sys/auxv.h>
|
||||
#define LOONGARCH_HWCAP_LSX (1 << 4)
|
||||
#define HAS_LSX_SUPPORT ((getauxval(AT_HWCAP) & LOONGARCH_HWCAP_LSX) != 0)
|
||||
#else
|
||||
#define HAS_LSX_SUPPORT 0
|
||||
#endif
|
||||
|
||||
typedef sljit_ins sljit_u32;
|
||||
|
||||
#define SI12_IMM_MASK 0x003ffc00
|
||||
#define UI5_IMM_MASK 0x00007c00
|
||||
#define UI2_IMM_MASK 0x00000c00
|
||||
|
||||
/* Register operand encoders: place a register number into its bit field of
   an instruction word (shift 0, 5 or 10). The argument is parenthesized so
   that an expression argument is cast and shifted as a whole. */
#define VD(vd) ((sljit_ins)(vd) << 0)
#define VJ(vj) ((sljit_ins)(vj) << 5)
#define VK(vk) ((sljit_ins)(vk) << 10)
#define RD_V(rd) ((sljit_ins)(rd) << 0)
#define RJ_V(rj) ((sljit_ins)(rj) << 5)

/* Immediate operand encoders: shift the value to bit 10 and mask it to the
   width of the corresponding immediate field. */
#define IMM_SI12(imm) (((sljit_ins)(imm) << 10) & SI12_IMM_MASK)
#define IMM_UI5(imm) (((sljit_ins)(imm) << 10) & UI5_IMM_MASK)
#define IMM_UI2(imm) (((sljit_ins)(imm) << 10) & UI2_IMM_MASK)
|
||||
|
||||
// LSX OPCODES:
|
||||
#define VLD 0x2c000000
|
||||
#define VOR_V 0x71268000
|
||||
#define VAND_V 0x71260000
|
||||
#define VBSLL_V 0x728e0000
|
||||
#define VMSKLTZ_B 0x729c4000
|
||||
#define VPICKVE2GR_WU 0x72f3e000
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define VREPLGR2VR 0x729f0000
|
||||
#define VSEQ 0x70000000
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
#define VREPLGR2VR 0x729f0400
|
||||
#define VSEQ 0x70008000
|
||||
#else
|
||||
#define VREPLGR2VR 0x729f0800
|
||||
#define VSEQ 0x70010000
|
||||
#endif
|
||||
|
||||
/* Emits the LSX instructions that turn the data vector dst_ind into a
   per-element match mask, in place.

   compare_type  selects the comparison strategy:
                   vector_compare_match1   dst == cmp1
                   vector_compare_match1i  (dst | cmp2) == cmp1
                   vector_compare_match2   (dst == cmp1) | (dst == cmp2)
   dst_ind       vector register holding the data; receives the mask.
   cmp1_ind      vector register with the first comparison value.
   cmp2_ind      vector register with the second value (or the bit to set).
   tmp_ind       scratch vector register, written only for match2. */
static void fast_forward_char_pair_lsx_compare(struct sljit_compiler *compiler, vector_compare_type compare_type,
  sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
{
/* VSEQ.B/H/W dst, dst, cmp1 (needed by every strategy) */
const sljit_ins seq_dst_cmp1 = VSEQ | VD(dst_ind) | VJ(dst_ind) | VK(cmp1_ind);

if (compare_type == vector_compare_match2)
  {
  /* VBSLL.V tmp, dst, 0 (a shift by zero bytes, i.e. a copy of dst) */
  push_inst(compiler, VBSLL_V | VD(tmp_ind) | VJ(dst_ind) | IMM_UI5(0));

  push_inst(compiler, seq_dst_cmp1);

  /* VSEQ.B/H/W tmp, tmp, cmp2 */
  push_inst(compiler, VSEQ | VD(tmp_ind) | VJ(tmp_ind) | VK(cmp2_ind));

  /* VOR.V dst, tmp, dst */
  push_inst(compiler, VOR_V | VD(dst_ind) | VJ(tmp_ind) | VK(dst_ind));
  }
else
  {
  if (compare_type == vector_compare_match1i)
    {
    /* VOR.V dst, cmp2, dst */
    push_inst(compiler, VOR_V | VD(dst_ind) | VJ(cmp2_ind) | VK(dst_ind));
    }

  push_inst(compiler, seq_dst_cmp1);
  }
}
|
||||
|
||||
#define JIT_HAS_FAST_FORWARD_CHAR_SIMD HAS_LSX_SUPPORT
|
||||
|
||||
/* Emits LSX code that moves STR_PTR forward to the next code unit equal to
   char1 or char2 (the two are equal when only one value is accepted).

   In PCRE2_JIT_COMPLETE mode, reaching STR_END adds a jump to
   common->failed_match. In the other modes the end-of-subject jumps land
   after the search, where STR_PTR is clamped so it does not exceed STR_END.
   offset is only consulted in UTF builds (width != 32). TMP1 and TMP2 are
   overwritten. */
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
const int complete_mode = (common->mode == PCRE2_JIT_COMPLETE);
const sljit_s32 tmp1_reg = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
const sljit_s32 str_ptr_reg = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
/* Vector register assignment. */
const sljit_s32 vec_data = 0;
const sljit_s32 vec_scratch = 1;
const sljit_s32 vec_cmp1 = 2;
const sljit_s32 vec_cmp2 = 3;
/* Instruction words that are emitted more than once. */
const sljit_ins ins_load = VLD | VD(vec_data) | RJ_V(str_ptr_reg) | IMM_SI12(0);
const sljit_ins ins_mask = VMSKLTZ_B | VD(vec_scratch) | VJ(vec_data);
const sljit_ins ins_pick = VPICKVE2GR_WU | RD_V(tmp1_reg) | VJ(vec_scratch) | IMM_UI2(0);
vector_compare_type compare_type = vector_compare_match1;
sljit_u32 case_bit = 0;
struct sljit_label *loop_top;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
#endif
struct sljit_jump *found;
struct sljit_jump *at_end_first;
struct sljit_jump *at_end_loop;

SLJIT_UNUSED_ARG(offset);

if (char1 != char2)
  {
  case_bit = char1 ^ char2;

  if (is_powerof2(case_bit))
    compare_type = vector_compare_match1i;
  else
    {
    case_bit = 0;
    compare_type = vector_compare_match2;
    }
  }

at_end_first = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (complete_mode)
  add_jump(compiler, &common->failed_match, at_end_first);

/* First part (unaligned start) */

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1 | case_bit);

/* VREPLGR2VR.B/H/W vd, rj */
push_inst(compiler, VREPLGR2VR | VD(vec_cmp1) | RJ_V(tmp1_reg));

if (char1 != char2)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, case_bit != 0 ? case_bit : char2);

  /* VREPLGR2VR.B/H/W vd, rj */
  push_inst(compiler, VREPLGR2VR | VD(vec_cmp2) | RJ_V(tmp1_reg));
  }

OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
restart = LABEL();
#endif

/* TMP2 = misalignment of STR_PTR within 16 bytes; align STR_PTR down. */
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* VLD vd, rj, si12 */
push_inst(compiler, ins_load);
fast_forward_char_pair_lsx_compare(compiler, compare_type, vec_data, vec_cmp1, vec_cmp2, vec_scratch);

/* VMSKLTZ.B vd, vj */
push_inst(compiler, ins_mask);

/* VPICKVE2GR.WU rd, vj, ui2 */
push_inst(compiler, ins_pick);

/* Restore STR_PTR and shift out the mask bits that precede it. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);

found = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* Second part (aligned) */
loop_top = LABEL();

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);

at_end_loop = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (complete_mode)
  add_jump(compiler, &common->failed_match, at_end_loop);

/* VLD vd, rj, si12 */
push_inst(compiler, ins_load);
fast_forward_char_pair_lsx_compare(compiler, compare_type, vec_data, vec_cmp1, vec_cmp2, vec_scratch);

/* VMSKLTZ.B vd, vj */
push_inst(compiler, ins_mask);

/* VPICKVE2GR.WU rd, vj, ui2 */
push_inst(compiler, ins_pick);

CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, loop_top);

JUMPHERE(found);

/* CTZ.W rd, rj */
push_inst(compiler, CTZ_W | RD_V(tmp1_reg) | RJ_V(tmp1_reg));

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

if (complete_mode)
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
else
  {
  JUMPHERE(at_end_first);
  JUMPHERE(at_end_loop);
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
  SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);

  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));

  found = jump_if_utf_char_start(compiler, TMP1);

  /* Not at a character start: step one code unit and search again. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, restart);

  JUMPHERE(found);
  }
#endif
}
|
||||
|
||||
#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD HAS_LSX_SUPPORT
|
||||
|
||||
/* Emits LSX code that searches for char1 or char2 (equal when only one value
   is accepted), starting at the address held in TMP1.

   Returns the list of jumps taken when the search reaches STR_END without a
   match. On the fall-through path TMP1 holds the address of the match and
   STR_PTR, which is used as the scan pointer, is restored from TMP3.
   TMP2 and TMP3 are overwritten. */
static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
jump_list *not_found = NULL;
vector_compare_type compare_type = vector_compare_match1;
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
/* Vector register numbers used by the emitted instructions. */
sljit_s32 data_ind = 0;
sljit_s32 tmp_ind = 1;
sljit_s32 cmp1_ind = 2;
sljit_s32 cmp2_ind = 3;
sljit_u32 bit = 0;

if (char1 != char2)
  {
  bit = char1 ^ char2;
  compare_type = vector_compare_match1i;

  if (!is_powerof2(bit))
    {
    bit = 0;
    compare_type = vector_compare_match2;
    }
  }

add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
/* Keep the start address in TMP2 and the caller's STR_PTR in TMP3. */
OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);

/* First part (unaligned start) */

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1 | bit);

/* VREPLGR2VR vd, rj */
push_inst(compiler, VREPLGR2VR | VD(cmp1_ind) | RJ_V(tmp1_reg_ind));

if (char1 != char2)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, bit != 0 ? bit : char2);
  /* VREPLGR2VR vd, rj */
  push_inst(compiler, VREPLGR2VR | VD(cmp2_ind) | RJ_V(tmp1_reg_ind));
  }

/* STR_PTR = start address aligned down to 16 bytes, TMP2 = misalignment. */
OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* VLD vd, rj, si12 */
push_inst(compiler, VLD | VD(data_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
fast_forward_char_pair_lsx_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

/* VMSKLTZ.B vd, vj */
push_inst(compiler, VMSKLTZ_B | VD(tmp_ind) | VJ(data_ind));

/* VPICKVE2GR.WU rd, vj, ui2 */
push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp_ind) | IMM_UI2(0));

/* Shift out the mask bits that precede the start address. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);

quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* Second part (aligned) */
start = LABEL();

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);

add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

/* VLD vd, rj, si12 */
push_inst(compiler, VLD | VD(data_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
fast_forward_char_pair_lsx_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

/* VMSKLTZ.B vd, vj */
push_inst(compiler, VMSKLTZ_B | VD(tmp_ind) | VJ(data_ind));

/* VPICKVE2GR.WU rd, vj, ui2 */
push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp_ind) | IMM_UI2(0));

CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);

JUMPHERE(quit);

/* CTZ.W rd, rj */
push_inst(compiler, CTZ_W | RD_V(tmp1_reg_ind) | RJ_V(tmp1_reg_ind));

/* TMP1 = address of the first matching element. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, STR_PTR, 0);
add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));

OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
return not_found;
}
|
||||
|
||||
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD HAS_LSX_SUPPORT
|
||||
|
||||
/* Emits LSX code that scans forward for a pair of code units.

   common          JIT compiler state; must be in PCRE2_JIT_COMPLETE mode.
   offs1, offs2    offsets (in code units) of the two characters; offs1 must
                   be greater than offs2 (asserted below).
   char1a, char1b  the accepted values at offs1 (equal when only one value).
   char2a, char2b  the accepted values at offs2 (equal when only one value).

   The generated code adds a jump to common->failed_match whenever STR_PTR
   reaches STR_END. On the fall-through path STR_PTR has IN_UCHARS(offs1)
   subtracted again, and STR_END is restored from TMP3 when
   common->match_end_ptr is set. TMP1, TMP2 and TMP3 are overwritten. */
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
  PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
{
DEFINE_COMPILER;
vector_compare_type compare1_type = vector_compare_match1;
vector_compare_type compare2_type = vector_compare_match1;
sljit_u32 bit1 = 0;
sljit_u32 bit2 = 0;
/* Byte distance between the two characters. */
sljit_u32 diff = IN_UCHARS(offs1 - offs2);
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
sljit_s32 tmp2_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP2);
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
/* Vector register numbers used by the emitted instructions. */
sljit_s32 data1_ind = 0;
sljit_s32 data2_ind = 1;
sljit_s32 tmp1_ind = 2;
sljit_s32 tmp2_ind = 3;
sljit_s32 cmp1a_ind = 4;
sljit_s32 cmp1b_ind = 5;
sljit_s32 cmp2a_ind = 6;
sljit_s32 cmp2b_ind = 7;
struct sljit_label *start;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
#endif
struct sljit_jump *jump[2];

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
SLJIT_ASSERT(diff <= (unsigned)IN_UCHARS(max_fast_forward_char_pair_offset()));

/* Initialize. */
if (common->match_end_ptr != 0)
  {
  /* Save STR_END in TMP3 and clamp it to match_end_ptr + offs1 + 1. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0);
  SELECT(SLJIT_LESS, STR_END, TMP1, 0, STR_END);
  }

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

/* Load the comparison value(s) for the first character into TMP1/TMP2. */
if (char1a == char1b)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1a);
else
  {
  bit1 = char1a ^ char1b;
  if (is_powerof2(bit1))
    {
    compare1_type = vector_compare_match1i;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1a | bit1);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, bit1);
    }
  else
    {
    compare1_type = vector_compare_match2;
    bit1 = 0;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1a);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, char1b);
    }
  }

/* VREPLGR2VR vd, rj */
push_inst(compiler, VREPLGR2VR | VD(cmp1a_ind) | RJ_V(tmp1_reg_ind));

if (char1a != char1b)
  {
  /* VREPLGR2VR vd, rj */
  push_inst(compiler, VREPLGR2VR | VD(cmp1b_ind) | RJ_V(tmp2_reg_ind));
  }

/* Load the comparison value(s) for the second character into TMP1/TMP2. */
if (char2a == char2b)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char2a);
else
  {
  bit2 = char2a ^ char2b;
  if (is_powerof2(bit2))
    {
    compare2_type = vector_compare_match1i;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char2a | bit2);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, bit2);
    }
  else
    {
    compare2_type = vector_compare_match2;
    bit2 = 0;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char2a);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, char2b);
    }
  }

/* VREPLGR2VR vd, rj */
push_inst(compiler, VREPLGR2VR | VD(cmp2a_ind) | RJ_V(tmp1_reg_ind));

if (char2a != char2b)
  {
  /* VREPLGR2VR vd, rj */
  push_inst(compiler, VREPLGR2VR | VD(cmp2b_ind) | RJ_V(tmp2_reg_ind));
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
restart = LABEL();
#endif

/* TMP1 = address of the second character, TMP2 = misalignment of STR_PTR
   within 16 bytes; STR_PTR is aligned down. */
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, diff);
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* VLD vd, rj, si12 */
push_inst(compiler, VLD | VD(data1_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));

/* When the second character lies inside the same aligned chunk, derive the
   second vector from the first one by a byte shift instead of loading from
   below the aligned address. */
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_PTR, 0);

/* VLD vd, rj, si12 */
push_inst(compiler, VLD | VD(data2_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(-(sljit_s8)diff));
jump[1] = JUMP(SLJIT_JUMP);

JUMPHERE(jump[0]);

/* VBSLL.V vd, vj, ui5 */
push_inst(compiler, VBSLL_V | VD(data2_ind) | VJ(data1_ind) | IMM_UI5(diff));

JUMPHERE(jump[1]);

fast_forward_char_pair_lsx_compare(compiler, compare2_type, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
fast_forward_char_pair_lsx_compare(compiler, compare1_type, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);

/* VAND.V vd, vj, vk
   Both characters must match at a position, so the two masks are ANDed,
   exactly as in the main loop below. */
push_inst(compiler, VAND_V | VD(data1_ind) | VJ(data1_ind) | VK(data2_ind));

/* VMSKLTZ.B vd, vj */
push_inst(compiler, VMSKLTZ_B | VD(tmp1_ind) | VJ(data1_ind));

/* VPICKVE2GR.WU rd, vj, ui2 */
push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp1_ind) | IMM_UI2(0));

/* Ignore matches before the first STR_PTR. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);

jump[0] = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* Main loop. */
start = LABEL();

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

/* VLD vd, rj, si12 */
push_inst(compiler, VLD | VD(data1_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
push_inst(compiler, VLD | VD(data2_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(-(sljit_s8)diff));

fast_forward_char_pair_lsx_compare(compiler, compare1_type, data1_ind, cmp1a_ind, cmp1b_ind, tmp2_ind);
fast_forward_char_pair_lsx_compare(compiler, compare2_type, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind);

/* VAND.V vd, vj, vk */
push_inst(compiler, VAND_V | VD(data1_ind) | VJ(data1_ind) | VK(data2_ind));

/* VMSKLTZ.B vd, vj */
push_inst(compiler, VMSKLTZ_B | VD(tmp1_ind) | VJ(data1_ind));

/* VPICKVE2GR.WU rd, vj, ui2 */
push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp1_ind) | IMM_UI2(0));

CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);

JUMPHERE(jump[0]);

/* CTZ.W rd, rj */
push_inst(compiler, CTZ_W | RD_V(tmp1_reg_ind) | RJ_V(tmp1_reg_ind));

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));

  jump[0] = jump_if_utf_char_start(compiler, TMP1);

  /* Not at a character start: step one code unit and search again while
     there is input left, otherwise fail. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart);

  add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP));

  JUMPHERE(jump[0]);
  }
#endif

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
|
||||
|
||||
#endif /* SLJIT_CONFIG_LOONGARCH_64 */
|
||||
|
||||
#endif /* !SUPPORT_VALGRIND */
|
||||
@@ -1,2541 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#define PCRE2_CODE_UNIT_WIDTH 0
|
||||
#include "pcre2.h"
|
||||
|
||||
/*
|
||||
Letter characters:
|
||||
\xe6\x92\xad = 0x64ad = 25773 (kanji)
|
||||
Non-letter characters:
|
||||
\xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
|
||||
\xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
|
||||
\xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
|
||||
\xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
|
||||
Newlines:
|
||||
\xc2\x85 = 0x85 = 133 (NExt Line = NEL)
|
||||
\xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
|
||||
Othercase pairs:
|
||||
\xc3\xa9 = 0xe9 = 233 (e')
|
||||
\xc3\x89 = 0xc9 = 201 (E')
|
||||
\xc3\xa1 = 0xe1 = 225 (a')
|
||||
\xc3\x81 = 0xc1 = 193 (A')
|
||||
\x53 = 0x53 = S
|
||||
\x73 = 0x73 = s
|
||||
\xc5\xbf = 0x17f = 383 (long S)
|
||||
\xc8\xba = 0x23a = 570
|
||||
\xe2\xb1\xa5 = 0x2c65 = 11365
|
||||
\xe1\xbd\xb8 = 0x1f78 = 8056
|
||||
\xe1\xbf\xb8 = 0x1ff8 = 8184
|
||||
\xf0\x90\x90\x80 = 0x10400 = 66560
|
||||
\xf0\x90\x90\xa8 = 0x10428 = 66600
|
||||
\xc7\x84 = 0x1c4 = 452
|
||||
\xc7\x85 = 0x1c5 = 453
|
||||
\xc7\x86 = 0x1c6 = 454
|
||||
Caseless sets:
|
||||
ucp_Armenian - \x{531}-\x{556} -> \x{561}-\x{586}
|
||||
ucp_Coptic - \x{2c80}-\x{2ce3} -> caseless: XOR 0x1
|
||||
ucp_Latin - \x{ff21}-\x{ff3a} -> \x{ff41}-\x{ff5a}
|
||||
|
||||
Mark property:
|
||||
\xcc\x8d = 0x30d = 781
|
||||
Special:
|
||||
\xc2\x80 = 0x80 = 128 (lowest 2 byte character)
|
||||
\xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
|
||||
\xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
|
||||
\xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
|
||||
\xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
|
||||
\xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
|
||||
*/
|
||||
|
||||
static int regression_tests(void);
|
||||
static int invalid_utf8_regression_tests(void);
|
||||
static int invalid_utf16_regression_tests(void);
|
||||
static int invalid_utf32_regression_tests(void);
|
||||
|
||||
/* Test driver entry point.

   Queries JIT availability through the first code unit width that is
   compiled in (8, then 16, then 32). Without JIT it prints a message and
   returns 1. Otherwise it runs all four regression suites and returns the
   bitwise OR of their results. */
int main(void)
{
	int jit_available = 0;
	int result;

#if defined SUPPORT_PCRE2_8
	pcre2_config_8(PCRE2_CONFIG_JIT, &jit_available);
#elif defined SUPPORT_PCRE2_16
	pcre2_config_16(PCRE2_CONFIG_JIT, &jit_available);
#elif defined SUPPORT_PCRE2_32
	pcre2_config_32(PCRE2_CONFIG_JIT, &jit_available);
#endif

	if (jit_available == 0) {
		printf("JIT must be enabled to run pcre2_jit_test\n");
		return 1;
	}

	/* Bitwise OR (not ||) so that every suite runs even after a failure. */
	result = regression_tests()
		| invalid_utf8_regression_tests()
		| invalid_utf16_regression_tests()
		| invalid_utf32_regression_tests();
	return result;
}
|
||||
|
||||
/* --------------------------------------------------------------------------------------- */
|
||||
|
||||
/* Stop the build when none of the code unit widths is enabled. */
#if !(defined(SUPPORT_PCRE2_8) || defined(SUPPORT_PCRE2_16) || defined(SUPPORT_PCRE2_32))
#error SUPPORT_PCRE2_8 or SUPPORT_PCRE2_16 or SUPPORT_PCRE2_32 must be defined
#endif
|
||||
|
||||
/* Short names for the compile-option combinations that fill the first
   column (compile_options) of the regression test table. */

/* Combinations without PCRE2_UTF. */
#define M    (PCRE2_MULTILINE)
#define MP   (PCRE2_MULTILINE | PCRE2_UCP)
#define CM   (PCRE2_CASELESS | PCRE2_MULTILINE)

/* Combinations with PCRE2_UTF. */
#define U    (PCRE2_UTF)
#define MU   (PCRE2_MULTILINE | PCRE2_UTF)
#define MUP  (PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP)
#define CMU  (PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF)
#define CMUP (PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP)
|
||||
|
||||
/* The "newline" column of the test table packs two values into one int:
   the newline convention in the low 16 bits and the \R (BSR) setting in the
   bits above them. BSR() moves a BSR value into the upper part; GET_NEWLINE()
   and GET_BSR() split a packed value again. */
#define BSR(bsr)            ((bsr) << 16)

/* Default newline convention used by most table entries. */
#define A                   PCRE2_NEWLINE_ANYCRLF

#define GET_NEWLINE(packed) ((packed) & 0xffff)
#define GET_BSR(packed)     ((packed) >> 16)
|
||||
|
||||
/* The "start_offset" column of the test table carries the numeric offset in
   its low 16 bits (OFFSET_MASK) and per-test flag bits above that; table
   entries combine them as e.g. "0 | F_NOMATCH". */
#define OFFSET_MASK 0xffff

/* NOTE(review): F_NO32 has the same value as F_NO16, so the two flags cannot
   be told apart at run time. Confirm that this aliasing is intentional. */
#define F_NO8       (1 << 16)
#define F_NO16      (1 << 17)
#define F_NO32      (1 << 17)
#define F_NOMATCH   (1 << 18)
#define F_DIFF      (1 << 19)
#define F_FORCECONV (1 << 20)
#define F_PROPERTY  (1 << 21)
|
||||
|
||||
/* One row of the regression test table. The table is filled with positional
   initialisers, so the order of the members must not change. */
struct regression_test_case {
    /* Compile options, usually one of the M / MU / CMU ... shorthands. */
    uint32_t compile_options;
    /* Newline convention, optionally OR-ed with a BSR(...) value. */
    int newline;
    /* Match-time options such as PCRE2_NOTBOL or PCRE2_NOTEMPTY. */
    int match_options;
    /* Start offset in the low bits, OR-ed with F_* flag bits. */
    int start_offset;
    /* Pattern text. */
    const char *pattern;
    /* Subject text the pattern is matched against. */
    const char *input;
};
|
||||
|
||||
static struct regression_test_case regression_test_cases[] = {
|
||||
/* Constant strings. */
|
||||
{ MU, A, 0, 0, "AbC", "AbAbC" },
|
||||
{ MU, A, 0, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
|
||||
{ CMU, A, 0, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
|
||||
{ M, A, 0, 0, "[^a]", "aAbB" },
|
||||
{ CM, A, 0, 0, "[^m]", "mMnN" },
|
||||
{ M, A, 0, 0, "a[^b][^#]", "abacd" },
|
||||
{ CM, A, 0, 0, "A[^B][^E]", "abacd" },
|
||||
{ CMU, A, 0, 0, "[^x][^#]", "XxBll" },
|
||||
{ MU, A, 0, 0, "[^a]", "aaa\xc3\xa1#Ab" },
|
||||
{ CMU, A, 0, 0, "[^A]", "aA\xe6\x92\xad" },
|
||||
{ MU, A, 0, 0, "\\W(\\W)?\\w", "\r\n+bc" },
|
||||
{ MU, A, 0, 0, "\\W(\\W)?\\w", "\n\r+bc" },
|
||||
{ MU, A, 0, 0, "\\W(\\W)?\\w", "\r\r+bc" },
|
||||
{ MU, A, 0, 0, "\\W(\\W)?\\w", "\n\n+bc" },
|
||||
{ MU, A, 0, 0, "[axd]", "sAXd" },
|
||||
{ CMU, A, 0, 0, "[axd]", "sAXd" },
|
||||
{ CMU, A, 0, 0 | F_NOMATCH, "[^axd]", "DxA" },
|
||||
{ MU, A, 0, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
|
||||
{ MU, A, 0, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
|
||||
{ CMU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
|
||||
{ MU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
|
||||
{ MU, A, 0, 0, "[^a]", "\xc2\x80[]" },
|
||||
{ CMU, A, 0, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
|
||||
{ CM, A, 0, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
|
||||
{ PCRE2_CASELESS, 0, 0, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
|
||||
{ PCRE2_CASELESS, 0, 0, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
|
||||
{ PCRE2_CASELESS, 0, 0, 0, "a1", "Aa1" },
|
||||
#ifndef NEVER_BACKSLASH_C
|
||||
{ M, A, 0, 0, "\\Ca", "cda" },
|
||||
{ CM, A, 0, 0, "\\Ca", "CDA" },
|
||||
{ M, A, 0, 0 | F_NOMATCH, "\\Cx", "cda" },
|
||||
{ CM, A, 0, 0 | F_NOMATCH, "\\Cx", "CDA" },
|
||||
#endif /* !NEVER_BACKSLASH_C */
|
||||
{ CMUP, A, 0, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
|
||||
{ CMUP, A, 0, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
|
||||
{ CMUP, A, 0, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
|
||||
{ CMUP, A, 0, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
|
||||
{ M, A, 0, 0, "[3-57-9]", "5" },
|
||||
{ PCRE2_AUTO_CALLOUT, A, 0, 0, "12345678901234567890123456789012345678901234567890123456789012345678901234567890",
|
||||
"12345678901234567890123456789012345678901234567890123456789012345678901234567890" },
|
||||
{ 0, A, 0, 0, "..a.......b", "bbbbbbbbbbbbbbbbbbbbbabbbbbbbb" },
|
||||
{ 0, A, 0, 0, "..a.....b", "bbbbbbbbbbbbbbbbbbbbbabbbbbbbb" },
|
||||
|
||||
/* Assertions. */
|
||||
{ MU, A, 0, 0, "\\b[^A]", "A_B#" },
|
||||
{ M, A, 0, 0 | F_NOMATCH, "\\b\\W", "\n*" },
|
||||
{ MU, A, 0, 0, "\\B[^,]\\b[^s]\\b", "#X" },
|
||||
{ MP, A, 0, 0, "\\B", "_\xa1" },
|
||||
{ MP, A, 0, 0 | F_PROPERTY, "\\b_\\b[,A]\\B", "_," },
|
||||
{ MUP, A, 0, 0, "\\b", "\xe6\x92\xad!" },
|
||||
{ MUP, A, 0, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
|
||||
{ MUP, A, 0, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
|
||||
{ MUP, A, 0, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
|
||||
{ CMUP, A, 0, 0, "\\By", "\xf0\x90\x90\xa8y" },
|
||||
{ M, A, 0, 0 | F_NOMATCH, "\\R^", "\n" },
|
||||
{ M, A, 0, 1 | F_NOMATCH, "^", "\n" },
|
||||
{ 0, 0, 0, 0, "^ab", "ab" },
|
||||
{ 0, 0, 0, 0 | F_NOMATCH, "^ab", "aab" },
|
||||
{ M, PCRE2_NEWLINE_CRLF, 0, 0, "^a", "\r\raa\n\naa\r\naa" },
|
||||
{ MU, A, 0, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
|
||||
{ M, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--b--\x85--" },
|
||||
{ MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xe2\x80\xa8--" },
|
||||
{ MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xc2\x85--" },
|
||||
{ 0, 0, 0, 0, "ab$", "ab" },
|
||||
{ 0, 0, 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
|
||||
{ PCRE2_DOLLAR_ENDONLY, 0, 0, 0 | F_NOMATCH, "ab$", "abab\r\n" },
|
||||
{ M, PCRE2_NEWLINE_CRLF, 0, 0, "a$", "\r\raa\n\naa\r\naa" },
|
||||
{ M, PCRE2_NEWLINE_ANY, 0, 0, "a$", "aaa" },
|
||||
{ MU, PCRE2_NEWLINE_ANYCRLF, 0, 0, "#$", "#\xc2\x85###\r#" },
|
||||
{ MU, PCRE2_NEWLINE_ANY, 0, 0, "#$", "#\xe2\x80\xa9" },
|
||||
{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0 | F_NOMATCH, "^a", "aa\naa" },
|
||||
{ M, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0, "^a", "aa\naa" },
|
||||
{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\naa" },
|
||||
{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\r\n" },
|
||||
{ U | PCRE2_DOLLAR_ENDONLY, PCRE2_NEWLINE_ANY, 0, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
|
||||
{ M, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0, "a$", "aa\naa" },
|
||||
{ 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa" },
|
||||
{ U, PCRE2_NEWLINE_CR, 0, 0, "a\\Z", "aaa\r" },
|
||||
{ 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa\n" },
|
||||
{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r" },
|
||||
{ U, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\n" },
|
||||
{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r\n" },
|
||||
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
|
||||
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
|
||||
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
|
||||
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
|
||||
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
|
||||
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
|
||||
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
|
||||
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
|
||||
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
|
||||
{ U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xc2\x85" },
|
||||
{ U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
|
||||
{ M, A, 0, 0, "\\Aa", "aaa" },
|
||||
{ M, A, 0, 1 | F_NOMATCH, "\\Aa", "aaa" },
|
||||
{ M, A, 0, 1, "\\Ga", "aaa" },
|
||||
{ M, A, 0, 1 | F_NOMATCH, "\\Ga", "aba" },
|
||||
{ M, A, 0, 0, "a\\z", "aaa" },
|
||||
{ M, A, 0, 0 | F_NOMATCH, "a\\z", "aab" },
|
||||
|
||||
/* Brackets and alternatives. */
|
||||
{ MU, A, 0, 0, "(ab|bb|cd)", "bacde" },
|
||||
{ MU, A, 0, 0, "(?:ab|a)(bc|c)", "ababc" },
|
||||
{ MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
|
||||
{ CMU, A, 0, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
|
||||
{ MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
|
||||
{ MU, A, 0, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
|
||||
{ MU, A, 0, 0, "\xc7\x82|\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
|
||||
{ MU, A, 0, 0, "=\xc7\x82|#\xc6\x82", "\xf1\x83\x82\x82=\xc7\x82\xc7\x83" },
|
||||
{ MU, A, 0, 0, "\xc7\x82\xc7\x83|\xc6\x82\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
|
||||
{ MU, A, 0, 0, "\xc6\x82\xc6\x82|\xc7\x83\xc7\x83|\xc8\x84\xc8\x84", "\xf1\x83\x82\x82\xc8\x84\xc8\x84" },
|
||||
{ U, A, 0, 0, "\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80", "\xdf\xbf\xc2\x80\xe4\x84\x80" },
|
||||
{ U, A, 0, 0, "(?:\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80)#", "\xdf\xbf\xc2\x80#\xe4\x84\x80#" },
|
||||
{ CM, A, 0, 0, "ab|cd", "CD" },
|
||||
{ CM, A, 0, 0, "a1277|a1377|bX487", "bx487" },
|
||||
{ CM, A, 0, 0, "a1277|a1377|bx487", "bX487" },
|
||||
{ 0, A, 0, 0, "(a|)b*+a", "a" },
|
||||
{ 0, A, 0, 0 | F_NOMATCH, "(.|.|.|.|.)(|.|.|.|.)(.||.|.|.)(.|.||.|.)(.|.|.||.)(.|.|.|.|)(A|.|.|.|.)(.|A|.|.|.)(.|.|A|.|.)(.|.|.|A|.)(.|.|.|.|A)(B|.|.|.|.)(.|B|.|.|.)(.|.|B|.|.)(.|.|.|B|.)(.|.|.|.|B)xa", "1234567890123456ax" },
|
||||
|
||||
/* Greedy and non-greedy ? operators. */
|
||||
{ MU, A, 0, 0, "(?:a)?a", "laab" },
|
||||
{ CMU, A, 0, 0, "(A)?A", "llaab" },
|
||||
{ MU, A, 0, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
|
||||
{ MU, A, 0, 0, "(a)?a", "manm" },
|
||||
{ CMU, A, 0, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
|
||||
{ MU, A, 0, 0, "(a|b)?\?d((?:e)?)", "abcde" },
|
||||
{ MU, A, 0, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
|
||||
{ M, A, 0, 0, "(?:a?|a)b", "ba" },
|
||||
|
||||
/* Greedy and non-greedy + operators */
|
||||
{ MU, A, 0, 0, "(aa)+aa", "aaaaaaa" },
|
||||
{ MU, A, 0, 0, "(aa)+?aa", "aaaaaaa" },
|
||||
{ MU, A, 0, 0, "(?:aba|ab|a)+l", "ababamababal" },
|
||||
{ MU, A, 0, 0, "(?:aba|ab|a)+?l", "ababamababal" },
|
||||
{ MU, A, 0, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
|
||||
{ MU, A, 0, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
|
||||
{ MU, A, 0, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
|
||||
{ MU, A, 0, 0, "(aa|bb){8,1000}", "abaabbaabbaabbaab_aabbaabbaabbaabbaabbaabb_" },
|
||||
|
||||
/* Greedy and non-greedy * operators */
|
||||
{ CMU, A, 0, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
|
||||
{ MU, A, 0, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
|
||||
{ MU, A, 0, 0, "(aa|ab)*ab", "aaabaaab" },
|
||||
{ CMU, A, 0, 0, "(aa|Ab)*?aB", "aaabaaab" },
|
||||
{ MU, A, 0, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
|
||||
{ MU, A, 0, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
|
||||
{ M, A, 0, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
|
||||
{ M, A, 0, 0, "((?:a|)*){0}a", "a" },
|
||||
|
||||
/* Combining ? + * operators */
|
||||
{ MU, A, 0, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
|
||||
{ MU, A, 0, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
|
||||
{ MU, A, 0, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
|
||||
{ MU, A, 0, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
|
||||
{ MU, A, 0, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
|
||||
|
||||
/* Single character iterators. */
|
||||
{ MU, A, 0, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
|
||||
{ MU, A, 0, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
|
||||
{ MU, A, 0, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
|
||||
{ MU, A, 0, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
|
||||
{ MU, A, 0, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
|
||||
{ MU, A, 0, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
|
||||
{ MU, A, 0, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
|
||||
{ MU, A, 0, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
|
||||
{ MU, A, 0, 0, "(ba{2})+c", "baabaaabacbaabaac" },
|
||||
{ MU, A, 0, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
|
||||
{ MU, A, 0, 0, "(a?+[^b])+", "babaacacb" },
|
||||
{ MU, A, 0, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
|
||||
{ CMU, A, 0, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
|
||||
{ CMU, A, 0, 0, "[c-f]+k", "DemmFke" },
|
||||
{ MU, A, 0, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
|
||||
{ MU, A, 0, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
|
||||
{ CMU, A, 0, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
|
||||
{ CMU, A, 0, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
|
||||
{ CMU, A, 0, 0, "[ace]{3,}", "AcbDAcEEcEd" },
|
||||
{ CMU, A, 0, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
|
||||
{ MU, A, 0, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
|
||||
{ CMU, A, 0, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
|
||||
{ MU, A, 0, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
|
||||
{ MU, A, 0, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
|
||||
{ MU, A, 0, 0, "\\b\\w+\\B", "x,a_cd" },
|
||||
{ MUP, A, 0, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
|
||||
{ CMU, A, 0, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
|
||||
{ CMUP, A, 0, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
|
||||
{ CMU, A, 0, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
|
||||
{ CMU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
|
||||
{ MU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
|
||||
{ MU, A, 0, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
|
||||
{ MU, A, 0, 0, "\\d+123", "987654321,01234" },
|
||||
{ MU, A, 0, 0, "abcd*|\\w+xy", "aaaaa,abxyz" },
|
||||
{ MU, A, 0, 0, "(?:abc|((?:amc|\\b\\w*xy)))", "aaaaa,abxyz" },
|
||||
{ MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.abcd#."},
|
||||
{ MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.mbcd#."},
|
||||
{ MU, A, 0, 0, ".[ab]*.", "xx" },
|
||||
{ MU, A, 0, 0, ".[ab]*a", "xxa" },
|
||||
{ MU, A, 0, 0, ".[ab]?.", "xx" },
|
||||
{ MU, A, 0, 0, "_[ab]+_*a", "_aa" },
|
||||
{ MU, A, 0, 0, "#(A+)#\\d+", "#A#A#0" },
|
||||
{ MU, A, 0, 0, "(?P<size>\\d+)m|M", "4M" },
|
||||
{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\n?.+#", "\n,\n,#" },
|
||||
{ 0, A, 0, 0, "<(\\w+)[\\s\\w]+id>", "<br><div id>" },
|
||||
{ MU, A, 0, 0, "([a-z]{0,3}c;)+", "ccccc;c;cc;ccc;cccccccccccccccc;" },
|
||||
|
||||
/* Bracket repeats with limit. */
|
||||
{ MU, A, 0, 0, "(?:(ab){2}){5}M", "abababababababababababM" },
|
||||
{ MU, A, 0, 0, "(?:ab|abab){1,5}M", "abababababababababababM" },
|
||||
{ MU, A, 0, 0, "(?>ab|abab){1,5}M", "abababababababababababM" },
|
||||
{ MU, A, 0, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" },
|
||||
{ MU, A, 0, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" },
|
||||
{ MU, A, 0, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" },
|
||||
{ MU, A, 0, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" },
|
||||
{ MU, A, 0, 0, "(ab){4,6}?M", "abababababababM" },
|
||||
|
||||
/* Basic character sets. */
|
||||
{ MU, A, 0, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
|
||||
{ MU, A, 0, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
|
||||
{ MU, A, 0, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
|
||||
{ MU, A, 0, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
|
||||
{ MU, A, 0, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
|
||||
{ MU, A, 0, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
|
||||
{ MU, A, 0, 0, "x[bcef]+", "xaxdxecbfg" },
|
||||
{ MU, A, 0, 0, "x[bcdghij]+", "xaxexfxdgbjk" },
|
||||
{ MU, A, 0, 0, "x[^befg]+", "xbxexacdhg" },
|
||||
{ MU, A, 0, 0, "x[^bcdl]+", "xlxbxaekmd" },
|
||||
{ MU, A, 0, 0, "x[^bcdghi]+", "xbxdxgxaefji" },
|
||||
{ MU, A, 0, 0, "x[B-Fb-f]+", "xaxAxgxbfBFG" },
|
||||
{ CMU, A, 0, 0, "\\x{e9}+", "#\xf0\x90\x90\xa8\xc3\xa8\xc3\xa9\xc3\x89\xc3\x88" },
|
||||
{ CMU, A, 0, 0, "[^\\x{e9}]+", "\xc3\xa9#\xf0\x90\x90\xa8\xc3\xa8\xc3\x88\xc3\x89" },
|
||||
{ MU, A, 0, 0, "[\\x02\\x7e]+", "\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x02\x7e\x7f" },
|
||||
{ MU, A, 0, 0, "[^\\x02\\x7e]+", "\x02\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x7f\x7e" },
|
||||
{ MU, A, 0, 0, "[\\x{81}-\\x{7fe}]+", "#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xc2\x81\xdf\xbe\xdf\xbf" },
|
||||
{ MU, A, 0, 0, "[^\\x{81}-\\x{7fe}]+", "\xc2\x81#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xdf\xbf\xdf\xbe" },
|
||||
{ MU, A, 0, 0, "[\\x{801}-\\x{fffe}]+", "#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xe0\xa0\x81\xef\xbf\xbe\xef\xbf\xbf" },
|
||||
{ MU, A, 0, 0, "[^\\x{801}-\\x{fffe}]+", "\xe0\xa0\x81#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xef\xbf\xbf\xef\xbf\xbe" },
|
||||
{ MU, A, 0, 0, "[\\x{10001}-\\x{10fffe}]+", "#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf0\x90\x80\x81\xf4\x8f\xbf\xbe\xf4\x8f\xbf\xbf" },
|
||||
{ MU, A, 0, 0, "[^\\x{10001}-\\x{10fffe}]+", "\xf0\x90\x80\x81#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbe" },
|
||||
{ CMU, A, 0, 0 | F_NOMATCH | F_PROPERTY, "^[\\x{100}-\\x{17f}]", " " },
|
||||
{ M, A, 0, 0 | F_NOMATCH, "[^\\S\\W]{6}", "abcdefghijk" },
|
||||
|
||||
/* Unicode properties. */
|
||||
{ MUP, A, 0, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
|
||||
{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
|
||||
{ MUP, A, 0, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
|
||||
{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
|
||||
{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
|
||||
{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
|
||||
{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
|
||||
{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
|
||||
{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
|
||||
{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
|
||||
{ MUP, A, 0, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
|
||||
{ MUP, A, 0, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
|
||||
{ CMUP, A, 0, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
|
||||
{ MUP, A, 0, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
|
||||
{ MUP, A, 0, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
|
||||
{ MU, A, 0, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
|
||||
{ CMUP, A, 0, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
|
||||
{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
|
||||
{ MUP, A, 0, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
|
||||
{ PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "[a-b\\s]{2,5}[^a]", "AB baaa" },
|
||||
{ MUP, 0, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Hangul}\\p{Z}]", " " },
|
||||
{ MUP, 0, 0, 0, "[\\p{Lu}\\P{Latin}]+", "c\xEA\xA4\xAE,A,b" },
|
||||
{ MUP, 0, 0, 0, "[\\x{a92e}\\p{Lu}\\P{Latin}]+", "c\xEA\xA4\xAE,A,b" },
|
||||
{ CMUP, 0, 0, 0, "[^S]\\B", "\xe2\x80\x8a" },
|
||||
{ MUP, 0, 0, 0 | F_NOMATCH, "[^[:print:]\\x{f6f6}]", "\xef\x9b\xb6" },
|
||||
{ MUP, 0, 0, 0, "[[:xdigit:]\\x{6500}]#", "\xe6\x94\x80#" },
|
||||
{ MUP, 0, 0, 0 | F_PROPERTY, "[\\pC\\PC]#", "A#" },
|
||||
{ MUP, 0, 0, 0 | F_PROPERTY, "[\\x80-\\xff\\x{800}\\x{802}\\x{804}\\p{Cc}]", "\xdf\xbf\xe0\xa0\x80" },
|
||||
|
||||
/* Possible empty brackets. */
|
||||
{ MU, A, 0, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
|
||||
{ MU, A, 0, 0, "(|ab||bc|a)+d", "abcxabcabd" },
|
||||
{ MU, A, 0, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
|
||||
{ MU, A, 0, 0, "(|ab||bc|a)*d", "abcxabcabd" },
|
||||
{ MU, A, 0, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
|
||||
{ MU, A, 0, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
|
||||
{ MU, A, 0, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
|
||||
{ MU, A, 0, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
|
||||
{ MU, A, 0, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
|
||||
{ MU, A, 0, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
|
||||
|
||||
/* Start offset. */
|
||||
{ MU, A, 0, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
|
||||
{ MU, A, 0, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
|
||||
{ MU, A, 0, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
|
||||
{ MU, A, 0, 1, "(\\w\\W\\w)+", "ab#d" },
|
||||
|
||||
/* Newline. */
|
||||
{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
|
||||
{ M, PCRE2_NEWLINE_CR, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
|
||||
{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{1,3}[^#]", "\r\n##...." },
|
||||
{ MU, A, PCRE2_NO_UTF_CHECK, 1, "^.a", "\n\x80\nxa" },
|
||||
{ MU, A, 0, 1, "^", "\r\n" },
|
||||
{ M, PCRE2_NEWLINE_CRLF, 0, 1 | F_NOMATCH, "^", "\r\n" },
|
||||
{ M, PCRE2_NEWLINE_CRLF, 0, 1, "^", "\r\na" },
|
||||
|
||||
/* Any character except newline or any newline. */
|
||||
{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
|
||||
{ U, PCRE2_NEWLINE_CRLF, 0, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
|
||||
{ 0, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
|
||||
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
|
||||
{ U, PCRE2_NEWLINE_ANY, 0, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
|
||||
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
|
||||
{ 0, PCRE2_NEWLINE_ANY, 0, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
|
||||
{ U, PCRE2_NEWLINE_ANY, 0, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
|
||||
{ 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\r" },
|
||||
{ 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\x85#\r\n#" },
|
||||
{ U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\xe2\x80\xa8#c" },
|
||||
{ U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\r\nc" },
|
||||
{ U, PCRE2_NEWLINE_CRLF | BSR(PCRE2_BSR_UNICODE), 0, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "\\R+", "ab" },
|
||||
{ MU, A, 0, 0, "\\R+", "ab\r\n\r" },
|
||||
{ MU, A, 0, 0, "\\R*", "ab\r\n\r" },
|
||||
{ MU, A, 0, 0, "\\R*", "\r\n\r" },
|
||||
{ M, A, 0, 0, "\\R+\x85", "\r\n\n\r#\r\x85\n" },
|
||||
{ MU, A, 0, 0, "\\R{2,4}", "\r\nab\r\r" },
|
||||
{ MU, A, 0, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
|
||||
{ MU, A, 0, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
|
||||
{ MU, A, 0, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
|
||||
{ MU, A, 0, 0, "\\R{2,4}\n", "\r\n\nab\r\r\nab\r\r\n\n" },
|
||||
{ MU, A, 0, 0, "\\R{2,4}\n", "\r\n\nab\n\n\n\r\r\n" },
|
||||
{ MU, A, 0, 0, "\\R{3,}\n", "\r\n\r\n\nab\n\n\n\r\r\n\n" },
|
||||
{ MU, A, 0, 0, "\\R{0,3}\n", "\r\n\r\n\r\n\n" },
|
||||
{ MU, A, 0, 0, "\\R{0,3}\n", "\r\n\r\n\r\n\r" },
|
||||
{ MU, A, 0, 0, "(\\R{0,3}\n;)+", "\r\n\r\n\r\n\r\n\n;\n;\n\n;\n\n\n;\n\n\n\n\n;" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
|
||||
{ MU, A, 0, 0, "\\R+\\R\\R", "\r\r\r" },
|
||||
{ MU, A, 0, 0, "\\R*\\R\\R", "\n\r" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
|
||||
{ MU, A, 0, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
|
||||
|
||||
/* Atomic groups (no fallback from "next" direction). */
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
|
||||
{ MU, A, 0, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
|
||||
"bababcdedefgheijijklmlmnop" },
|
||||
{ MU, A, 0, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
|
||||
{ MU, A, 0, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
|
||||
{ MU, A, 0, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
|
||||
{ MU, A, 0, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
|
||||
{ MU, A, 0, 0, "((?>a|)+?)b", "aaacaaab" },
|
||||
{ MU, A, 0, 0, "(?>x|)*$", "aaa" },
|
||||
{ MU, A, 0, 0, "(?>(x)|)*$", "aaa" },
|
||||
{ MU, A, 0, 0, "(?>x|())*$", "aaa" },
|
||||
{ MU, A, 0, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
|
||||
{ MU, A, 0, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
|
||||
{ MU, A, 0, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
|
||||
{ MU, A, 0, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
|
||||
{ MU, A, 0, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
|
||||
{ MU, A, 0, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
|
||||
{ MU, A, 0, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
|
||||
{ MU, A, 0, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
|
||||
{ MU, A, 0, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
|
||||
{ MU, A, 0, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
|
||||
{ MU, A, 0, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
|
||||
{ MU, A, 0, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
|
||||
{ MU, A, 0, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
|
||||
{ MU, A, 0, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
|
||||
{ CM, A, 0, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
|
||||
{ MU, A, 0, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
|
||||
{ MU, A, 0, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
|
||||
{ MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
|
||||
{ MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
|
||||
{ MU, A, 0, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
|
||||
{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
|
||||
{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
|
||||
{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
|
||||
{ MU, A, 0, 0, "(c(ab)?+ab)+", "cabcababcab" },
|
||||
{ MU, A, 0, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?>a*|)a", "aaa" },
|
||||
|
||||
/* Possessive quantifiers. */
|
||||
{ MU, A, 0, 0, "(?:a|b)++m", "mababbaaxababbaam" },
|
||||
{ MU, A, 0, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
|
||||
{ MU, A, 0, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
|
||||
{ MU, A, 0, 0, "(a|b)++m", "mababbaaxababbaam" },
|
||||
{ MU, A, 0, 0, "(a|b)*+m", "mababbaaxababbaam" },
|
||||
{ MU, A, 0, 0, "(a|b)*+m", "ababbaaxababbaam" },
|
||||
{ MU, A, 0, 0, "(a|b(*ACCEPT))++m", "maaxab" },
|
||||
{ MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxm" },
|
||||
{ MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
|
||||
{ MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxm" },
|
||||
{ MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
|
||||
{ MU, A, 0, 0, "(b*)++m", "bxbbxbbbxm" },
|
||||
{ MU, A, 0, 0, "(b*)++m", "bxbbxbbbxbbm" },
|
||||
{ MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxm" },
|
||||
{ MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxbbm" },
|
||||
{ MU, A, 0, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
|
||||
{ MU, A, 0, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
|
||||
{ MU, A, 0, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
|
||||
{ MU, A, 0, 0, "(a|(b))++m", "mababbaaxababbaam" },
|
||||
{ MU, A, 0, 0, "((a)|b)*+m", "mababbaaxababbaam" },
|
||||
{ MU, A, 0, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
|
||||
{ MU, A, 0, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
|
||||
{ MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxm" },
|
||||
{ MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
|
||||
{ MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
|
||||
{ MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
|
||||
{ MU, A, 0, 0, "((b*))++m", "bxbbxbbbxm" },
|
||||
{ MU, A, 0, 0, "((b*))++m", "bxbbxbbbxbbm" },
|
||||
{ MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxm" },
|
||||
{ MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxbbm" },
|
||||
{ MU, A, 0, 0, "(A)*+$", "ABC" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
|
||||
{ MU, A, 0, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
|
||||
{ MU, A, 0, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
|
||||
{ MU, A, 0, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
|
||||
{ MU, A, 0, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
|
||||
|
||||
/* Back references. */
|
||||
{ MU, A, 0, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
|
||||
{ CMU, A, 0, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
|
||||
{ CM, A, 0, 0, "(a{2,4})\\1", "AaAaaAaA" },
|
||||
{ MU, A, 0, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
|
||||
{ MU, A, 0, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
|
||||
{ MU, A, 0, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
|
||||
{ MU, A, 0, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
|
||||
{ MU, A, 0, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
|
||||
{ MU, A, 0, 0, "(?:(aa)|b)\\1?b", "bb" },
|
||||
{ CMU, A, 0, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
|
||||
{ MU, A, 0, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
|
||||
{ CMU, A, 0, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
|
||||
{ MU, A, 0, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
|
||||
{ CM, A, 0, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
|
||||
{ MU, A, 0, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
|
||||
{ MU, A, 0, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
|
||||
{ M, A, 0, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
|
||||
{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
|
||||
{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
|
||||
{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
|
||||
{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
|
||||
{ PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
|
||||
{ CMUP, A, 0, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
|
||||
{ MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
|
||||
{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
|
||||
{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>*(?<A>aa)(?<A>bb)", "aabb" },
|
||||
{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{0,3}aaaaaa", "aabbaaaaaa" },
|
||||
{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{2,5}bb", "aabbaaaabb" },
|
||||
{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}m", "aaaaaaaabbbbaabbbbm" },
|
||||
{ MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
|
||||
{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
|
||||
{ MU | PCRE2_DUPNAMES, A, 0, 0, "\\k<A>*?(?<A>aa)(?<A>bb)", "aabb" },
|
||||
{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
|
||||
{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>*?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
|
||||
{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
|
||||
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}M", "aaaaaaaabbbbaabbbbm" },
|
||||
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" },
|
||||
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
|
||||
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
|
||||
{ MU | PCRE2_DUPNAMES, A, 0, 0, "^(?P<NAME>..)(?P<NAME>..)\\k<NAME>{2,4}", "AaAAAaAaAaaA" },
|
||||
{ MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "(a)|\\1+c", "xxc" },
|
||||
{ MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\1+?()", "" },
|
||||
|
||||
/* Assertions. */
|
||||
{ MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
|
||||
{ MU, A, 0, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
|
||||
{ MU, A, 0, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
|
||||
{ MU, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
|
||||
{ MU, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
|
||||
{ M, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
|
||||
{ M, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
|
||||
{ MU, A, 0, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
|
||||
{ MU, A, 0, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
|
||||
{ MU, A, 0, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
|
||||
{ MU, A, 0, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
|
||||
{ MU, A, 0, 0, "((?(?=(a))a)+k)", "bbak" },
|
||||
{ MU, A, 0, 0, "((?(?=a)a)+k)", "bbak" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
|
||||
{ MU, A, 0, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
|
||||
{ MU, A, 0, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
|
||||
{ MU, A, 0, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
|
||||
{ MU, A, 0, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
|
||||
{ MU, A, 0, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
|
||||
{ MU, A, 0, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
|
||||
{ MU, A, 0, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
|
||||
{ MU, A, 0, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
|
||||
{ MU, A, 0, 0, "a(?=(?C)\\B(?C`x`))b", "ab" },
|
||||
{ MU, A, 0, 0, "a(?!(?C)\\B(?C`x`))bb|ab", "abb" },
|
||||
{ MU, A, 0, 0, "a(?=\\b|(?C)\\B(?C`x`))b", "ab" },
|
||||
{ MU, A, 0, 0, "a(?!\\b|(?C)\\B(?C`x`))bb|ab", "abb" },
|
||||
{ MU, A, 0, 0, "c(?(?=(?C)\\B(?C`x`))ab|a)", "cab" },
|
||||
{ MU, A, 0, 0, "c(?(?!(?C)\\B(?C`x`))ab|a)", "cab" },
|
||||
{ MU, A, 0, 0, "c(?(?=\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
|
||||
{ MU, A, 0, 0, "c(?(?!\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
|
||||
{ MU, A, 0, 0, "a(?=)b", "ab" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "a(?!)b", "ab" },
|
||||
{ MU, A, 0, 0, "(?(?<!|(|a)))", "a" },
|
||||
|
||||
/* Not empty, ACCEPT, FAIL */
|
||||
{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
|
||||
{ MU, A, PCRE2_NOTEMPTY, 0, "a*", "bcaad" },
|
||||
{ MU, A, PCRE2_NOTEMPTY, 0, "a*?", "bcaad" },
|
||||
{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
|
||||
{ MU, A, 0, 0, "a(*ACCEPT)b", "ab" },
|
||||
{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
|
||||
{ MU, A, PCRE2_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
|
||||
{ MU, A, PCRE2_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
|
||||
{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
|
||||
{ MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
|
||||
{ MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
|
||||
{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
|
||||
{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
|
||||
{ MU, A, 0, 0, "((a(*ACCEPT)b))", "ab" },
|
||||
{ MU, A, 0, 0, "(a(*FAIL)a|a)", "aaa" },
|
||||
{ MU, A, 0, 0, "(?=ab(*ACCEPT)b)a", "ab" },
|
||||
{ MU, A, 0, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
|
||||
{ MU, A, 0, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
|
||||
{ MU, A, PCRE2_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
|
||||
{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?=A)", "AB" },
|
||||
{ MU | PCRE2_ENDANCHORED, A, 0, 0, "aa(*ACCEPT)aa", "aaa" },
|
||||
|
||||
/* Conditional blocks. */
|
||||
{ MU, A, 0, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
|
||||
{ MU, A, 0, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
|
||||
{ MU, A, 0, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
|
||||
{ MU, A, 0, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
|
||||
{ MU, A, 0, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
|
||||
{ MU, A, 0, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
|
||||
{ MU, A, 0, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
|
||||
{ MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
|
||||
{ MU, A, 0, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
|
||||
{ MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
|
||||
{ MU, A, 0, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
|
||||
{ MU, A, 0, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
|
||||
{ MU, A, 0, 0, "(?(?=a)ab)", "a" },
|
||||
{ MU, A, 0, 0, "(?(?<!b)c)", "b" },
|
||||
{ MU, A, 0, 0, "(?(DEFINE)a(b))", "a" },
|
||||
{ MU, A, 0, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
|
||||
{ MU, A, 0, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
|
||||
{ MU, A, 0, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
|
||||
{ MU, A, 0, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
|
||||
{ MU, A, 0, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
|
||||
{ MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
|
||||
{ MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cbb" },
|
||||
{ MU, A, 0, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
|
||||
{ MU, A, 0, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
|
||||
{ MU, A, 0, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
|
||||
{ MU, A, 0, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
|
||||
{ MU, A, 0, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
|
||||
{ MU, A, 0, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
|
||||
{ MU, A, 0, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
|
||||
{ MU, A, 0, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
|
||||
{ MU, A, 0, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
|
||||
{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
|
||||
{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
|
||||
{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
|
||||
{ MU, A, 0, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
|
||||
{ MU, A, 0, 0, "(?(?!)a|b)", "ab" },
|
||||
{ MU, A, 0, 0, "(?(?!)a)", "ab" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?(?!)a|b)", "ac" },
|
||||
|
||||
/* Set start of match. */
|
||||
{ MU, A, 0, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
|
||||
{ MU, A, 0, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
|
||||
{ MU, A, 0, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
|
||||
{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
|
||||
{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
|
||||
|
||||
/* First line. */
|
||||
{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
|
||||
{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
|
||||
{ MU | PCRE2_FIRSTLINE, A, 0, 0, "(?<=a)", "a" },
|
||||
{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[^a][^b]", "ab" },
|
||||
{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "a", "\na" },
|
||||
{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[abc]", "\na" },
|
||||
{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^a", "\na" },
|
||||
{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
|
||||
{ MU | PCRE2_FIRSTLINE, A, 0, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
|
||||
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\xc2\x85#" },
|
||||
{ M | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\x85#" },
|
||||
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
|
||||
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
|
||||
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
|
||||
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, "a", "\ra" },
|
||||
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
|
||||
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
|
||||
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 1, ".", "\r\n" },
|
||||
{ PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_LF, 0, 0 | F_NOMATCH, "ab.", "ab" },
|
||||
{ MU | PCRE2_FIRSTLINE, A, 0, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" },
|
||||
{ PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_ANY, 0, 0, "....a", "012\n0a" },
|
||||
{ MU | PCRE2_FIRSTLINE, A, 0, 0, "[aC]", "a" },
|
||||
|
||||
/* Recurse. */
|
||||
{ MU, A, 0, 0, "(a)(?1)", "aa" },
|
||||
{ MU, A, 0, 0, "((a))(?1)", "aa" },
|
||||
{ MU, A, 0, 0, "(b|a)(?1)", "aa" },
|
||||
{ MU, A, 0, 0, "(b|(a))(?1)", "aa" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
|
||||
{ MU, A, 0, 0, "((a)(b)(?:a*))(?1)", "abab" },
|
||||
{ MU, A, 0, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
|
||||
{ MU, A, 0, 0, "((?2)b|(a)){2}(?1)", "aabab" },
|
||||
{ MU, A, 0, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
|
||||
{ MU, A, 0, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
|
||||
{ MU, A, 0, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
|
||||
{ MU, A, 0, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
|
||||
{ MU, A, 0, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
|
||||
{ MU, A, 0, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
|
||||
{ MU, A, 0, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
|
||||
{ MU, A, 0, 0, "b|<(?R)*>", "<<b>" },
|
||||
{ MU, A, 0, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
|
||||
{ MU, A, 0, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
|
||||
{ MU, A, 0, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
|
||||
{ MU, A, 0, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
|
||||
{ MU, A, 0, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
|
||||
{ MU, A, 0, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
|
||||
{ MU, A, 0, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
|
||||
{ MU, A, 0, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
|
||||
{ MU, A, 0, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" },
|
||||
{ MU, A, 0, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" },
|
||||
{ MU, A, 0, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
|
||||
{ MU, A, 0, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
|
||||
{ MU, A, 0, 0, "((.)(?:.|\\2(?1))){0}#(?1)#", "#aabbccdde# #aabbccddee#" },
|
||||
{ MU, A, 0, 0, "((.)(?:\\2|\\2{4}b)){0}#(?:(?1))+#", "#aaaab# #aaaaab#" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?1)$((.|\\2xx){1,2})", "abc" },
|
||||
|
||||
/* 16 bit specific tests. */
|
||||
{ CM, A, 0, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
|
||||
{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
|
||||
{ CM, A, 0, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
|
||||
{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
|
||||
{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
|
||||
{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
|
||||
{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
|
||||
{ M, A, 0, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
|
||||
{ M, A, 0, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
|
||||
{ CM | PCRE2_EXTENDED, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
|
||||
{ M, PCRE2_NEWLINE_ANY, 0, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
|
||||
{ 0, BSR(PCRE2_BSR_UNICODE), 0, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
|
||||
{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
|
||||
{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
|
||||
{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
|
||||
{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
|
||||
|
||||
/* Partial matching. */
|
||||
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "ab", "a" },
|
||||
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "ab|a", "a" },
|
||||
{ MU, A, PCRE2_PARTIAL_HARD, 0, "ab|a", "a" },
|
||||
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "\\b#", "a" },
|
||||
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
|
||||
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
|
||||
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a\\B", "a" },
|
||||
{ MU, A, PCRE2_PARTIAL_HARD, 0, "a\\b", "a" },
|
||||
{ M | PCRE2_DUPNAMES, A, PCRE2_PARTIAL_HARD, 0, "^(?P<NAME>..)(?P<NAME>..)\\k<NAME>{2,4}", "AaAAAaAaAaA" },
|
||||
{ M | PCRE2_DUPNAMES, A, PCRE2_PARTIAL_HARD, 0, "^(?P<NAME>..)(?P<NAME>..)\\k<NAME>{2,4}", "AaAAAaAaAaa" },
|
||||
|
||||
/* (*MARK) verb. */
|
||||
{ MU, A, 0, 0, "a(*MARK:aa)a", "ababaa" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
|
||||
{ MU, A, 0, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
|
||||
{ MU, A, 0, 0, "(?>a(*:aa))b|ac", "ac" },
|
||||
{ MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
|
||||
{ MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
|
||||
{ MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
|
||||
{ MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
|
||||
{ MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
|
||||
{ MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(*:mark)m", "a" },
|
||||
|
||||
/* (*COMMIT) verb. */
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
|
||||
{ MU, A, 0, 0, "aa(*COMMIT)b", "xaxaab" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" },
|
||||
|
||||
/* (*PRUNE) verb. */
|
||||
{ MU, A, 0, 0, "aa\\K(*PRUNE)b", "aaab" },
|
||||
{ MU, A, 0, 0, "aa(*PRUNE:bb)b|a", "aa" },
|
||||
{ MU, A, 0, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
|
||||
{ MU, A, 0, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
|
||||
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
|
||||
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" },
|
||||
{ MU, A, 0, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
|
||||
{ MU, A, 0, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
|
||||
{ MU, A, 0, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
|
||||
{ MU, A, 0, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
|
||||
{ MU, A, 0, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
|
||||
{ MU, A, 0, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
|
||||
{ MU, A, 0, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
|
||||
{ MU, A, 0, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
|
||||
{ MU, A, 0, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
|
||||
{ MU, A, 0, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
|
||||
{ MU, A, 0, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
|
||||
{ MU, A, 0, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
|
||||
|
||||
/* (*SKIP) verb. */
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
|
||||
{ MU, A, 0, 0, "(\\w+(*SKIP)#)", "abcd,xyz#," },
|
||||
{ MU, A, 0, 0, "\\w+(*SKIP)#|mm", "abcd,xyz#," },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "b+(?<=(*SKIP)#c)|b+", "#bbb" },
|
||||
|
||||
/* (*THEN) verb. */
|
||||
{ MU, A, 0, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" },
|
||||
{ MU, A, 0, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" },
|
||||
{ MU, A, 0, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" },
|
||||
{ MU, A, 0, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" },
|
||||
{ MU, A, 0, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" },
|
||||
{ MU, A, 0, 0, "(?(?!a(*THEN)b)ad|add)", "add" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
|
||||
{ MU, A, 0, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
|
||||
{ MU, A, 0, 0, "(?=(*THEN: ))* ", " " },
|
||||
{ MU, A, 0, 0, "a(*THEN)(?R) |", "a" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?<!(*THEN)a|(*THEN)b|(*THEN)ab?|(*THEN)ba?|)", "c" },
|
||||
|
||||
/* Recurse and control verbs. */
|
||||
{ MU, A, 0, 0, "(a(*ACCEPT)b){0}a(?1)b", "aacaabb" },
|
||||
{ MU, A, 0, 0, "((a)\\2(*ACCEPT)b){0}a(?1)b", "aaacaaabb" },
|
||||
{ MU, A, 0, 0, "((ab|a(*ACCEPT)x)+|ababababax){0}_(?1)_", "_ababababax_ _ababababa_" },
|
||||
{ MU, A, 0, 0, "((.)(?:A(*ACCEPT)|(?1)\\2)){0}_(?1)_", "_bcdaAdcb_bcdaAdcb_" },
|
||||
{ MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_", "_ab_" },
|
||||
{ MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_|(_aa_)", "_aa_" },
|
||||
{ MU, A, 0, 0, "(a(*COMMIT)(?:b|bb)|c(*ACCEPT)d|dd){0}_(?1)+_", "_ax_ _cd_ _abbb_ _abcd_ _abbcdd_" },
|
||||
{ MU, A, 0, 0, "((.)(?:.|(*COMMIT)\\2{3}(*ACCEPT).*|.*)){0}_(?1){0,4}_", "_aaaabbbbccccddd_ _aaaabbbbccccdddd_" },
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
/* Script runs and iterations. */
|
||||
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
|
||||
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
|
||||
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
|
||||
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
|
||||
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
|
||||
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)++#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
|
||||
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)?#", "!ab!abc!ab!ab#" },
|
||||
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)??#", "!ab!abc!ab!ab#" },
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Deep recursion. */
|
||||
{ MU, A, 0, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
|
||||
{ MU, A, 0, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
|
||||
{ MU, A, 0, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
|
||||
|
||||
/* Deep recursion: Stack limit reached. */
|
||||
{ M, A, 0, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
|
||||
{ M, A, 0, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
|
||||
{ M, A, 0, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
|
||||
{ M, A, 0, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
|
||||
{ M, A, 0, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
|
||||
|
||||
{ 0, 0, 0, 0, NULL, NULL }
|
||||
};
|
||||
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
static pcre2_jit_stack_8* callback8(void *arg)
|
||||
{
|
||||
return (pcre2_jit_stack_8 *)arg;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
static pcre2_jit_stack_16* callback16(void *arg)
|
||||
{
|
||||
return (pcre2_jit_stack_16 *)arg;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
static pcre2_jit_stack_32* callback32(void *arg)
|
||||
{
|
||||
return (pcre2_jit_stack_32 *)arg;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
static pcre2_jit_stack_8 *stack8;
|
||||
|
||||
static pcre2_jit_stack_8 *getstack8(void)
|
||||
{
|
||||
if (!stack8)
|
||||
stack8 = pcre2_jit_stack_create_8(1, 1024 * 1024, NULL);
|
||||
return stack8;
|
||||
}
|
||||
|
||||
/* Attaches the shared 8-bit JIT stack to a match context, or releases it.

   mcontext != NULL: registers callback8 on the context, passing the shared
                     stack (created on demand by getstack8) as callback data.
   mcontext == NULL: frees the shared stack, if any, and forgets it. */
static void setstack8(pcre2_match_context_8 *mcontext)
{
	if (mcontext != NULL) {
		pcre2_jit_stack_assign_8(mcontext, callback8, getstack8());
		return;
	}

	/* Teardown request. */
	if (stack8 != NULL)
		pcre2_jit_stack_free_8(stack8);
	stack8 = NULL;
}
|
||||
#endif /* SUPPORT_PCRE2_8 */
|
||||
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
static pcre2_jit_stack_16 *stack16;
|
||||
|
||||
static pcre2_jit_stack_16 *getstack16(void)
|
||||
{
|
||||
if (!stack16)
|
||||
stack16 = pcre2_jit_stack_create_16(1, 1024 * 1024, NULL);
|
||||
return stack16;
|
||||
}
|
||||
|
||||
/* Attaches the shared 16-bit JIT stack to a match context, or releases it.

   mcontext != NULL: registers callback16 on the context, passing the shared
                     stack (created on demand by getstack16) as callback data.
   mcontext == NULL: frees the shared stack, if any, and forgets it. */
static void setstack16(pcre2_match_context_16 *mcontext)
{
	if (mcontext != NULL) {
		pcre2_jit_stack_assign_16(mcontext, callback16, getstack16());
		return;
	}

	/* Teardown request. */
	if (stack16 != NULL)
		pcre2_jit_stack_free_16(stack16);
	stack16 = NULL;
}
|
||||
#endif /* SUPPORT_PCRE2_16 */
|
||||
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
static pcre2_jit_stack_32 *stack32;
|
||||
|
||||
static pcre2_jit_stack_32 *getstack32(void)
|
||||
{
|
||||
if (!stack32)
|
||||
stack32 = pcre2_jit_stack_create_32(1, 1024 * 1024, NULL);
|
||||
return stack32;
|
||||
}
|
||||
|
||||
/* Attaches the shared 32-bit JIT stack to a match context, or releases it.

   mcontext != NULL: registers callback32 on the context, passing the shared
                     stack (created on demand by getstack32) as callback data.
   mcontext == NULL: frees the shared stack, if any, and forgets it. */
static void setstack32(pcre2_match_context_32 *mcontext)
{
	if (mcontext != NULL) {
		pcre2_jit_stack_assign_32(mcontext, callback32, getstack32());
		return;
	}

	/* Teardown request. */
	if (stack32 != NULL)
		pcre2_jit_stack_free_32(stack32);
	stack32 = NULL;
}
|
||||
#endif /* SUPPORT_PCRE2_32 */
|
||||
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
|
||||
/* Converts a zero-terminated UTF-8 byte string into UTF-16 code units.

   input       zero-terminated source bytes
   output      destination buffer; receives a terminating zero unit
   offsetmap   optional (may be NULL). For every output unit index k that
               starts a character, offsetmap[k] is set to the byte offset of
               that character in input; the entry after the last character
               holds the number of input bytes consumed. The slot belonging
               to the low half of a surrogate pair is skipped, not written.
   max_length  capacity of output in code units, terminator included

   Returns the number of code units written, excluding the terminator.
   When max_length is 0 nothing is written at all.

   The decoder is deliberately lenient: continuation bytes are not validated
   and encoded surrogates are passed through, because the test table feeds
   it ill-formed sequences on purpose. Two kinds of malformed input are
   handled explicitly so the scan always stays inside the string:
     - a lead byte 0xf8..0xff is copied as a single unit (previously the
       input pointer did not advance and the output was filled with zeros);
     - a multi-byte sequence cut short by the terminating zero is not
       decoded, its lead byte is copied as a single unit (previously the
       pointer stepped over the terminator). */
static int convert_utf8_to_utf16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int *offsetmap, int max_length)
{
	PCRE2_SPTR8 iptr = input;
	PCRE2_UCHAR16 *optr = output;
	unsigned int c;
	int seq_len;
	int i;

	if (max_length == 0)
		return 0;

	while (*iptr && max_length > 1) {
		if (offsetmap)
			*offsetmap++ = (int)(iptr - input);

		/* Sequence length announced by the lead byte. Stray continuation
		   bytes (0x80..0xbf) and invalid lead bytes (0xf8..0xff) are
		   treated as single units. */
		if (*iptr < 0xc0)
			seq_len = 1;
		else if (!(*iptr & 0x20))
			seq_len = 2;
		else if (!(*iptr & 0x10))
			seq_len = 3;
		else if (!(*iptr & 0x08))
			seq_len = 4;
		else
			seq_len = 1;

		/* Never read or step beyond the terminating zero. The check is
		   sequential, so iptr[i] is only read when iptr[i - 1] != 0. */
		for (i = 1; i < seq_len; i++) {
			if (iptr[i] == 0) {
				seq_len = 1;
				break;
			}
		}

		switch (seq_len) {
		case 2:
			c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
			break;
		case 3:
			c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
			break;
		case 4:
			c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
			break;
		default:
			c = iptr[0];
			break;
		}
		iptr += seq_len;

		if (c < 65536) {
			*optr++ = c;
			max_length--;
		} else if (max_length <= 2) {
			/* No room left for a surrogate pair plus the terminator. */
			*optr = '\0';
			return (int)(optr - output);
		} else {
			/* Encode as a high/low surrogate pair. */
			c -= 0x10000;
			*optr++ = 0xd800 | ((c >> 10) & 0x3ff);
			*optr++ = 0xdc00 | (c & 0x3ff);
			max_length -= 2;
			/* The low surrogate has no character start of its own. */
			if (offsetmap)
				offsetmap++;
		}
	}
	if (offsetmap)
		*offsetmap = (int)(iptr - input);
	*optr = '\0';
	return (int)(optr - output);
}
|
||||
|
||||
/* Widens a zero-terminated 8-bit string to 16-bit code units, one unit per
   input byte, without any UTF decoding.

   At most max_length - 1 units are copied and a terminating zero is always
   appended, except when max_length is 0, in which case output is untouched.
   Returns the number of units copied, excluding the terminator. */
static int copy_char8_to_char16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int max_length)
{
	int count = 0;

	if (max_length == 0)
		return 0;

	/* Keep one slot in reserve for the terminator. */
	while (input[count] != 0 && max_length - count > 1) {
		output[count] = input[count];
		count++;
	}

	output[count] = '\0';
	return count;
}
|
||||
|
||||
#define REGTEST_MAX_LENGTH16 4096
|
||||
static PCRE2_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
|
||||
static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
|
||||
|
||||
#endif /* SUPPORT_PCRE2_16 */
|
||||
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
|
||||
/* Converts a zero-terminated UTF-8 byte string into UTF-32 code units.

   input       zero-terminated source bytes
   output      destination buffer; receives a terminating zero unit
   offsetmap   optional (may be NULL). offsetmap[k] is set to the byte
               offset in input of the character stored in output[k]; the
               entry after the last character holds the number of input
               bytes consumed.
   max_length  capacity of output in code units, terminator included

   Returns the number of code units written, excluding the terminator.
   When max_length is 0 nothing is written at all.

   The decoder is deliberately lenient: continuation bytes are not validated
   and encoded surrogates are passed through, because the test table feeds
   it ill-formed sequences on purpose. Two kinds of malformed input are
   handled explicitly so the scan always stays inside the string:
     - a lead byte 0xf8..0xff is copied as a single unit (previously the
       input pointer did not advance and the output was filled with zeros);
     - a multi-byte sequence cut short by the terminating zero is not
       decoded, its lead byte is copied as a single unit (previously the
       pointer stepped over the terminator). */
static int convert_utf8_to_utf32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int *offsetmap, int max_length)
{
	PCRE2_SPTR8 iptr = input;
	PCRE2_UCHAR32 *optr = output;
	unsigned int c;
	int seq_len;
	int i;

	if (max_length == 0)
		return 0;

	while (*iptr && max_length > 1) {
		if (offsetmap)
			*offsetmap++ = (int)(iptr - input);

		/* Sequence length announced by the lead byte. Stray continuation
		   bytes (0x80..0xbf) and invalid lead bytes (0xf8..0xff) are
		   treated as single units. */
		if (*iptr < 0xc0)
			seq_len = 1;
		else if (!(*iptr & 0x20))
			seq_len = 2;
		else if (!(*iptr & 0x10))
			seq_len = 3;
		else if (!(*iptr & 0x08))
			seq_len = 4;
		else
			seq_len = 1;

		/* Never read or step beyond the terminating zero. The check is
		   sequential, so iptr[i] is only read when iptr[i - 1] != 0. */
		for (i = 1; i < seq_len; i++) {
			if (iptr[i] == 0) {
				seq_len = 1;
				break;
			}
		}

		switch (seq_len) {
		case 2:
			c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
			break;
		case 3:
			c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
			break;
		case 4:
			c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
			break;
		default:
			c = iptr[0];
			break;
		}
		iptr += seq_len;

		*optr++ = c;
		max_length--;
	}
	if (offsetmap)
		*offsetmap = (int)(iptr - input);
	*optr = 0;
	return (int)(optr - output);
}
|
||||
|
||||
/* Widens a zero-terminated 8-bit string to 32-bit code units, one unit per
   input byte, without any UTF decoding.

   At most max_length - 1 units are copied and a terminating zero is always
   appended, except when max_length is 0, in which case output is untouched.
   Returns the number of units copied, excluding the terminator. */
static int copy_char8_to_char32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int max_length)
{
	int count = 0;

	if (max_length == 0)
		return 0;

	/* Keep one slot in reserve for the terminator. */
	while (input[count] != 0 && max_length - count > 1) {
		output[count] = input[count];
		count++;
	}

	output[count] = '\0';
	return count;
}
|
||||
|
||||
#define REGTEST_MAX_LENGTH32 4096
|
||||
static PCRE2_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
|
||||
static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
|
||||
|
||||
#endif /* SUPPORT_PCRE2_32 */
|
||||
|
||||
/* Reports whether a zero-terminated string consists solely of 7-bit ASCII.
   Returns 1 when every byte is <= 127 (an empty string qualifies),
   0 as soon as a byte with the top bit set is found. */
static int check_ascii(const char *input)
{
	const unsigned char *cursor;

	for (cursor = (const unsigned char *)input; *cursor != 0; cursor++) {
		if (*cursor & 0x80)
			return 0;
	}
	return 1;
}
|
||||
|
||||
#define OVECTOR_SIZE 15
|
||||
|
||||
static int regression_tests(void)
|
||||
{
|
||||
struct regression_test_case *current = regression_test_cases;
|
||||
int error;
|
||||
PCRE2_SIZE err_offs;
|
||||
int is_successful;
|
||||
int is_ascii;
|
||||
int total = 0;
|
||||
int successful = 0;
|
||||
int successful_row = 0;
|
||||
int counter = 0;
|
||||
int jit_compile_mode;
|
||||
int utf = 0;
|
||||
uint32_t disabled_options = 0;
|
||||
int i;
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
pcre2_code_8 *re8;
|
||||
pcre2_compile_context_8 *ccontext8;
|
||||
pcre2_match_data_8 *mdata8_1;
|
||||
pcre2_match_data_8 *mdata8_2;
|
||||
pcre2_match_context_8 *mcontext8;
|
||||
PCRE2_SIZE *ovector8_1 = NULL;
|
||||
PCRE2_SIZE *ovector8_2 = NULL;
|
||||
int return_value8[2];
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
pcre2_code_16 *re16;
|
||||
pcre2_compile_context_16 *ccontext16;
|
||||
pcre2_match_data_16 *mdata16_1;
|
||||
pcre2_match_data_16 *mdata16_2;
|
||||
pcre2_match_context_16 *mcontext16;
|
||||
PCRE2_SIZE *ovector16_1 = NULL;
|
||||
PCRE2_SIZE *ovector16_2 = NULL;
|
||||
int return_value16[2];
|
||||
int length16;
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
pcre2_code_32 *re32;
|
||||
pcre2_compile_context_32 *ccontext32;
|
||||
pcre2_match_data_32 *mdata32_1;
|
||||
pcre2_match_data_32 *mdata32_2;
|
||||
pcre2_match_context_32 *mcontext32;
|
||||
PCRE2_SIZE *ovector32_1 = NULL;
|
||||
PCRE2_SIZE *ovector32_2 = NULL;
|
||||
int return_value32[2];
|
||||
int length32;
|
||||
#endif
|
||||
|
||||
#if defined SUPPORT_PCRE2_8
|
||||
PCRE2_UCHAR8 cpu_info[128];
|
||||
#elif defined SUPPORT_PCRE2_16
|
||||
PCRE2_UCHAR16 cpu_info[128];
|
||||
#elif defined SUPPORT_PCRE2_32
|
||||
PCRE2_UCHAR32 cpu_info[128];
|
||||
#endif
|
||||
#if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
|
||||
int return_value;
|
||||
#endif
|
||||
|
||||
/* This test compares the behaviour of interpreter and JIT. Although disabling
|
||||
utf or ucp may make tests fail, if the pcre2_match result is the SAME, it is
|
||||
still considered successful from pcre2_jit_test point of view. */
|
||||
|
||||
#if defined SUPPORT_PCRE2_8
|
||||
pcre2_config_8(PCRE2_CONFIG_JITTARGET, &cpu_info);
|
||||
#elif defined SUPPORT_PCRE2_16
|
||||
pcre2_config_16(PCRE2_CONFIG_JITTARGET, &cpu_info);
|
||||
#elif defined SUPPORT_PCRE2_32
|
||||
pcre2_config_32(PCRE2_CONFIG_JITTARGET, &cpu_info);
|
||||
#endif
|
||||
|
||||
printf("Running JIT regression tests\n");
|
||||
printf(" target CPU of SLJIT compiler: ");
|
||||
for (i = 0; cpu_info[i]; i++)
|
||||
printf("%c", (char)(cpu_info[i]));
|
||||
printf("\n");
|
||||
|
||||
#if defined SUPPORT_PCRE2_8
|
||||
pcre2_config_8(PCRE2_CONFIG_UNICODE, &utf);
|
||||
#elif defined SUPPORT_PCRE2_16
|
||||
pcre2_config_16(PCRE2_CONFIG_UNICODE, &utf);
|
||||
#elif defined SUPPORT_PCRE2_32
|
||||
pcre2_config_32(PCRE2_CONFIG_UNICODE, &utf);
|
||||
#endif
|
||||
|
||||
if (!utf)
|
||||
disabled_options |= PCRE2_UTF;
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
printf(" in 8 bit mode with UTF-8 %s:\n", utf ? "enabled" : "disabled");
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
printf(" in 16 bit mode with UTF-16 %s:\n", utf ? "enabled" : "disabled");
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
printf(" in 32 bit mode with UTF-32 %s:\n", utf ? "enabled" : "disabled");
|
||||
#endif
|
||||
|
||||
while (current->pattern) {
|
||||
/* printf("\nPattern: %s :\n", current->pattern); */
|
||||
total++;
|
||||
is_ascii = 0;
|
||||
if (!(current->start_offset & F_PROPERTY))
|
||||
is_ascii = check_ascii(current->pattern) && check_ascii(current->input);
|
||||
|
||||
if (current->match_options & PCRE2_PARTIAL_SOFT)
|
||||
jit_compile_mode = PCRE2_JIT_PARTIAL_SOFT;
|
||||
else if (current->match_options & PCRE2_PARTIAL_HARD)
|
||||
jit_compile_mode = PCRE2_JIT_PARTIAL_HARD;
|
||||
else
|
||||
jit_compile_mode = PCRE2_JIT_COMPLETE;
|
||||
error = 0;
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
re8 = NULL;
|
||||
ccontext8 = pcre2_compile_context_create_8(NULL);
|
||||
if (ccontext8) {
|
||||
if (GET_NEWLINE(current->newline))
|
||||
pcre2_set_newline_8(ccontext8, GET_NEWLINE(current->newline));
|
||||
if (GET_BSR(current->newline))
|
||||
pcre2_set_bsr_8(ccontext8, GET_BSR(current->newline));
|
||||
|
||||
if (!(current->start_offset & F_NO8)) {
|
||||
re8 = pcre2_compile_8((PCRE2_SPTR8)current->pattern, PCRE2_ZERO_TERMINATED,
|
||||
current->compile_options & ~disabled_options,
|
||||
&error, &err_offs, ccontext8);
|
||||
|
||||
if (!re8 && (utf || is_ascii))
|
||||
printf("\n8 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
|
||||
}
|
||||
pcre2_compile_context_free_8(ccontext8);
|
||||
}
|
||||
else
|
||||
printf("\n8 bit: Cannot allocate compile context\n");
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
|
||||
convert_utf8_to_utf16((PCRE2_SPTR8)current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
|
||||
else
|
||||
copy_char8_to_char16((PCRE2_SPTR8)current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
|
||||
|
||||
re16 = NULL;
|
||||
ccontext16 = pcre2_compile_context_create_16(NULL);
|
||||
if (ccontext16) {
|
||||
if (GET_NEWLINE(current->newline))
|
||||
pcre2_set_newline_16(ccontext16, GET_NEWLINE(current->newline));
|
||||
if (GET_BSR(current->newline))
|
||||
pcre2_set_bsr_16(ccontext16, GET_BSR(current->newline));
|
||||
|
||||
if (!(current->start_offset & F_NO16)) {
|
||||
re16 = pcre2_compile_16(regtest_buf16, PCRE2_ZERO_TERMINATED,
|
||||
current->compile_options & ~disabled_options,
|
||||
&error, &err_offs, ccontext16);
|
||||
|
||||
if (!re16 && (utf || is_ascii))
|
||||
printf("\n16 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
|
||||
}
|
||||
pcre2_compile_context_free_16(ccontext16);
|
||||
}
|
||||
else
|
||||
printf("\n16 bit: Cannot allocate compile context\n");
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
|
||||
convert_utf8_to_utf32((PCRE2_SPTR8)current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
|
||||
else
|
||||
copy_char8_to_char32((PCRE2_SPTR8)current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
|
||||
|
||||
re32 = NULL;
|
||||
ccontext32 = pcre2_compile_context_create_32(NULL);
|
||||
if (ccontext32) {
|
||||
if (GET_NEWLINE(current->newline))
|
||||
pcre2_set_newline_32(ccontext32, GET_NEWLINE(current->newline));
|
||||
if (GET_BSR(current->newline))
|
||||
pcre2_set_bsr_32(ccontext32, GET_BSR(current->newline));
|
||||
|
||||
if (!(current->start_offset & F_NO32)) {
|
||||
re32 = pcre2_compile_32(regtest_buf32, PCRE2_ZERO_TERMINATED,
|
||||
current->compile_options & ~disabled_options,
|
||||
&error, &err_offs, ccontext32);
|
||||
|
||||
if (!re32 && (utf || is_ascii))
|
||||
printf("\n32 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
|
||||
}
|
||||
pcre2_compile_context_free_32(ccontext32);
|
||||
}
|
||||
else
|
||||
printf("\n32 bit: Cannot allocate compile context\n");
|
||||
#endif
|
||||
|
||||
counter++;
|
||||
if ((counter & 0x3) != 0) {
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
setstack8(NULL);
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
setstack16(NULL);
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
setstack32(NULL);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
return_value8[0] = -1000;
|
||||
return_value8[1] = -1000;
|
||||
mdata8_1 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL);
|
||||
mdata8_2 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL);
|
||||
mcontext8 = pcre2_match_context_create_8(NULL);
|
||||
if (!mdata8_1 || !mdata8_2 || !mcontext8) {
|
||||
printf("\n8 bit: Cannot allocate match data\n");
|
||||
pcre2_match_data_free_8(mdata8_1);
|
||||
pcre2_match_data_free_8(mdata8_2);
|
||||
pcre2_match_context_free_8(mcontext8);
|
||||
pcre2_code_free_8(re8);
|
||||
re8 = NULL;
|
||||
} else {
|
||||
ovector8_1 = pcre2_get_ovector_pointer_8(mdata8_1);
|
||||
ovector8_2 = pcre2_get_ovector_pointer_8(mdata8_2);
|
||||
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
||||
ovector8_1[i] = (PCRE2_SIZE)(-2);
|
||||
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
||||
ovector8_2[i] = (PCRE2_SIZE)(-2);
|
||||
pcre2_set_match_limit_8(mcontext8, 10000000);
|
||||
}
|
||||
if (re8) {
|
||||
return_value8[1] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
|
||||
current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, mcontext8);
|
||||
|
||||
if (pcre2_jit_compile_8(re8, jit_compile_mode)) {
|
||||
printf("\n8 bit: JIT compiler does not support \"%s\"\n", current->pattern);
|
||||
} else if ((counter & 0x1) != 0) {
|
||||
setstack8(mcontext8);
|
||||
return_value8[0] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
|
||||
current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
|
||||
} else {
|
||||
pcre2_jit_stack_assign_8(mcontext8, NULL, getstack8());
|
||||
return_value8[0] = pcre2_jit_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
|
||||
current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
return_value16[0] = -1000;
|
||||
return_value16[1] = -1000;
|
||||
mdata16_1 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL);
|
||||
mdata16_2 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL);
|
||||
mcontext16 = pcre2_match_context_create_16(NULL);
|
||||
if (!mdata16_1 || !mdata16_2 || !mcontext16) {
|
||||
printf("\n16 bit: Cannot allocate match data\n");
|
||||
pcre2_match_data_free_16(mdata16_1);
|
||||
pcre2_match_data_free_16(mdata16_2);
|
||||
pcre2_match_context_free_16(mcontext16);
|
||||
pcre2_code_free_16(re16);
|
||||
re16 = NULL;
|
||||
} else {
|
||||
ovector16_1 = pcre2_get_ovector_pointer_16(mdata16_1);
|
||||
ovector16_2 = pcre2_get_ovector_pointer_16(mdata16_2);
|
||||
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
||||
ovector16_1[i] = (PCRE2_SIZE)(-2);
|
||||
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
||||
ovector16_2[i] = (PCRE2_SIZE)(-2);
|
||||
pcre2_set_match_limit_16(mcontext16, 10000000);
|
||||
}
|
||||
if (re16) {
|
||||
if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
|
||||
length16 = convert_utf8_to_utf16((PCRE2_SPTR8)current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
|
||||
else
|
||||
length16 = copy_char8_to_char16((PCRE2_SPTR8)current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
|
||||
|
||||
return_value16[1] = pcre2_match_16(re16, regtest_buf16, length16,
|
||||
current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, mcontext16);
|
||||
|
||||
if (pcre2_jit_compile_16(re16, jit_compile_mode)) {
|
||||
printf("\n16 bit: JIT compiler does not support \"%s\"\n", current->pattern);
|
||||
} else if ((counter & 0x1) != 0) {
|
||||
setstack16(mcontext16);
|
||||
return_value16[0] = pcre2_match_16(re16, regtest_buf16, length16,
|
||||
current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
|
||||
} else {
|
||||
pcre2_jit_stack_assign_16(mcontext16, NULL, getstack16());
|
||||
return_value16[0] = pcre2_jit_match_16(re16, regtest_buf16, length16,
|
||||
current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
return_value32[0] = -1000;
|
||||
return_value32[1] = -1000;
|
||||
mdata32_1 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL);
|
||||
mdata32_2 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL);
|
||||
mcontext32 = pcre2_match_context_create_32(NULL);
|
||||
if (!mdata32_1 || !mdata32_2 || !mcontext32) {
|
||||
printf("\n32 bit: Cannot allocate match data\n");
|
||||
pcre2_match_data_free_32(mdata32_1);
|
||||
pcre2_match_data_free_32(mdata32_2);
|
||||
pcre2_match_context_free_32(mcontext32);
|
||||
pcre2_code_free_32(re32);
|
||||
re32 = NULL;
|
||||
} else {
|
||||
ovector32_1 = pcre2_get_ovector_pointer_32(mdata32_1);
|
||||
ovector32_2 = pcre2_get_ovector_pointer_32(mdata32_2);
|
||||
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
||||
ovector32_1[i] = (PCRE2_SIZE)(-2);
|
||||
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
||||
ovector32_2[i] = (PCRE2_SIZE)(-2);
|
||||
pcre2_set_match_limit_32(mcontext32, 10000000);
|
||||
}
|
||||
if (re32) {
|
||||
if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
|
||||
length32 = convert_utf8_to_utf32((PCRE2_SPTR8)current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
|
||||
else
|
||||
length32 = copy_char8_to_char32((PCRE2_SPTR8)current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
|
||||
|
||||
return_value32[1] = pcre2_match_32(re32, regtest_buf32, length32,
|
||||
current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, mcontext32);
|
||||
|
||||
if (pcre2_jit_compile_32(re32, jit_compile_mode)) {
|
||||
printf("\n32 bit: JIT compiler does not support \"%s\"\n", current->pattern);
|
||||
} else if ((counter & 0x1) != 0) {
|
||||
setstack32(mcontext32);
|
||||
return_value32[0] = pcre2_match_32(re32, regtest_buf32, length32,
|
||||
current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
|
||||
} else {
|
||||
pcre2_jit_stack_assign_32(mcontext32, NULL, getstack32());
|
||||
return_value32[0] = pcre2_jit_match_32(re32, regtest_buf32, length32,
|
||||
current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
|
||||
return_value8[0], return_value16[0], return_value32[0],
|
||||
(int)ovector8_1[0], (int)ovector8_1[1],
|
||||
(int)ovector16_1[0], (int)ovector16_1[1],
|
||||
(int)ovector32_1[0], (int)ovector32_1[1],
|
||||
(current->compile_options & PCRE2_CASELESS) ? "C" : ""); */
|
||||
|
||||
/* If F_DIFF is set, just run the test, but do not compare the results.
|
||||
Segfaults can still be captured. */
|
||||
|
||||
is_successful = 1;
|
||||
if (!(current->start_offset & F_DIFF)) {
|
||||
#if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
|
||||
if (!(current->start_offset & F_FORCECONV)) {
|
||||
|
||||
/* All results must be the same. */
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
if ((return_value = return_value8[0]) != return_value8[1]) {
|
||||
printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
|
||||
return_value8[0], return_value8[1], total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
} else
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
if ((return_value = return_value16[0]) != return_value16[1]) {
|
||||
printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
|
||||
return_value16[0], return_value16[1], total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
} else
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
if ((return_value = return_value32[0]) != return_value32[1]) {
|
||||
printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
|
||||
return_value32[0], return_value32[1], total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
} else
|
||||
#endif
|
||||
#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
|
||||
if (return_value8[0] != return_value16[0]) {
|
||||
printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
|
||||
return_value8[0], return_value16[0],
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
} else
|
||||
#endif
|
||||
#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
|
||||
if (return_value8[0] != return_value32[0]) {
|
||||
printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
|
||||
return_value8[0], return_value32[0],
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
} else
|
||||
#endif
|
||||
#if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
|
||||
if (return_value16[0] != return_value32[0]) {
|
||||
printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
|
||||
return_value16[0], return_value32[0],
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
} else
|
||||
#endif
|
||||
if (return_value >= 0 || return_value == PCRE2_ERROR_PARTIAL) {
|
||||
if (return_value == PCRE2_ERROR_PARTIAL) {
|
||||
return_value = 2;
|
||||
} else {
|
||||
return_value *= 2;
|
||||
}
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
return_value8[0] = return_value;
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
return_value16[0] = return_value;
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
return_value32[0] = return_value;
|
||||
#endif
|
||||
/* Transform back the results. */
|
||||
if (current->compile_options & PCRE2_UTF) {
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
for (i = 0; i < return_value; ++i) {
|
||||
if (ovector16_1[i] != PCRE2_UNSET)
|
||||
ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
|
||||
if (ovector16_2[i] != PCRE2_UNSET)
|
||||
ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
|
||||
}
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
for (i = 0; i < return_value; ++i) {
|
||||
if (ovector32_1[i] != PCRE2_UNSET)
|
||||
ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
|
||||
if (ovector32_2[i] != PCRE2_UNSET)
|
||||
ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
for (i = 0; i < return_value; ++i) {
|
||||
#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
|
||||
if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
|
||||
printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
|
||||
i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector16_1[i], (int)ovector16_2[i],
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
#endif
|
||||
#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
|
||||
if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
|
||||
printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
|
||||
i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
#endif
|
||||
#if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
|
||||
if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector32_1[i] || ovector16_1[i] != ovector32_2[i]) {
|
||||
printf("\n16 and 32 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
|
||||
i, (int)ovector16_1[i], (int)ovector16_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
} else
|
||||
#endif /* more than one of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16 and SUPPORT_PCRE2_32 */
|
||||
{
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
if (return_value8[0] != return_value8[1]) {
|
||||
printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
|
||||
return_value8[0], return_value8[1], total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
} else if (return_value8[0] >= 0 || return_value8[0] == PCRE2_ERROR_PARTIAL) {
|
||||
if (return_value8[0] == PCRE2_ERROR_PARTIAL)
|
||||
return_value8[0] = 2;
|
||||
else
|
||||
return_value8[0] *= 2;
|
||||
|
||||
for (i = 0; i < return_value8[0]; ++i)
|
||||
if (ovector8_1[i] != ovector8_2[i]) {
|
||||
printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
|
||||
i, (int)ovector8_1[i], (int)ovector8_2[i], total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
if (return_value16[0] != return_value16[1]) {
|
||||
printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
|
||||
return_value16[0], return_value16[1], total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
} else if (return_value16[0] >= 0 || return_value16[0] == PCRE2_ERROR_PARTIAL) {
|
||||
if (return_value16[0] == PCRE2_ERROR_PARTIAL)
|
||||
return_value16[0] = 2;
|
||||
else
|
||||
return_value16[0] *= 2;
|
||||
|
||||
for (i = 0; i < return_value16[0]; ++i)
|
||||
if (ovector16_1[i] != ovector16_2[i]) {
|
||||
printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
|
||||
i, (int)ovector16_1[i], (int)ovector16_2[i], total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
if (return_value32[0] != return_value32[1]) {
|
||||
printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
|
||||
return_value32[0], return_value32[1], total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
} else if (return_value32[0] >= 0 || return_value32[0] == PCRE2_ERROR_PARTIAL) {
|
||||
if (return_value32[0] == PCRE2_ERROR_PARTIAL)
|
||||
return_value32[0] = 2;
|
||||
else
|
||||
return_value32[0] *= 2;
|
||||
|
||||
for (i = 0; i < return_value32[0]; ++i)
|
||||
if (ovector32_1[i] != ovector32_2[i]) {
|
||||
printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
|
||||
i, (int)ovector32_1[i], (int)ovector32_2[i], total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
if (is_successful) {
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
if (!(current->start_offset & F_NO8) && (utf || is_ascii)) {
|
||||
if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
|
||||
printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
|
||||
if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
|
||||
printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
if (!(current->start_offset & F_NO16) && (utf || is_ascii)) {
|
||||
if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
|
||||
printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
|
||||
if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
|
||||
printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
if (!(current->start_offset & F_NO32) && (utf || is_ascii)) {
|
||||
if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
|
||||
printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
|
||||
if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
|
||||
printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
if (is_successful) {
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
if (re8 && !(current->start_offset & F_NO8) && pcre2_get_mark_8(mdata8_1) != pcre2_get_mark_8(mdata8_2)) {
|
||||
printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
if (re16 && !(current->start_offset & F_NO16) && pcre2_get_mark_16(mdata16_1) != pcre2_get_mark_16(mdata16_2)) {
|
||||
printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
if (re32 && !(current->start_offset & F_NO32) && pcre2_get_mark_32(mdata32_1) != pcre2_get_mark_32(mdata32_2)) {
|
||||
printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
pcre2_code_free_8(re8);
|
||||
pcre2_match_data_free_8(mdata8_1);
|
||||
pcre2_match_data_free_8(mdata8_2);
|
||||
pcre2_match_context_free_8(mcontext8);
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
pcre2_code_free_16(re16);
|
||||
pcre2_match_data_free_16(mdata16_1);
|
||||
pcre2_match_data_free_16(mdata16_2);
|
||||
pcre2_match_context_free_16(mcontext16);
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
pcre2_code_free_32(re32);
|
||||
pcre2_match_data_free_32(mdata32_1);
|
||||
pcre2_match_data_free_32(mdata32_2);
|
||||
pcre2_match_context_free_32(mcontext32);
|
||||
#endif
|
||||
|
||||
if (is_successful) {
|
||||
successful++;
|
||||
successful_row++;
|
||||
printf(".");
|
||||
if (successful_row >= 60) {
|
||||
successful_row = 0;
|
||||
printf("\n");
|
||||
}
|
||||
} else
|
||||
successful_row = 0;
|
||||
|
||||
fflush(stdout);
|
||||
current++;
|
||||
}
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
setstack8(NULL);
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
setstack16(NULL);
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
setstack32(NULL);
|
||||
#endif
|
||||
|
||||
if (total == successful) {
|
||||
printf("\nAll JIT regression tests are successfully passed.\n");
|
||||
return 0;
|
||||
} else {
|
||||
printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined SUPPORT_UNICODE
|
||||
|
||||
/* Compares the outcome of one JIT match attempt with the expectation
   recorded for a test case.

   pattern_index  index of the test case, used only in diagnostics
   type           label for the kind of attempt, printed in diagnostics
   result         return value of the match call
   match_start    expected ovector[0]; a negative value means that no match
                  is expected, in which case result must be -1
   match_end      expected ovector[1]
   ovector        output vector of the match; read only when a match is
                  expected and result is greater than zero

   Returns 0 when the outcome is the expected one. Otherwise prints a single
   diagnostic line describing the first discrepancy and returns 1. */
static int check_invalid_utf_result(int pattern_index, const char *type, int result,
	int match_start, int match_end, PCRE2_SIZE *ovector)
{
	int mismatch = 0;

	if (match_start < 0) {
		/* A failing match is expected: nothing but -1 is acceptable. */
		if (result != -1) {
			printf("Pattern[%d] %s result is not -1.\n", pattern_index, type);
			mismatch = 1;
		}
	} else if (result <= 0) {
		printf("Pattern[%d] %s result (%d) is not greater than 0.\n", pattern_index, type, result);
		mismatch = 1;
	} else if (ovector[0] != (PCRE2_SIZE)match_start) {
		printf("Pattern[%d] %s ovector[0] is unexpected (%d instead of %d)\n",
			pattern_index, type, (int)ovector[0], match_start);
		mismatch = 1;
	} else if (ovector[1] != (PCRE2_SIZE)match_end) {
		printf("Pattern[%d] %s ovector[1] is unexpected (%d instead of %d)\n",
			pattern_index, type, (int)ovector[1], match_end);
		mismatch = 1;
	}

	return mismatch;
}
|
||||
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_8
|
||||
|
||||
/* Shorthand option sets used by the invalid UTF-8 test case table.
   UDA: compile options PCRE2_UTF, PCRE2_DOTALL and PCRE2_ANCHORED.
   CI:  JIT compile options PCRE2_JIT_COMPLETE and PCRE2_JIT_INVALID_UTF.
   CPI: the CI set with PCRE2_JIT_PARTIAL_SOFT added. */
#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
|
||||
#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
|
||||
#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
|
||||
|
||||
/* Describes one invalid UTF-8 regression test: how the pattern is compiled,
   which part of the input is matched, and the expected match position. */
struct invalid_utf8_regression_test_case {
	/* Option bits passed to pcre2_compile_8(). */
	uint32_t compile_options;
	/* Option bits passed to pcre2_jit_compile_8(); the PCRE2_JIT_COMPLETE and
	   PCRE2_JIT_PARTIAL_SOFT bits also select which match attempts are run. */
	int jit_compile_options;
	/* Start offset relative to the full input; skip_left is subtracted
	   before it is handed to the match call. */
	int start_offset;
	/* Number of bytes dropped from the beginning and from the end of the
	   input before matching. */
	int skip_left, skip_right;
	/* Expected ovector[0] and ovector[1]; a negative match_start means the
	   match call is expected to return -1. */
	int match_start, match_end;
	/* Up to two patterns; a NULL entry is not compiled. */
	const char *pattern[2];
	/* Zero terminated subject string (its length is taken with strlen). */
	const char *input;
};
|
||||
|
||||
/* Object whose address is stored in the second pattern slot of one test case
   in the table. NOTE(review): presumably a marker recognised by address
   (rather than a real pattern) to request CR newline handling - confirm
   against the code that consumes the table. */
static const char invalid_utf8_newline_cr;
|
||||
|
||||
/* Invalid UTF-8 regression cases. The initializers follow the member order of
   struct invalid_utf8_regression_test_case:
     compile_options, jit_compile_options, start_offset, skip_left,
     skip_right, match_start, match_end, { pattern[0], pattern[1] }, input.
   A match_start of -1 marks a case in which check_invalid_utf_result()
   requires the match call to return -1.
   NOTE(review): the final all-zero / NULL row presumably terminates the
   table; the loop that walks it is not visible here - confirm. */
static const struct invalid_utf8_regression_test_case invalid_utf8_regression_test_cases[] = {
|
||||
/* Pattern "." with the UDA compile options. */
{ UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
|
||||
{ UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf0\x90\x80\x80" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf4\x90\x80\x80" },
|
||||
{ UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\x7f" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\xc0" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x8f\xbf\xbf" },
|
||||
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf#" },
|
||||
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf" },
|
||||
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80#" },
|
||||
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80" },
|
||||
{ UDA, CI, 0, 0, 2, -1, -1, { ".", NULL }, "\xef\xbf\xbf#" },
|
||||
{ UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xef\xbf\xbf" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\x7f#" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\xc0" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf#" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf" },
|
||||
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xed\x9f\xbf#" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xa0\x80#" },
|
||||
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xee\x80\x80#" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xbf\xbf#" },
|
||||
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf##" },
|
||||
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf#" },
|
||||
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf" },
|
||||
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80##" },
|
||||
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80#" },
|
||||
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80##" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0##" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf##" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80###" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8###" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8" },
|
||||
{ UDA, CI, 0, 0, 0, 0, 1, { ".", NULL }, "\x7f" },
|
||||
|
||||
/* Word boundary assertions with start_offset 4. */
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf4\x8f\xbf\xbf#" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\xa0\x80\x80\xf4\xa0\x80\x80" },
|
||||
{ UDA, CPI, 4, 1, 1, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf" },
|
||||
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xef\xbf\xbf#" },
|
||||
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xe0\xa0\x80#" },
|
||||
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf0\x90\x80\x80#" },
|
||||
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf3\xbf\xbf\xbf#" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf0\x8f\xbf\xbf\xf0\x8f\xbf\xbf" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf5\x80\x80\x80\xf5\x80\x80\x80" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x90\x80\x80\xf4\x90\x80\x80" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xff\xf4\x8f\xbf\xff" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xff\xbf\xf4\x8f\xff\xbf" },
|
||||
{ UDA, CPI, 4, 0, 1, -1, -1, { "\\B", "\\b" }, "\xef\x80\x80\x80\xef\x80\x80" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80\x80\x80\x80\x80\x80\x80" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xe0\x9f\xbf\xe0\x9f\xbf#" },
|
||||
{ UDA, CPI, 4, 2, 2, -1, -1, { "\\B", "\\b" }, "#\xe0\xa0\x80\xe0\xa0\x80#" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xf0\x80\x80\xf0\x80\x80#" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xed\xa0\x80\xed\xa0\x80#" },
|
||||
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xdf\xbf#" },
|
||||
{ UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xdf\xbf#" },
|
||||
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xc2\x80#" },
|
||||
{ UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xc2\x80#" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xc1\xbf\xc1\xbf##" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xdf\xc0\xdf\xc0##" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xe0\x80\xe0\x80##" },
|
||||
|
||||
/* Word boundary assertions with start_offset 3. */
{ UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xef\xbf\xbf#" },
|
||||
{ UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xe0\xa0\x80#" },
|
||||
{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x9f\xbf\xe0\x9f\xbf" },
|
||||
{ UDA, CPI, 3, 1, 1, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xbf\xef\xbf\xbf" },
|
||||
{ UDA, CPI, 3, 0, 1, -1, -1, { "\\B", "\\b" }, "\xdf\x80\x80\xdf\x80" },
|
||||
{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xff\xef\xbf\xff" },
|
||||
{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xff\xbf\xef\xff\xbf" },
|
||||
{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xed\xbf\xbf\xed\xbf\xbf" },
|
||||
|
||||
/* Word boundary assertions with start_offset 2. */
{ UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xdf\xbf#" },
|
||||
{ UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xc2\x80#" },
|
||||
{ UDA, CPI, 2, 1, 1, -1, -1, { "\\B", "\\b" }, "\xdf\xbf\xdf\xbf" },
|
||||
{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xc1\xbf\xc1\xbf" },
|
||||
{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x80\xe0\x80" },
|
||||
{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xdf\xff\xdf\xff" },
|
||||
{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xff\xbf\xff\xbf" },
|
||||
|
||||
/* Word boundary assertions with start_offset 1. */
{ UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x7f#" },
|
||||
{ UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x01#" },
|
||||
{ UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80" },
|
||||
{ UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\xb0\xb0" },
|
||||
|
||||
/* Caseless back reference. */
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { "(.)\\1", NULL }, "aA" },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "a\xff" },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "\xc2\x80\x80" },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 6, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 8, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
|
||||
|
||||
/* Pattern "\X". */
{ UDA, CPI, 0, 0, 0, 0, 1, { "\\X", NULL }, "A" },
|
||||
{ UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xff" },
|
||||
{ UDA, CPI, 0, 0, 0, 0, 2, { "\\X", NULL }, "\xc3\xa1" },
|
||||
{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xc3\xa1" },
|
||||
{ UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xc3\x7f" },
|
||||
{ UDA, CPI, 0, 0, 0, 0, 3, { "\\X", NULL }, "\xe1\xbd\xb8" },
|
||||
{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xe1\xbd\xb8" },
|
||||
{ UDA, CPI, 0, 0, 0, 0, 4, { "\\X", NULL }, "\xf0\x90\x90\x80" },
|
||||
{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xf0\x90\x90\x80" },
|
||||
|
||||
/* Negated character class. */
{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "#" },
|
||||
{ UDA, CPI, 0, 0, 0, 0, 4, { "[^#]", NULL }, "\xf4\x8f\xbf\xbf" },
|
||||
{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xf4\x90\x80\x80" },
|
||||
{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xc1\x80" },
|
||||
|
||||
/* PCRE2_MULTILINE line start followed by a non-word character, start_offset 1. */
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { "^\\W", NULL }, " \x0a#"},
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 14, 15, { "^\\W", NULL }, " \xc0\x8a#\xe0\x80\x8a#\xf0\x80\x80\x8a#\x0a#"},
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf8\x0a#"},
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xc3\x0a#"},
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf1\x0a#"},
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xf2\xbf\x0a#"},
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \xf2\xbf\xbf\x0a#"},
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xef\x0a#"},
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xef\xbf\x0a#"},
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \x85#\xc2\x85#"},
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 7, 8, { "^\\W", NULL }, " \xe2\x80\xf8\xe2\x80\xa8#"},
|
||||
|
||||
/* PCRE2_FIRSTLINE with the single character pattern "#". */
{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xe2\x80\xf8\xe2\x80\xa8#"},
|
||||
{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 3, 4, { "#", NULL }, "\xe2\x80\xf8#\xe2\x80\xa8#"},
|
||||
{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "abcd\xc2\x85#"},
|
||||
{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 1, 2, { "#", NULL }, "\x85#\xc2\x85#"},
|
||||
{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 5, 6, { "#", NULL }, "\xef,\x80,\xf8#\x0a"},
|
||||
{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xef,\x80,\xf8\x0a#"},
|
||||
|
||||
/* Unanchored literal pattern, with and without PCRE2_NO_START_OPTIMIZE. */
{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
|
||||
{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
|
||||
{ PCRE2_UTF, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
|
||||
{ PCRE2_UTF, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
|
||||
|
||||
/* Character type escapes, alone and inside a class. */
{ PCRE2_UTF | PCRE2_UCP, CI, 0, 0, 0, -1, -1, { "[\\s]", NULL }, "\xed\xa0\x80" },
|
||||
{ PCRE2_UTF, CI, 0, 0, 0, 0, 3, { "[\\D]", NULL }, "\xe0\xab\xaa@" },
|
||||
{ PCRE2_UTF, CI, 0, 0, 0, 0, 3, { "\\D+", NULL }, "n\xc3\xb1" },
|
||||
{ PCRE2_UTF, CI, 0, 0, 0, 0, 5, { "\\W+", NULL }, "@\xf0\x9d\x84\x9e" },
|
||||
|
||||
/* These two are not invalid UTF tests, but this infrastructure fits better for them. */
|
||||
{ 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\X{2}", NULL }, "\r\n\n" },
|
||||
{ 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\R{2}", NULL }, "\r\n\n" },
|
||||
|
||||
/* The second pattern slot holds the address of invalid_utf8_newline_cr
   instead of a pattern string. */
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 0, 0, 0, -1, -1, { "^.a", &invalid_utf8_newline_cr }, "\xc3\xa7#a" },
|
||||
|
||||
/* All-zero / NULL row; see the note at the top of the table. */
{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
|
||||
};
|
||||
|
||||
#undef UDA
|
||||
#undef CI
|
||||
#undef CPI
|
||||
|
||||
/* Compiles pattern[i] of one invalid-UTF-8 test case, JIT-compiles it, and runs
the JIT matches selected by the case's jit_compile_options.

Arguments:
  current        the test case to run
  pattern_index  index of the test case, used only in diagnostics
  i              which entry of current->pattern to run; also printed in the
                 diagnostics so that a failure names the right pattern
  ccontext       compile context passed to pcre2_compile_8()
  mdata          match data block that receives the match results

Returns: 1 when pattern[i] is NULL (nothing to run) or every requested match
         was accepted by check_invalid_utf_result(); 0 otherwise
*/
static int run_invalid_utf8_test(const struct invalid_utf8_regression_test_case *current,
	int pattern_index, int i, pcre2_compile_context_8 *ccontext, pcre2_match_data_8 *mdata)
{
	pcre2_code_8 *code;
	int result, errorcode;
	PCRE2_SIZE length, erroroffset;
	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_8(mdata);

	if (current->pattern[i] == NULL)
		return 1;

	code = pcre2_compile_8((PCRE2_UCHAR8*)current->pattern[i], PCRE2_ZERO_TERMINATED,
		current->compile_options, &errorcode, &erroroffset, ccontext);

	if (!code) {
		printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
		return 0;
	}

	if (pcre2_jit_compile_8(code, current->jit_compile_options) != 0) {
		printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
		pcre2_code_free_8(code);
		return 0;
	}

	/* The subject handed to the matcher is the input with skip_left code units
	removed from the front and skip_right from the back. */
	length = (PCRE2_SIZE)(strlen(current->input) - current->skip_left - current->skip_right);

	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
		/* start_offset is relative to the full input, hence the adjustment. */
		result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left),
			length, current->start_offset - current->skip_left, 0, mdata, NULL);

		/* A non-zero return from the checker is treated as a failure. */
		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) {
			pcre2_code_free_8(code);
			return 0;
		}
	}

	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
		result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left),
			length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) {
			pcre2_code_free_8(code);
			return 0;
		}
	}

	pcre2_code_free_8(code);
	return 1;
}
|
||||
|
||||
static int invalid_utf8_regression_tests(void)
|
||||
{
|
||||
const struct invalid_utf8_regression_test_case *current;
|
||||
pcre2_compile_context_8 *ccontext;
|
||||
pcre2_match_data_8 *mdata;
|
||||
int total = 0, successful = 0;
|
||||
int result;
|
||||
|
||||
printf("\nRunning invalid-utf8 JIT regression tests\n");
|
||||
|
||||
ccontext = pcre2_compile_context_create_8(NULL);
|
||||
pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY);
|
||||
mdata = pcre2_match_data_create_8(4, NULL);
|
||||
|
||||
for (current = invalid_utf8_regression_test_cases; current->pattern[0]; current++) {
|
||||
/* printf("\nPattern: %s :\n", current->pattern); */
|
||||
total++;
|
||||
|
||||
result = 1;
|
||||
if (current->pattern[1] != &invalid_utf8_newline_cr)
|
||||
{
|
||||
if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
|
||||
result = 0;
|
||||
if (!run_invalid_utf8_test(current, total - 1, 1, ccontext, mdata))
|
||||
result = 0;
|
||||
} else {
|
||||
pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_CR);
|
||||
if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
|
||||
result = 0;
|
||||
pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY);
|
||||
}
|
||||
|
||||
if (result) {
|
||||
successful++;
|
||||
}
|
||||
|
||||
printf(".");
|
||||
if ((total % 60) == 0)
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
if ((total % 60) != 0)
|
||||
printf("\n");
|
||||
|
||||
pcre2_match_data_free_8(mdata);
|
||||
pcre2_compile_context_free_8(ccontext);
|
||||
|
||||
if (total == successful) {
|
||||
printf("\nAll invalid UTF8 JIT regression tests are successfully passed.\n");
|
||||
return 0;
|
||||
} else {
|
||||
printf("\nInvalid UTF8 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
#else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_8 */
|
||||
|
||||
/* Stub for builds without Unicode support or without the 8-bit library (the
#else branch of the SUPPORT_UNICODE && SUPPORT_PCRE2_8 guard): there is nothing
to run, so it returns 0, the same value the real implementation returns when
all tests pass. */
static int invalid_utf8_regression_tests(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_8 */
|
||||
|
||||
#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_16
|
||||
|
||||
#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
|
||||
#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
|
||||
#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
|
||||
|
||||
/* One invalid-UTF-16 JIT regression test case, as consumed by
run_invalid_utf16_test():
  compile_options      options passed to pcre2_compile_16()
  jit_compile_options  options passed to pcre2_jit_compile_16(); its
                       PCRE2_JIT_COMPLETE and PCRE2_JIT_PARTIAL_SOFT bits also
                       select which match runs are performed
  start_offset         match start offset relative to the full input
                       (skip_left is subtracted before matching)
  skip_left            code units dropped from the start of input
  skip_right           code units dropped from the end of input
  match_start,
  match_end            passed to check_invalid_utf_result() together with the
                       ovector (presumably the expected match offsets; confirm
                       against that helper)
  pattern              up to two zero-terminated patterns; a NULL entry is
                       skipped
  input                zero-terminated subject
An entry whose pattern[0] is NULL terminates the test table. */
struct invalid_utf16_regression_test_case {
|
||||
uint32_t compile_options;
|
||||
int jit_compile_options;
|
||||
int start_offset;
|
||||
int skip_left;
|
||||
int skip_right;
|
||||
int match_start;
|
||||
int match_end;
|
||||
const PCRE2_UCHAR16 *pattern[2];
|
||||
const PCRE2_UCHAR16 *input;
|
||||
};
|
||||
|
||||
static PCRE2_UCHAR16 allany16[] = { '.', 0 };
|
||||
static PCRE2_UCHAR16 non_word_boundary16[] = { '\\', 'B', 0 };
|
||||
static PCRE2_UCHAR16 word_boundary16[] = { '\\', 'b', 0 };
|
||||
static PCRE2_UCHAR16 backreference16[] = { '(', '.', ')', '\\', '1', 0 };
|
||||
static PCRE2_UCHAR16 grapheme16[] = { '\\', 'X', 0 };
|
||||
static PCRE2_UCHAR16 nothashmark16[] = { '[', '^', '#', ']', 0 };
|
||||
static PCRE2_UCHAR16 afternl16[] = { '^', '\\', 'W', 0 };
|
||||
static PCRE2_UCHAR16 generic16[] = { '#', 0xd800, 0xdc00, '#', 0 };
|
||||
static PCRE2_UCHAR16 test16_1[] = { 0xd7ff, 0xe000, 0xffff, 0x01, '#', 0 };
|
||||
static PCRE2_UCHAR16 test16_2[] = { 0xd800, 0xdc00, 0xd800, 0xdc00, 0 };
|
||||
static PCRE2_UCHAR16 test16_3[] = { 0xdbff, 0xdfff, 0xdbff, 0xdfff, 0 };
|
||||
static PCRE2_UCHAR16 test16_4[] = { 0xd800, 0xdbff, 0xd800, 0xdbff, 0 };
|
||||
static PCRE2_UCHAR16 test16_5[] = { '#', 0xd800, 0xdc00, '#', 0 };
|
||||
static PCRE2_UCHAR16 test16_6[] = { 'a', 'A', 0xdc28, 0 };
|
||||
static PCRE2_UCHAR16 test16_7[] = { 0xd801, 0xdc00, 0xd801, 0xdc28, 0 };
|
||||
static PCRE2_UCHAR16 test16_8[] = { '#', 0xd800, 0xdc00, 0 };
|
||||
static PCRE2_UCHAR16 test16_9[] = { ' ', 0x2028, '#', 0 };
|
||||
static PCRE2_UCHAR16 test16_10[] = { ' ', 0xdc00, 0xd800, 0x2028, '#', 0 };
|
||||
static PCRE2_UCHAR16 test16_11[] = { 0xdc00, 0xdc00, 0xd800, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 };
|
||||
static PCRE2_UCHAR16 test16_12[] = { '#', 0xd800, 0xdc00, 0xd800, '#', 0xd800, 0xdc00, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 };
|
||||
|
||||
static const struct invalid_utf16_regression_test_case invalid_utf16_regression_test_cases[] = {
|
||||
{ UDA, CI, 0, 0, 0, 0, 1, { allany16, NULL }, test16_1 },
|
||||
{ UDA, CI, 1, 0, 0, 1, 2, { allany16, NULL }, test16_1 },
|
||||
{ UDA, CI, 2, 0, 0, 2, 3, { allany16, NULL }, test16_1 },
|
||||
{ UDA, CI, 3, 0, 0, 3, 4, { allany16, NULL }, test16_1 },
|
||||
{ UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_2 },
|
||||
{ UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_2 },
|
||||
{ UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_2 },
|
||||
{ UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_3 },
|
||||
{ UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_3 },
|
||||
{ UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_3 },
|
||||
|
||||
{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary16, NULL }, test16_1 },
|
||||
{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_1 },
|
||||
{ UDA, CPI, 3, 0, 0, 3, 3, { non_word_boundary16, NULL }, test16_1 },
|
||||
{ UDA, CPI, 4, 0, 0, 4, 4, { non_word_boundary16, NULL }, test16_1 },
|
||||
{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_2 },
|
||||
{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_3 },
|
||||
{ UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_2 },
|
||||
{ UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_3 },
|
||||
{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_4 },
|
||||
{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_5 },
|
||||
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference16, NULL }, test16_6 },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference16, NULL }, test16_6 },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { backreference16, NULL }, test16_7 },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { backreference16, NULL }, test16_7 },
|
||||
|
||||
{ UDA, CPI, 0, 0, 0, 0, 1, { grapheme16, NULL }, test16_6 },
|
||||
{ UDA, CPI, 1, 0, 0, 1, 2, { grapheme16, NULL }, test16_6 },
|
||||
{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme16, NULL }, test16_6 },
|
||||
{ UDA, CPI, 0, 0, 0, 0, 2, { grapheme16, NULL }, test16_7 },
|
||||
{ UDA, CPI, 2, 0, 0, 2, 4, { grapheme16, NULL }, test16_7 },
|
||||
{ UDA, CPI, 1, 0, 0, -1, -1, { grapheme16, NULL }, test16_7 },
|
||||
|
||||
{ UDA, CPI, 0, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 },
|
||||
{ UDA, CPI, 1, 0, 0, 1, 3, { nothashmark16, NULL }, test16_8 },
|
||||
{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 },
|
||||
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl16, NULL }, test16_9 },
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { afternl16, NULL }, test16_10 },
|
||||
|
||||
{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 },
|
||||
{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 },
|
||||
{ PCRE2_UTF, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 },
|
||||
{ PCRE2_UTF, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 },
|
||||
|
||||
{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
|
||||
};
|
||||
|
||||
#undef UDA
|
||||
#undef CI
|
||||
#undef CPI
|
||||
|
||||
/* Compiles pattern[i] of one invalid-UTF-16 test case, JIT-compiles it, and
runs the JIT matches selected by the case's jit_compile_options.

Arguments:
  current        the test case to run
  pattern_index  index of the test case, used only in diagnostics
  i              which entry of current->pattern to run; also printed in the
                 diagnostics so that a failure names the right pattern
  ccontext       compile context passed to pcre2_compile_16()
  mdata          match data block that receives the match results

Returns: 1 when pattern[i] is NULL (nothing to run) or every requested match
         was accepted by check_invalid_utf_result(); 0 otherwise
*/
static int run_invalid_utf16_test(const struct invalid_utf16_regression_test_case *current,
	int pattern_index, int i, pcre2_compile_context_16 *ccontext, pcre2_match_data_16 *mdata)
{
	pcre2_code_16 *code;
	int result, errorcode;
	PCRE2_SIZE length, erroroffset;
	const PCRE2_UCHAR16 *input;
	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_16(mdata);

	if (current->pattern[i] == NULL)
		return 1;

	code = pcre2_compile_16(current->pattern[i], PCRE2_ZERO_TERMINATED,
		current->compile_options, &errorcode, &erroroffset, ccontext);

	if (!code) {
		printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
		return 0;
	}

	if (pcre2_jit_compile_16(code, current->jit_compile_options) != 0) {
		printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
		pcre2_code_free_16(code);
		return 0;
	}

	/* Count the code units of the zero-terminated input. */
	input = current->input;
	length = 0;

	while (*input++ != 0)
		length++;

	/* The subject handed to the matcher excludes skip_left code units at the
	front and skip_right at the back. */
	length -= current->skip_left + current->skip_right;

	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
		/* start_offset is relative to the full input, hence the adjustment. */
		result = pcre2_jit_match_16(code, (current->input + current->skip_left),
			length, current->start_offset - current->skip_left, 0, mdata, NULL);

		/* A non-zero return from the checker is treated as a failure. */
		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) {
			pcre2_code_free_16(code);
			return 0;
		}
	}

	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
		result = pcre2_jit_match_16(code, (current->input + current->skip_left),
			length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) {
			pcre2_code_free_16(code);
			return 0;
		}
	}

	pcre2_code_free_16(code);
	return 1;
}
|
||||
|
||||
static int invalid_utf16_regression_tests(void)
|
||||
{
|
||||
const struct invalid_utf16_regression_test_case *current;
|
||||
pcre2_compile_context_16 *ccontext;
|
||||
pcre2_match_data_16 *mdata;
|
||||
int total = 0, successful = 0;
|
||||
int result;
|
||||
|
||||
printf("\nRunning invalid-utf16 JIT regression tests\n");
|
||||
|
||||
ccontext = pcre2_compile_context_create_16(NULL);
|
||||
pcre2_set_newline_16(ccontext, PCRE2_NEWLINE_ANY);
|
||||
mdata = pcre2_match_data_create_16(4, NULL);
|
||||
|
||||
for (current = invalid_utf16_regression_test_cases; current->pattern[0]; current++) {
|
||||
/* printf("\nPattern: %s :\n", current->pattern); */
|
||||
total++;
|
||||
|
||||
result = 1;
|
||||
if (!run_invalid_utf16_test(current, total - 1, 0, ccontext, mdata))
|
||||
result = 0;
|
||||
if (!run_invalid_utf16_test(current, total - 1, 1, ccontext, mdata))
|
||||
result = 0;
|
||||
|
||||
if (result) {
|
||||
successful++;
|
||||
}
|
||||
|
||||
printf(".");
|
||||
if ((total % 60) == 0)
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
if ((total % 60) != 0)
|
||||
printf("\n");
|
||||
|
||||
pcre2_match_data_free_16(mdata);
|
||||
pcre2_compile_context_free_16(ccontext);
|
||||
|
||||
if (total == successful) {
|
||||
printf("\nAll invalid UTF16 JIT regression tests are successfully passed.\n");
|
||||
return 0;
|
||||
} else {
|
||||
printf("\nInvalid UTF16 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
#else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_16 */
|
||||
|
||||
/* Stub for builds without Unicode support or without the 16-bit library (the
#else branch of the SUPPORT_UNICODE && SUPPORT_PCRE2_16 guard): there is nothing
to run, so it returns 0, the same value the real implementation returns when
all tests pass. */
static int invalid_utf16_regression_tests(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_16 */
|
||||
|
||||
#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_32
|
||||
|
||||
#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
|
||||
#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
|
||||
#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
|
||||
|
||||
/* One invalid-UTF-32 JIT regression test case, as consumed by
run_invalid_utf32_test():
  compile_options      options passed to pcre2_compile_32()
  jit_compile_options  options passed to pcre2_jit_compile_32(); its
                       PCRE2_JIT_COMPLETE and PCRE2_JIT_PARTIAL_SOFT bits also
                       select which match runs are performed
  start_offset         match start offset relative to the full input
                       (skip_left is subtracted before matching)
  skip_left            code units dropped from the start of input
  skip_right           code units dropped from the end of input
  match_start,
  match_end            passed to check_invalid_utf_result() together with the
                       ovector (presumably the expected match offsets; confirm
                       against that helper)
  pattern              up to two zero-terminated patterns; a NULL entry is
                       skipped
  input                zero-terminated subject
An entry whose pattern[0] is NULL terminates the test table. */
struct invalid_utf32_regression_test_case {
|
||||
uint32_t compile_options;
|
||||
int jit_compile_options;
|
||||
int start_offset;
|
||||
int skip_left;
|
||||
int skip_right;
|
||||
int match_start;
|
||||
int match_end;
|
||||
const PCRE2_UCHAR32 *pattern[2];
|
||||
const PCRE2_UCHAR32 *input;
|
||||
};
|
||||
|
||||
static PCRE2_UCHAR32 allany32[] = { '.', 0 };
|
||||
static PCRE2_UCHAR32 non_word_boundary32[] = { '\\', 'B', 0 };
|
||||
static PCRE2_UCHAR32 word_boundary32[] = { '\\', 'b', 0 };
|
||||
static PCRE2_UCHAR32 backreference32[] = { '(', '.', ')', '\\', '1', 0 };
|
||||
static PCRE2_UCHAR32 grapheme32[] = { '\\', 'X', 0 };
|
||||
static PCRE2_UCHAR32 nothashmark32[] = { '[', '^', '#', ']', 0 };
|
||||
static PCRE2_UCHAR32 afternl32[] = { '^', '\\', 'W', 0 };
|
||||
static PCRE2_UCHAR32 test32_1[] = { 0x10ffff, 0x10ffff, 0x110000, 0x110000, 0x10ffff, 0 };
|
||||
static PCRE2_UCHAR32 test32_2[] = { 0xd7ff, 0xe000, 0xd800, 0xdfff, 0xe000, 0xdfff, 0xd800, 0 };
|
||||
static PCRE2_UCHAR32 test32_3[] = { 'a', 'A', 0x110000, 0 };
|
||||
static PCRE2_UCHAR32 test32_4[] = { '#', 0x10ffff, 0x110000, 0 };
|
||||
static PCRE2_UCHAR32 test32_5[] = { ' ', 0x2028, '#', 0 };
|
||||
static PCRE2_UCHAR32 test32_6[] = { ' ', 0x110000, 0x2028, '#', 0 };
|
||||
|
||||
static const struct invalid_utf32_regression_test_case invalid_utf32_regression_test_cases[] = {
|
||||
{ UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_1 },
|
||||
{ UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_1 },
|
||||
{ UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_2 },
|
||||
{ UDA, CI, 1, 0, 0, 1, 2, { allany32, NULL }, test32_2 },
|
||||
{ UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_2 },
|
||||
{ UDA, CI, 3, 0, 0, -1, -1, { allany32, NULL }, test32_2 },
|
||||
|
||||
{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_1 },
|
||||
{ UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_1 },
|
||||
{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_2 },
|
||||
{ UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },
|
||||
{ UDA, CPI, 6, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },
|
||||
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference32, NULL }, test32_3 },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference32, NULL }, test32_3 },
|
||||
|
||||
{ UDA, CPI, 0, 0, 0, 0, 1, { grapheme32, NULL }, test32_1 },
|
||||
{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_1 },
|
||||
{ UDA, CPI, 1, 0, 0, 1, 2, { grapheme32, NULL }, test32_2 },
|
||||
{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_2 },
|
||||
{ UDA, CPI, 3, 0, 0, -1, -1, { grapheme32, NULL }, test32_2 },
|
||||
{ UDA, CPI, 4, 0, 0, 4, 5, { grapheme32, NULL }, test32_2 },
|
||||
|
||||
{ UDA, CPI, 0, 0, 0, -1, -1, { nothashmark32, NULL }, test32_4 },
|
||||
{ UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_4 },
|
||||
{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_4 },
|
||||
{ UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_2 },
|
||||
{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_2 },
|
||||
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl32, NULL }, test32_5 },
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { afternl32, NULL }, test32_6 },
|
||||
|
||||
{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
|
||||
};
|
||||
|
||||
#undef UDA
|
||||
#undef CI
|
||||
#undef CPI
|
||||
|
||||
/* Compiles pattern[i] of one invalid-UTF-32 test case, JIT-compiles it, and
runs the JIT matches selected by the case's jit_compile_options.

Arguments:
  current        the test case to run
  pattern_index  index of the test case, used only in diagnostics
  i              which entry of current->pattern to run; also printed in the
                 diagnostics so that a failure names the right pattern
  ccontext       compile context passed to pcre2_compile_32()
  mdata          match data block that receives the match results

Returns: 1 when pattern[i] is NULL (nothing to run) or every requested match
         was accepted by check_invalid_utf_result(); 0 otherwise
*/
static int run_invalid_utf32_test(const struct invalid_utf32_regression_test_case *current,
	int pattern_index, int i, pcre2_compile_context_32 *ccontext, pcre2_match_data_32 *mdata)
{
	pcre2_code_32 *code;
	int result, errorcode;
	PCRE2_SIZE length, erroroffset;
	const PCRE2_UCHAR32 *input;
	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_32(mdata);

	if (current->pattern[i] == NULL)
		return 1;

	code = pcre2_compile_32(current->pattern[i], PCRE2_ZERO_TERMINATED,
		current->compile_options, &errorcode, &erroroffset, ccontext);

	if (!code) {
		printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
		return 0;
	}

	if (pcre2_jit_compile_32(code, current->jit_compile_options) != 0) {
		printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
		pcre2_code_free_32(code);
		return 0;
	}

	/* Count the code units of the zero-terminated input. */
	input = current->input;
	length = 0;

	while (*input++ != 0)
		length++;

	/* The subject handed to the matcher excludes skip_left code units at the
	front and skip_right at the back. */
	length -= current->skip_left + current->skip_right;

	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
		/* start_offset is relative to the full input, hence the adjustment. */
		result = pcre2_jit_match_32(code, (current->input + current->skip_left),
			length, current->start_offset - current->skip_left, 0, mdata, NULL);

		/* A non-zero return from the checker is treated as a failure. */
		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) {
			pcre2_code_free_32(code);
			return 0;
		}
	}

	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
		result = pcre2_jit_match_32(code, (current->input + current->skip_left),
			length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) {
			pcre2_code_free_32(code);
			return 0;
		}
	}

	pcre2_code_free_32(code);
	return 1;
}
|
||||
|
||||
static int invalid_utf32_regression_tests(void)
|
||||
{
|
||||
const struct invalid_utf32_regression_test_case *current;
|
||||
pcre2_compile_context_32 *ccontext;
|
||||
pcre2_match_data_32 *mdata;
|
||||
int total = 0, successful = 0;
|
||||
int result;
|
||||
|
||||
printf("\nRunning invalid-utf32 JIT regression tests\n");
|
||||
|
||||
ccontext = pcre2_compile_context_create_32(NULL);
|
||||
pcre2_set_newline_32(ccontext, PCRE2_NEWLINE_ANY);
|
||||
mdata = pcre2_match_data_create_32(4, NULL);
|
||||
|
||||
for (current = invalid_utf32_regression_test_cases; current->pattern[0]; current++) {
|
||||
/* printf("\nPattern: %s :\n", current->pattern); */
|
||||
total++;
|
||||
|
||||
result = 1;
|
||||
if (!run_invalid_utf32_test(current, total - 1, 0, ccontext, mdata))
|
||||
result = 0;
|
||||
if (!run_invalid_utf32_test(current, total - 1, 1, ccontext, mdata))
|
||||
result = 0;
|
||||
|
||||
if (result) {
|
||||
successful++;
|
||||
}
|
||||
|
||||
printf(".");
|
||||
if ((total % 60) == 0)
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
if ((total % 60) != 0)
|
||||
printf("\n");
|
||||
|
||||
pcre2_match_data_free_32(mdata);
|
||||
pcre2_compile_context_free_32(ccontext);
|
||||
|
||||
if (total == successful) {
|
||||
printf("\nAll invalid UTF32 JIT regression tests are successfully passed.\n");
|
||||
return 0;
|
||||
} else {
|
||||
printf("\nInvalid UTF32 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
#else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_32 */
|
||||
|
||||
/* Stub for builds without Unicode support or without the 32-bit library (the
#else branch of the SUPPORT_UNICODE && SUPPORT_PCRE2_32 guard): there is nothing
to run, so it returns 0, the same value the real implementation returns when
all tests pass. */
static int invalid_utf32_regression_tests(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_32 */
|
||||
|
||||
/* End of pcre2_jit_test.c */
|
||||
@@ -1,165 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre2_maketables(), which builds
|
||||
character tables for PCRE2 in the current locale. The file is compiled on its
|
||||
own as part of the PCRE2 library. It is also included in the compilation of
|
||||
pcre2_dftables.c as a freestanding program, in which case the macro
|
||||
PCRE2_DFTABLES is defined. */
|
||||
|
||||
#ifndef PCRE2_DFTABLES /* Compiling the library */
|
||||
# ifdef HAVE_CONFIG_H
|
||||
# include "config.h"
|
||||
# endif
|
||||
# include "pcre2_internal.h"
|
||||
#endif
|
||||
|
||||
/*************************************************
|
||||
* Create PCRE2 character tables *
|
||||
*************************************************/
|
||||
|
||||
/* This function builds a set of character tables for use by PCRE2 and returns
|
||||
a pointer to them. They are built using the ctype functions, and consequently
|
||||
their contents will depend upon the current locale setting. When compiled as
|
||||
part of the library, the store is obtained via a general context malloc, if
|
||||
supplied, but when PCRE2_DFTABLES is defined (when compiling the pcre2_dftables
|
||||
freestanding auxiliary program) malloc() is used, and the function has a
|
||||
different name so as not to clash with the prototype in pcre2.h.
|
||||
|
||||
Arguments: none when PCRE2_DFTABLES is defined
|
||||
else a PCRE2 general context or NULL
|
||||
Returns: pointer to the contiguous block of data
|
||||
else NULL if memory allocation failed
|
||||
*/
|
||||
|
||||
#ifdef PCRE2_DFTABLES /* Included in freestanding pcre2_dftables program */
|
||||
static const uint8_t *maketables(void)
|
||||
{
|
||||
uint8_t *yield = (uint8_t *)malloc(TABLES_LENGTH);
|
||||
|
||||
#else /* Not PCRE2_DFTABLES, that is, compiling the library */
|
||||
PCRE2_EXP_DEFN const uint8_t * PCRE2_CALL_CONVENTION
|
||||
pcre2_maketables(pcre2_general_context *gcontext)
|
||||
{
|
||||
uint8_t *yield = (uint8_t *)((gcontext != NULL)?
|
||||
gcontext->memctl.malloc(TABLES_LENGTH, gcontext->memctl.memory_data) :
|
||||
malloc(TABLES_LENGTH));
|
||||
#endif /* PCRE2_DFTABLES */
|
||||
|
||||
int i;
|
||||
uint8_t *p;
|
||||
|
||||
if (yield == NULL) return NULL;
|
||||
p = yield;
|
||||
|
||||
/* First comes the lower casing table */
|
||||
|
||||
for (i = 0; i < 256; i++) *p++ = tolower(i);
|
||||
|
||||
/* Next the case-flipping table */
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
int c = islower(i)? toupper(i) : tolower(i);
|
||||
*p++ = (c < 256)? c : i;
|
||||
}
|
||||
|
||||
/* Then the character class tables. Don't try to be clever and save effort on
|
||||
exclusive ones - in some locales things may be different.
|
||||
|
||||
Note that the table for "space" includes everything "isspace" gives, including
|
||||
VT in the default locale. This makes it work for the POSIX class [:space:].
|
||||
From PCRE1 release 8.34 and for all PCRE2 releases it is also correct for Perl
|
||||
space, because Perl added VT at release 5.18.
|
||||
|
||||
Note also that it is possible for a character to be alnum or alpha without
|
||||
being lower or upper, such as "male and female ordinals" (\xAA and \xBA) in the
|
||||
fr_FR locale (at least under Debian Linux's locales as of 12/2005). So we must
|
||||
test for alnum specially. */
|
||||
|
||||
memset(p, 0, cbit_length);
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if (isdigit(i)) p[cbit_digit + i/8] |= 1u << (i&7);
|
||||
if (isupper(i)) p[cbit_upper + i/8] |= 1u << (i&7);
|
||||
if (islower(i)) p[cbit_lower + i/8] |= 1u << (i&7);
|
||||
if (isalnum(i)) p[cbit_word + i/8] |= 1u << (i&7);
|
||||
if (i == '_') p[cbit_word + i/8] |= 1u << (i&7);
|
||||
if (isspace(i)) p[cbit_space + i/8] |= 1u << (i&7);
|
||||
if (isxdigit(i)) p[cbit_xdigit + i/8] |= 1u << (i&7);
|
||||
if (isgraph(i)) p[cbit_graph + i/8] |= 1u << (i&7);
|
||||
if (isprint(i)) p[cbit_print + i/8] |= 1u << (i&7);
|
||||
if (ispunct(i)) p[cbit_punct + i/8] |= 1u << (i&7);
|
||||
if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1u << (i&7);
|
||||
}
|
||||
p += cbit_length;
|
||||
|
||||
/* Finally, the character type table. In this, we used to exclude VT from the
|
||||
white space chars, because Perl didn't recognize it as such for \s and for
|
||||
comments within regexes. However, Perl changed at release 5.18, so PCRE1
|
||||
changed at release 8.34 and it's always been this way for PCRE2. */
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
int x = 0;
|
||||
if (isspace(i)) x += ctype_space;
|
||||
if (isalpha(i)) x += ctype_letter;
|
||||
if (islower(i)) x += ctype_lcletter;
|
||||
if (isdigit(i)) x += ctype_digit;
|
||||
if (isalnum(i) || i == '_') x += ctype_word;
|
||||
*p++ = x;
|
||||
}
|
||||
|
||||
return yield;
|
||||
}
|
||||
|
||||
#ifndef PCRE2_DFTABLES /* Compiling the library */
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_maketables_free(pcre2_general_context *gcontext, const uint8_t *tables)
|
||||
{
|
||||
if (gcontext != NULL)
|
||||
gcontext->memctl.free((void *)tables, gcontext->memctl.memory_data);
|
||||
else
|
||||
free((void *)tables);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* End of pcre2_maketables.c */
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,187 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create a match data block given ovector size *
|
||||
*************************************************/
|
||||
|
||||
/* A minimum of 1 is imposed on the number of ovector pairs. A maximum is also
|
||||
imposed because the oveccount field in a match data block is uint16_t. */
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext)
{
  pcre2_match_data *block;
  size_t block_size;

  /* Force the requested number of ovector pairs into the range
  1..UINT16_MAX before it is used to size the block. */

  if (oveccount < 1)
    oveccount = 1;
  else if (oveccount > UINT16_MAX)
    oveccount = UINT16_MAX;

  /* The block is the fixed part up to the ovector, followed by two
  PCRE2_SIZE slots for each pair. */

  block_size = offsetof(pcre2_match_data, ovector) +
    2*oveccount*sizeof(PCRE2_SIZE);

  block = PRIV(memctl_malloc)(block_size, (pcre2_memctl *)gcontext);

  /* On allocation failure NULL is passed straight back to the caller. */

  if (block != NULL)
    {
    block->oveccount = oveccount;
    block->flags = 0;
    block->heapframes = NULL;
    block->heapframes_size = 0;
    }

  return block;
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create a match data block using pattern data *
|
||||
*************************************************/
|
||||
|
||||
/* If no context is supplied, use the memory allocator from the code. This code
|
||||
assumes that a general context contains nothing other than a memory allocator.
|
||||
If that ever changes, this code will need fixing. */
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create_from_pattern(const pcre2_code *code,
  pcre2_general_context *gcontext)
{
  const pcre2_real_code *re = (const pcre2_real_code *)code;

  /* With no explicit context, the compiled pattern itself is passed in the
  context's place so that its memory allocator is used. */

  pcre2_general_context *allocator =
    (gcontext != NULL)? gcontext : (pcre2_general_context *)code;

  /* One ovector pair more than the pattern's top_bracket value. */

  return pcre2_match_data_create(re->top_bracket + 1, allocator);
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free a match data block *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_match_data_free(pcre2_match_data *match_data)
|
||||
{
|
||||
if (match_data != NULL)
|
||||
{
|
||||
if (match_data->heapframes != NULL)
|
||||
match_data->memctl.free(match_data->heapframes,
|
||||
match_data->memctl.memory_data);
|
||||
if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
|
||||
match_data->memctl.free((void *)match_data->subject,
|
||||
match_data->memctl.memory_data);
|
||||
match_data->memctl.free(match_data, match_data->memctl.memory_data);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get last mark in match *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN PCRE2_SPTR PCRE2_CALL_CONVENTION
|
||||
pcre2_get_mark(pcre2_match_data *match_data)
|
||||
{
|
||||
return match_data->mark;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get pointer to ovector *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN PCRE2_SIZE * PCRE2_CALL_CONVENTION
pcre2_get_ovector_pointer(pcre2_match_data *match_data)
{
  /* The ovector lives inside the match data block; return its address. */

  PCRE2_SIZE *pairs = match_data->ovector;
  return pairs;
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get number of ovector slots *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN uint32_t PCRE2_CALL_CONVENTION
|
||||
pcre2_get_ovector_count(pcre2_match_data *match_data)
|
||||
{
|
||||
return match_data->oveccount;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get starting code unit in match *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
|
||||
pcre2_get_startchar(pcre2_match_data *match_data)
|
||||
{
|
||||
return match_data->startchar;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get size of match data block *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
|
||||
pcre2_get_match_data_size(pcre2_match_data *match_data)
|
||||
{
|
||||
return offsetof(pcre2_match_data, ovector) +
|
||||
2 * (match_data->oveccount) * sizeof(PCRE2_SIZE);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get heapframes size *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
|
||||
pcre2_get_match_data_heapframes_size(pcre2_match_data *match_data)
|
||||
{
|
||||
return match_data->heapframes_size;
|
||||
}
|
||||
|
||||
/* End of pcre2_match_data.c */
|
||||
@@ -1,243 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains internal functions for testing newlines when more than
|
||||
one kind of newline is to be recognized. When a newline is found, its length is
|
||||
returned. In principle, we could implement several newline "types", each
|
||||
referring to a different set of newline characters. At present, PCRE2 supports
|
||||
only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
|
||||
and NLTYPE_ANY. The full list of Unicode newline characters is taken from
|
||||
http://unicode.org/unicode/reports/tr18/. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Check for newline at given position *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called only via the IS_NEWLINE macro, which does so only
|
||||
when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
|
||||
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the code unit
|
||||
pointed to by ptr is less than the end of the string.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to possible newline
|
||||
type the newline type
|
||||
endptr pointer to the end of the string
|
||||
lenptr where to return the length
|
||||
utf TRUE if in utf mode
|
||||
|
||||
Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr,
|
||||
uint32_t *lenptr, BOOL utf)
|
||||
{
|
||||
uint32_t c;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) { GETCHAR(c, ptr); } else c = *ptr;
|
||||
#else
|
||||
(void)utf;
|
||||
c = *ptr;
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
case CHAR_LF:
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
|
||||
case CHAR_CR:
|
||||
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* NLTYPE_ANY */
|
||||
|
||||
else switch(c)
|
||||
{
|
||||
#ifdef EBCDIC
|
||||
case CHAR_NEL:
|
||||
#endif
|
||||
case CHAR_LF:
|
||||
case CHAR_VT:
|
||||
case CHAR_FF:
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
|
||||
case CHAR_CR:
|
||||
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
#ifndef EBCDIC
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
case CHAR_NEL:
|
||||
*lenptr = utf? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 3;
|
||||
return TRUE;
|
||||
|
||||
#else /* 16-bit or 32-bit code units */
|
||||
case CHAR_NEL:
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
#endif
|
||||
#endif /* Not EBCDIC */
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Check for newline at previous position *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called only via the WAS_NEWLINE macro, which does so only
|
||||
when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
|
||||
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the initial
|
||||
value of ptr is greater than the start of the string that is being processed.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to possible newline
|
||||
type the newline type
|
||||
startptr pointer to the start of the string
|
||||
lenptr where to return the length
|
||||
utf TRUE if in utf mode
|
||||
|
||||
Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr,
|
||||
uint32_t *lenptr, BOOL utf)
|
||||
{
|
||||
uint32_t c;
|
||||
ptr--;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
BACKCHAR(ptr);
|
||||
GETCHAR(c, ptr);
|
||||
}
|
||||
else c = *ptr;
|
||||
#else
|
||||
(void)utf;
|
||||
c = *ptr;
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
case CHAR_LF:
|
||||
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
case CHAR_CR:
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* NLTYPE_ANY */
|
||||
|
||||
else switch(c)
|
||||
{
|
||||
case CHAR_LF:
|
||||
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
#ifdef EBCDIC
|
||||
case CHAR_NEL:
|
||||
#endif
|
||||
case CHAR_VT:
|
||||
case CHAR_FF:
|
||||
case CHAR_CR:
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
|
||||
#ifndef EBCDIC
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
case CHAR_NEL:
|
||||
*lenptr = utf? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 3;
|
||||
return TRUE;
|
||||
|
||||
#else /* 16-bit or 32-bit code units */
|
||||
case CHAR_NEL:
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
#endif
|
||||
#endif /* Not EBCDIC */
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_newline.c */
|
||||
@@ -1,120 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This file contains a function that converts a Unicode character code point
|
||||
into a UTF string. The behaviour is different for each code unit width. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
/* If SUPPORT_UNICODE is not defined, this function will never be called.
|
||||
Supply a dummy function because some compilers do not like empty source
|
||||
modules. */
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
unsigned int
|
||||
PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
|
||||
{
|
||||
(void)(cvalue);
|
||||
(void)(buffer);
|
||||
return 0;
|
||||
}
|
||||
#else /* SUPPORT_UNICODE */
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Convert code point to UTF *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
cvalue the character value
|
||||
buffer pointer to buffer for result
|
||||
|
||||
Returns: number of code units placed in the buffer
|
||||
*/
|
||||
|
||||
unsigned int
|
||||
PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
|
||||
{
|
||||
/* Convert to UTF-8 */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
int i, j;
|
||||
for (i = 0; i < PRIV(utf8_table1_size); i++)
|
||||
if ((int)cvalue <= PRIV(utf8_table1)[i]) break;
|
||||
buffer += i;
|
||||
for (j = i; j > 0; j--)
|
||||
{
|
||||
*buffer-- = 0x80 | (cvalue & 0x3f);
|
||||
cvalue >>= 6;
|
||||
}
|
||||
*buffer = PRIV(utf8_table2)[i] | cvalue;
|
||||
return i + 1;
|
||||
|
||||
/* Convert to UTF-16 */
|
||||
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
if (cvalue <= 0xffff)
|
||||
{
|
||||
*buffer = (PCRE2_UCHAR)cvalue;
|
||||
return 1;
|
||||
}
|
||||
cvalue -= 0x10000;
|
||||
*buffer++ = 0xd800 | (cvalue >> 10);
|
||||
*buffer = 0xdc00 | (cvalue & 0x3ff);
|
||||
return 2;
|
||||
|
||||
/* Convert to UTF-32 */
|
||||
|
||||
#else
|
||||
*buffer = (PCRE2_UCHAR)cvalue;
|
||||
return 1;
|
||||
#endif
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* End of pcre2_ord2utf.c */
|
||||
@@ -1,434 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return info about compiled pattern *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
code points to compiled code
|
||||
what what information is required
|
||||
where where to put the information; if NULL, return length
|
||||
|
||||
Returns: 0 when data returned
|
||||
> 0 when length requested
|
||||
< 0 on error or unset value
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where)
|
||||
{
|
||||
const pcre2_real_code *re = (const pcre2_real_code *)code;
|
||||
|
||||
if (where == NULL) /* Requests field length */
|
||||
{
|
||||
switch(what)
|
||||
{
|
||||
case PCRE2_INFO_ALLOPTIONS:
|
||||
case PCRE2_INFO_ARGOPTIONS:
|
||||
case PCRE2_INFO_BACKREFMAX:
|
||||
case PCRE2_INFO_BSR:
|
||||
case PCRE2_INFO_CAPTURECOUNT:
|
||||
case PCRE2_INFO_DEPTHLIMIT:
|
||||
case PCRE2_INFO_EXTRAOPTIONS:
|
||||
case PCRE2_INFO_FIRSTCODETYPE:
|
||||
case PCRE2_INFO_FIRSTCODEUNIT:
|
||||
case PCRE2_INFO_HASBACKSLASHC:
|
||||
case PCRE2_INFO_HASCRORLF:
|
||||
case PCRE2_INFO_HEAPLIMIT:
|
||||
case PCRE2_INFO_JCHANGED:
|
||||
case PCRE2_INFO_LASTCODETYPE:
|
||||
case PCRE2_INFO_LASTCODEUNIT:
|
||||
case PCRE2_INFO_MATCHEMPTY:
|
||||
case PCRE2_INFO_MATCHLIMIT:
|
||||
case PCRE2_INFO_MAXLOOKBEHIND:
|
||||
case PCRE2_INFO_MINLENGTH:
|
||||
case PCRE2_INFO_NAMEENTRYSIZE:
|
||||
case PCRE2_INFO_NAMECOUNT:
|
||||
case PCRE2_INFO_NEWLINE:
|
||||
return sizeof(uint32_t);
|
||||
|
||||
case PCRE2_INFO_FIRSTBITMAP:
|
||||
return sizeof(const uint8_t *);
|
||||
|
||||
case PCRE2_INFO_JITSIZE:
|
||||
case PCRE2_INFO_SIZE:
|
||||
case PCRE2_INFO_FRAMESIZE:
|
||||
return sizeof(size_t);
|
||||
|
||||
case PCRE2_INFO_NAMETABLE:
|
||||
return sizeof(PCRE2_SPTR);
|
||||
}
|
||||
}
|
||||
|
||||
if (re == NULL) return PCRE2_ERROR_NULL;
|
||||
|
||||
/* Check that the first field in the block is the magic number. If it is not,
|
||||
return with PCRE2_ERROR_BADMAGIC. */
|
||||
|
||||
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
|
||||
|
||||
/* Check that this pattern was compiled in the correct bit mode */
|
||||
|
||||
if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
|
||||
|
||||
switch(what)
|
||||
{
|
||||
case PCRE2_INFO_ALLOPTIONS:
|
||||
*((uint32_t *)where) = re->overall_options;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_ARGOPTIONS:
|
||||
*((uint32_t *)where) = re->compile_options;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_BACKREFMAX:
|
||||
*((uint32_t *)where) = re->top_backref;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_BSR:
|
||||
*((uint32_t *)where) = re->bsr_convention;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_CAPTURECOUNT:
|
||||
*((uint32_t *)where) = re->top_bracket;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_DEPTHLIMIT:
|
||||
*((uint32_t *)where) = re->limit_depth;
|
||||
if (re->limit_depth == UINT32_MAX) return PCRE2_ERROR_UNSET;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_EXTRAOPTIONS:
|
||||
*((uint32_t *)where) = re->extra_options;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_FIRSTCODETYPE:
|
||||
*((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 :
|
||||
((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_FIRSTCODEUNIT:
|
||||
*((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)?
|
||||
re->first_codeunit : 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_FIRSTBITMAP:
|
||||
*((const uint8_t **)where) = ((re->flags & PCRE2_FIRSTMAPSET) != 0)?
|
||||
&(re->start_bitmap[0]) : NULL;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_FRAMESIZE:
|
||||
*((size_t *)where) = offsetof(heapframe, ovector) +
|
||||
re->top_bracket * 2 * sizeof(PCRE2_SIZE);
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_HASBACKSLASHC:
|
||||
*((uint32_t *)where) = (re->flags & PCRE2_HASBKC) != 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_HASCRORLF:
|
||||
*((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_HEAPLIMIT:
|
||||
*((uint32_t *)where) = re->limit_heap;
|
||||
if (re->limit_heap == UINT32_MAX) return PCRE2_ERROR_UNSET;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_JCHANGED:
|
||||
*((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_JITSIZE:
|
||||
#ifdef SUPPORT_JIT
|
||||
*((size_t *)where) = (re->executable_jit != NULL)?
|
||||
PRIV(jit_get_size)(re->executable_jit) : 0;
|
||||
#else
|
||||
*((size_t *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_LASTCODETYPE:
|
||||
*((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)? 1 : 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_LASTCODEUNIT:
|
||||
*((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)?
|
||||
re->last_codeunit : 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_MATCHEMPTY:
|
||||
*((uint32_t *)where) = (re->flags & PCRE2_MATCH_EMPTY) != 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_MATCHLIMIT:
|
||||
*((uint32_t *)where) = re->limit_match;
|
||||
if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_MAXLOOKBEHIND:
|
||||
*((uint32_t *)where) = re->max_lookbehind;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_MINLENGTH:
|
||||
*((uint32_t *)where) = re->minlength;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_NAMEENTRYSIZE:
|
||||
*((uint32_t *)where) = re->name_entry_size;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_NAMECOUNT:
|
||||
*((uint32_t *)where) = re->name_count;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_NAMETABLE:
|
||||
*((PCRE2_SPTR *)where) = (PCRE2_SPTR)((const char *)re +
|
||||
sizeof(pcre2_real_code));
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_NEWLINE:
|
||||
*((uint32_t *)where) = re->newline_convention;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_SIZE:
|
||||
*((size_t *)where) = re->blocksize;
|
||||
break;
|
||||
|
||||
default: return PCRE2_ERROR_BADOPTION;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Callout enumerator *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
code points to compiled code
|
||||
callback function called for each callout block
|
||||
callout_data user data passed to the callback
|
||||
|
||||
Returns: 0 when successfully completed
|
||||
< 0 on local error
|
||||
!= 0 for callback error
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_callout_enumerate(const pcre2_code *code,
|
||||
int (*callback)(pcre2_callout_enumerate_block *, void *), void *callout_data)
|
||||
{
|
||||
const pcre2_real_code *re = (const pcre2_real_code *)code;
|
||||
pcre2_callout_enumerate_block cb;
|
||||
PCRE2_SPTR cc;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
BOOL utf;
|
||||
#endif
|
||||
|
||||
if (re == NULL) return PCRE2_ERROR_NULL;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
utf = (re->overall_options & PCRE2_UTF) != 0;
|
||||
#endif
|
||||
|
||||
/* Check that the first field in the block is the magic number. If it is not,
|
||||
return with PCRE2_ERROR_BADMAGIC. */
|
||||
|
||||
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
|
||||
|
||||
/* Check that this pattern was compiled in the correct bit mode */
|
||||
|
||||
if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
|
||||
|
||||
cb.version = 0;
|
||||
cc = (PCRE2_SPTR)((const uint8_t *)re + sizeof(pcre2_real_code))
|
||||
+ re->name_count * re->name_entry_size;
|
||||
|
||||
while (TRUE)
|
||||
{
|
||||
int rc;
|
||||
switch (*cc)
|
||||
{
|
||||
case OP_END:
|
||||
return 0;
|
||||
|
||||
case OP_CHAR:
|
||||
case OP_CHARI:
|
||||
case OP_NOT:
|
||||
case OP_NOTI:
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
case OP_EXACT:
|
||||
case OP_POSSTAR:
|
||||
case OP_POSPLUS:
|
||||
case OP_POSQUERY:
|
||||
case OP_POSUPTO:
|
||||
case OP_STARI:
|
||||
case OP_MINSTARI:
|
||||
case OP_PLUSI:
|
||||
case OP_MINPLUSI:
|
||||
case OP_QUERYI:
|
||||
case OP_MINQUERYI:
|
||||
case OP_UPTOI:
|
||||
case OP_MINUPTOI:
|
||||
case OP_EXACTI:
|
||||
case OP_POSSTARI:
|
||||
case OP_POSPLUSI:
|
||||
case OP_POSQUERYI:
|
||||
case OP_POSUPTOI:
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTPOSSTAR:
|
||||
case OP_NOTPOSPLUS:
|
||||
case OP_NOTPOSQUERY:
|
||||
case OP_NOTPOSUPTO:
|
||||
case OP_NOTSTARI:
|
||||
case OP_NOTMINSTARI:
|
||||
case OP_NOTPLUSI:
|
||||
case OP_NOTMINPLUSI:
|
||||
case OP_NOTQUERYI:
|
||||
case OP_NOTMINQUERYI:
|
||||
case OP_NOTUPTOI:
|
||||
case OP_NOTMINUPTOI:
|
||||
case OP_NOTEXACTI:
|
||||
case OP_NOTPOSSTARI:
|
||||
case OP_NOTPOSPLUSI:
|
||||
case OP_NOTPOSQUERYI:
|
||||
case OP_NOTPOSUPTOI:
|
||||
cc += PRIV(OP_lengths)[*cc];
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
|
||||
#endif
|
||||
break;
|
||||
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEEXACT:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEPOSPLUS:
|
||||
case OP_TYPEPOSQUERY:
|
||||
case OP_TYPEPOSUPTO:
|
||||
cc += PRIV(OP_lengths)[*cc];
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (cc[-1] == OP_PROP || cc[-1] == OP_NOTPROP) cc += 2;
|
||||
#endif
|
||||
break;
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
case OP_XCLASS:
|
||||
case OP_ECLASS:
|
||||
cc += GET(cc, 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
cc += PRIV(OP_lengths)[*cc] + cc[1];
|
||||
break;
|
||||
|
||||
case OP_CALLOUT:
|
||||
cb.pattern_position = GET(cc, 1);
|
||||
cb.next_item_length = GET(cc, 1 + LINK_SIZE);
|
||||
cb.callout_number = cc[1 + 2*LINK_SIZE];
|
||||
cb.callout_string_offset = 0;
|
||||
cb.callout_string_length = 0;
|
||||
cb.callout_string = NULL;
|
||||
rc = callback(&cb, callout_data);
|
||||
if (rc != 0) return rc;
|
||||
cc += PRIV(OP_lengths)[*cc];
|
||||
break;
|
||||
|
||||
case OP_CALLOUT_STR:
|
||||
cb.pattern_position = GET(cc, 1);
|
||||
cb.next_item_length = GET(cc, 1 + LINK_SIZE);
|
||||
cb.callout_number = 0;
|
||||
cb.callout_string_offset = GET(cc, 1 + 3*LINK_SIZE);
|
||||
cb.callout_string_length =
|
||||
GET(cc, 1 + 2*LINK_SIZE) - (1 + 4*LINK_SIZE) - 2;
|
||||
cb.callout_string = cc + (1 + 4*LINK_SIZE) + 1;
|
||||
rc = callback(&cb, callout_data);
|
||||
if (rc != 0) return rc;
|
||||
cc += GET(cc, 1 + 2*LINK_SIZE);
|
||||
break;
|
||||
|
||||
default:
|
||||
cc += PRIV(OP_lengths)[*cc];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_pattern_info.c */
|
||||
@@ -1,1111 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains a PCRE private debugging function for printing out the
|
||||
internal form of a compiled regular expression, along with some supporting
|
||||
local functions. This source file is #included in pcre2test.c at each supported
|
||||
code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
|
||||
that comprise the library. It can also optionally be included in
|
||||
pcre2_compile.c for detailed debugging in error situations. */
|
||||
|
||||
|
||||
/* Tables of operator names. The same 8-bit table is used for all code unit
|
||||
widths, so it must be defined only once. The list itself is defined in
|
||||
pcre2_internal.h, which is #included by pcre2test before this file. */
|
||||
|
||||
#ifndef OP_LISTS_DEFINED
|
||||
static const char *OP_names[] = { OP_NAME_LIST };
|
||||
STATIC_ASSERT(sizeof(OP_names)/sizeof(*OP_names) == OP_TABLE_LENGTH, OP_names);
|
||||
#define OP_LISTS_DEFINED
|
||||
#endif
|
||||
|
||||
/* The functions and tables herein must all have mode-dependent names. */
|
||||
|
||||
#define OP_lengths PCRE2_SUFFIX(OP_lengths_)
|
||||
#define get_ucpname PCRE2_SUFFIX(get_ucpname_)
|
||||
#define pcre2_printint PCRE2_SUFFIX(pcre2_printint_)
|
||||
#define print_char PCRE2_SUFFIX(print_char_)
|
||||
#define print_custring PCRE2_SUFFIX(print_custring_)
|
||||
#define print_custring_bylen PCRE2_SUFFIX(print_custring_bylen_)
|
||||
#define print_prop PCRE2_SUFFIX(print_prop_)
|
||||
#define print_char_list PCRE2_SUFFIX(print_char_list_)
|
||||
#define print_map PCRE2_SUFFIX(print_map_)
|
||||
#define print_class PCRE2_SUFFIX(print_class_)
|
||||
|
||||
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
|
||||
the definition is next to the definition of the opcodes in pcre2_internal.h.
|
||||
The contents of the table are, however, mode-dependent. */
|
||||
|
||||
static const uint8_t OP_lengths[] = { OP_LENGTHS };
|
||||
STATIC_ASSERT(sizeof(OP_lengths)/sizeof(*OP_lengths) == OP_TABLE_LENGTH,
|
||||
PCRE2_SUFFIX(OP_lengths_));
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print one character from a string *
|
||||
*************************************************/
|
||||
|
||||
/* In UTF mode the character may occupy more than one code unit.
|
||||
|
||||
Arguments:
|
||||
f file to write to
|
||||
ptr pointer to first code unit of the character
|
||||
utf TRUE if string is UTF (will be FALSE if UTF is not supported)
|
||||
|
||||
Returns: number of additional code units used
|
||||
*/
|
||||
|
||||
static unsigned int
|
||||
print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
|
||||
{
|
||||
uint32_t c = *ptr;
|
||||
BOOL one_code_unit = !utf;
|
||||
|
||||
/* If UTF is supported and requested, check for a valid single code unit. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
one_code_unit = c < 0x80;
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
one_code_unit = (c & 0xfc00) != 0xd800;
|
||||
#else
|
||||
one_code_unit = (c & 0xfffff800u) != 0xd800u;
|
||||
#endif /* CODE_UNIT_WIDTH */
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Handle a valid one-code-unit character at any width. */
|
||||
|
||||
if (one_code_unit)
|
||||
{
|
||||
if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
|
||||
else if (c < 0x80) fprintf(f, "\\x%02x", c);
|
||||
else fprintf(f, "\\x{%02x}", c);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Code for invalid UTF code units and multi-unit UTF characters is different
|
||||
for each width. If UTF is not supported, control should never get here, but we
|
||||
need a return statement to keep the compiler happy. */
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
return 0;
|
||||
#else
|
||||
|
||||
/* Malformed UTF-8 should occur only if the sanity check has been turned off.
|
||||
Rather than swallow random bytes, just stop if we hit a bad one. Print it with
|
||||
\X instead of \x as an indication. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if ((c & 0xc0) != 0xc0)
|
||||
{
|
||||
fprintf(f, "\\X{%x}", c); /* Invalid starting byte */
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
int i;
|
||||
int a = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */
|
||||
int s = 6*a;
|
||||
c = (c & PRIV(utf8_table3)[a]) << s;
|
||||
for (i = 1; i <= a; i++)
|
||||
{
|
||||
if ((ptr[i] & 0xc0) != 0x80)
|
||||
{
|
||||
fprintf(f, "\\X{%x}", c); /* Invalid secondary byte */
|
||||
return i - 1;
|
||||
}
|
||||
s -= 6;
|
||||
c |= (ptr[i] & 0x3f) << s;
|
||||
}
|
||||
fprintf(f, "\\x{%x}", c);
|
||||
return a;
|
||||
}
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||
|
||||
/* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one.
|
||||
Print it with \X instead of \x as an indication. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 16
|
||||
if ((ptr[1] & 0xfc00) != 0xdc00)
|
||||
{
|
||||
fprintf(f, "\\X{%x}", c);
|
||||
return 0;
|
||||
}
|
||||
c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;
|
||||
fprintf(f, "\\x{%x}", c);
|
||||
return 1;
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
|
||||
|
||||
/* For UTF-32 we get here only for a malformed code unit, which should only
|
||||
occur if the sanity check has been turned off. Print it with \X instead of \x
|
||||
as an indication. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
fprintf(f, "\\X{%x}", c);
|
||||
return 0;
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print string as a list of code units *
|
||||
*************************************************/
|
||||
|
||||
/* These take no account of UTF as they always print each individual code unit.
|
||||
The string is zero-terminated for print_custring(); the length is given for
|
||||
print_custring_bylen().
|
||||
|
||||
Arguments:
|
||||
f file to write to
|
||||
ptr point to the string
|
||||
len length for print_custring_bylen()
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
print_custring(FILE *f, PCRE2_SPTR ptr)
|
||||
{
|
||||
while (*ptr != '\0')
|
||||
{
|
||||
uint32_t c = *ptr++;
|
||||
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_custring_bylen(FILE *f, PCRE2_SPTR ptr, PCRE2_UCHAR len)
|
||||
{
|
||||
for (; len > 0; len--)
|
||||
{
|
||||
uint32_t c = *ptr++;
|
||||
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
*          Find Unicode property name            *
*************************************************/

/* The table of property names is absent when there is no UTF/UCP support. A
pattern using Unicode properties cannot be compiled in such a build, so this
function should never be called there; the single #ifdef below avoids having
to scatter conditionals through the callers.

The table holds both full names and abbreviations. It is scanned from the end
towards the start for entries whose value matches and whose type is either
ptype or, when ptype is PT_SC, PT_SCX. A 3-character script name is returned
as soon as it is seen; otherwise the longer of the first two matches is used.
If nothing matches the result is "??". */

static const char *
get_ucpname(unsigned int ptype, unsigned int pvalue)
{
#ifdef SUPPORT_UNICODE
const char *best = "??";
size_t best_len = 0;
int matches = 0;
unsigned int alt_type = (ptype == PT_SC)? PT_SCX : ptype;
ptrdiff_t idx = (ptrdiff_t)PRIV(utt_size);

while (idx-- > 0)
  {
  const ucp_type_table *entry = PRIV(utt) + idx;
  const char *name;
  size_t name_len;

  if (pvalue != entry->value) continue;
  if (ptype != entry->type && alt_type != entry->type) continue;

  name = PRIV(utt_names) + entry->name_offset;
  name_len = strlen(name);

  /* A three-letter script name wins outright. */

  if (name_len == 3 && (entry->type == PT_SC || entry->type == PT_SCX))
    return name;

  if (name_len > best_len)
    {
    best = name;
    best_len = name_len;
    }

  /* Only the first two matching entries are considered. */

  if (++matches >= 2) break;
  }

return best;

#else   /* No UTF support */
(void)ptype;
(void)pvalue;
return "??";
#endif  /* SUPPORT_UNICODE */
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print Unicode property value *
|
||||
*************************************************/
|
||||
|
||||
/* "Normal" properties can be printed from tables. The PT_CLIST property is a
|
||||
pseudo-property that contains a pointer to a list of case-equivalent
|
||||
characters.
|
||||
|
||||
Arguments:
|
||||
f file to write to
|
||||
code pointer in the compiled code
|
||||
before text to print before
|
||||
after text to print after
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
print_prop(FILE *f, PCRE2_SPTR code, const char *before, const char *after)
|
||||
{
|
||||
if (code[1] != PT_CLIST)
|
||||
{
|
||||
const char *sc = (code[1] == PT_SC)? "script:" : "";
|
||||
const char *s = get_ucpname(code[1], code[2]);
|
||||
fprintf(f, "%s%s %s%c%s%s", before, OP_names[*code], sc, toupper(s[0]), s+1, after);
|
||||
}
|
||||
else
|
||||
{
|
||||
const uint32_t *p = PRIV(ucd_caseless_sets) + code[2];
|
||||
fprintf (f, "%s%sclist", before, (*code == OP_PROP)? "" : "not ");
|
||||
while (*p < NOTACHAR) fprintf(f, " %04x", *p++);
|
||||
fprintf(f, "%s", after);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print character list *
|
||||
*************************************************/
|
||||
|
||||
/* Prints the characters and character ranges in a character list.
|
||||
|
||||
Arguments:
|
||||
f file to write to
|
||||
code pointer in the compiled code
|
||||
*/
|
||||
|
||||
static PCRE2_SPTR
|
||||
print_char_list(FILE *f, PCRE2_SPTR code, const uint8_t *char_lists_end)
|
||||
{
|
||||
uint32_t type, list_ind;
|
||||
uint32_t char_list_add = XCL_CHAR_LIST_LOW_16_ADD;
|
||||
uint32_t range_start = ~(uint32_t)0, range_end = 0;
|
||||
const uint8_t *next_char;
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
type = (uint32_t)(code[0] << 8) | code[1];
|
||||
code += 2;
|
||||
#else
|
||||
type = code[0];
|
||||
code++;
|
||||
#endif /* CODE_UNIT_WIDTH */
|
||||
|
||||
/* Align characters. */
|
||||
next_char = char_lists_end - (GET(code, 0) << 1);
|
||||
type &= XCL_TYPE_MASK;
|
||||
list_ind = 0;
|
||||
|
||||
if ((type & XCL_BEGIN_WITH_RANGE) != 0)
|
||||
range_start = XCL_CHAR_LIST_LOW_16_START;
|
||||
|
||||
while (type > 0)
|
||||
{
|
||||
uint32_t item_count = type & XCL_ITEM_COUNT_MASK;
|
||||
|
||||
if (item_count == XCL_ITEM_COUNT_MASK)
|
||||
{
|
||||
if (list_ind <= 1)
|
||||
{
|
||||
item_count = *(const uint16_t*)next_char;
|
||||
next_char += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
item_count = *(const uint32_t*)next_char;
|
||||
next_char += 4;
|
||||
}
|
||||
}
|
||||
|
||||
while (item_count > 0)
|
||||
{
|
||||
if (list_ind <= 1)
|
||||
{
|
||||
range_end = *(const uint16_t*)next_char;
|
||||
next_char += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
range_end = *(const uint32_t*)next_char;
|
||||
next_char += 4;
|
||||
}
|
||||
|
||||
if ((range_end & XCL_CHAR_END) != 0)
|
||||
{
|
||||
range_end = char_list_add + (range_end >> XCL_CHAR_SHIFT);
|
||||
|
||||
if (range_start < range_end)
|
||||
fprintf(f, "\\x{%x}-", range_start);
|
||||
|
||||
fprintf(f, "\\x{%x}", range_end);
|
||||
range_start = ~(uint32_t)0;
|
||||
}
|
||||
else
|
||||
range_start = char_list_add + (range_end >> XCL_CHAR_SHIFT);
|
||||
|
||||
item_count--;
|
||||
}
|
||||
|
||||
list_ind++;
|
||||
type >>= XCL_TYPE_BIT_LEN;
|
||||
|
||||
/* The following code could be optimized to 8/16/32 bit,
|
||||
but it is not worth it for a debugging function. */
|
||||
|
||||
if (range_start == ~(uint32_t)0)
|
||||
{
|
||||
if ((type & XCL_BEGIN_WITH_RANGE) != 0)
|
||||
{
|
||||
if (list_ind == 1) range_start = XCL_CHAR_LIST_HIGH_16_START;
|
||||
else if (list_ind == 2) range_start = XCL_CHAR_LIST_LOW_32_START;
|
||||
else range_start = XCL_CHAR_LIST_HIGH_32_START;
|
||||
}
|
||||
}
|
||||
else if ((type & XCL_BEGIN_WITH_RANGE) == 0)
|
||||
{
|
||||
fprintf(f, "\\x{%x}-", range_start);
|
||||
|
||||
if (list_ind == 1) range_end = XCL_CHAR_LIST_LOW_16_END;
|
||||
else if (list_ind == 2) range_end = XCL_CHAR_LIST_HIGH_16_END;
|
||||
else if (list_ind == 3) range_end = XCL_CHAR_LIST_LOW_32_END;
|
||||
else range_end = XCL_CHAR_LIST_HIGH_32_END;
|
||||
|
||||
fprintf(f, "\\x{%x}", range_end);
|
||||
range_start = ~(uint32_t)0;
|
||||
}
|
||||
|
||||
if (list_ind == 1) char_list_add = XCL_CHAR_LIST_HIGH_16_ADD;
|
||||
else if (list_ind == 2) char_list_add = XCL_CHAR_LIST_LOW_32_ADD;
|
||||
else char_list_add = XCL_CHAR_LIST_HIGH_32_ADD;
|
||||
}
|
||||
|
||||
return code + LINK_SIZE;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print a character bitmap *
|
||||
*************************************************/
|
||||
|
||||
/* Prints a 32-byte bitmap, which occurs within a character class opcode.
|
||||
|
||||
Arguments:
|
||||
f file to write to
|
||||
map pointer to the bitmap
|
||||
negated TRUE if the bitmap will be printed as negated
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
print_map(FILE *f, const uint8_t *map, BOOL negated)
|
||||
{
|
||||
BOOL first = TRUE;
|
||||
uint8_t inverted_map[32];
|
||||
int i;
|
||||
|
||||
if (negated)
|
||||
{
|
||||
/* Using 255 ^ instead of ~ avoids clang sanitize warning. */
|
||||
for (i = 0; i < 32; i++) inverted_map[i] = 255 ^ map[i];
|
||||
map = inverted_map;
|
||||
}
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((map[i/8] & (1u << (i&7))) != 0)
|
||||
{
|
||||
int j;
|
||||
for (j = i+1; j < 256; j++)
|
||||
if ((map[j/8] & (1u << (j&7))) == 0) break;
|
||||
if (i == '-' || i == '\\' || i == ']' || (first && i == '^'))
|
||||
fprintf(f, "\\");
|
||||
if (PRINTABLE(i)) fprintf(f, "%c", i);
|
||||
else fprintf(f, "\\x%02x", i);
|
||||
first = FALSE;
|
||||
if (--j > i)
|
||||
{
|
||||
if (j != i + 1) fprintf(f, "-");
|
||||
if (j == '-' || j == '\\' || j == ']') fprintf(f, "\\");
|
||||
if (PRINTABLE(j)) fprintf(f, "%c", j);
|
||||
else fprintf(f, "\\x%02x", j);
|
||||
}
|
||||
i = j;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print character class *
|
||||
*************************************************/
|
||||
|
||||
/* Prints a character class, which must be either an OP_CLASS, OP_NCLASS, or
|
||||
OP_XCLASS.
|
||||
|
||||
Arguments:
|
||||
f file to write to
|
||||
type OP_CLASS, OP_NCLASS, or OP_XCLASS
|
||||
code pointer in the compiled code (after the OP tag)
|
||||
utf TRUE if re is UTF (will be FALSE if UTF is not supported)
|
||||
before text to print before
|
||||
after text to print after
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
print_class(FILE *f, int type, PCRE2_SPTR code, const uint8_t *char_lists_end,
|
||||
BOOL utf, const char *before, const char *after)
|
||||
{
|
||||
BOOL printmap, negated;
|
||||
PCRE2_SPTR ccode;
|
||||
|
||||
/* Negative XCLASS and NCLASS both have a bitmap indicating which characters
|
||||
are accepted. For clarity we print this inverted and prefixed by "^". */
|
||||
if (type == OP_XCLASS)
|
||||
{
|
||||
ccode = code + LINK_SIZE;
|
||||
printmap = (*ccode & XCL_MAP) != 0;
|
||||
negated = (*ccode & XCL_NOT) != 0;
|
||||
ccode++;
|
||||
}
|
||||
else /* CLASS or NCLASS */
|
||||
{
|
||||
printmap = TRUE;
|
||||
negated = type == OP_NCLASS;
|
||||
ccode = code;
|
||||
}
|
||||
|
||||
fprintf(f, "%s[%s", before, negated? "^" : "");
|
||||
|
||||
/* Print a bit map */
|
||||
if (printmap)
|
||||
{
|
||||
print_map(f, (const uint8_t *)ccode, negated);
|
||||
ccode += 32 / sizeof(PCRE2_UCHAR);
|
||||
}
|
||||
|
||||
/* For an XCLASS there is always some additional data */
|
||||
if (type == OP_XCLASS)
|
||||
{
|
||||
PCRE2_UCHAR ch;
|
||||
|
||||
while ((ch = *ccode++) != XCL_END)
|
||||
{
|
||||
const char *notch = "";
|
||||
|
||||
if (ch >= XCL_LIST)
|
||||
{
|
||||
ccode = print_char_list(f, ccode - 1, char_lists_end);
|
||||
break;
|
||||
}
|
||||
|
||||
switch(ch)
|
||||
{
|
||||
case XCL_NOTPROP:
|
||||
notch = "^";
|
||||
/* Fall through */
|
||||
case XCL_PROP:
|
||||
{
|
||||
unsigned int ptype = *ccode++;
|
||||
unsigned int pvalue = *ccode++;
|
||||
const char *s;
|
||||
switch(ptype)
|
||||
{
|
||||
case PT_PXGRAPH:
|
||||
fprintf(f, "[:%sgraph:]", notch);
|
||||
break;
|
||||
case PT_PXPRINT:
|
||||
fprintf(f, "[:%sprint:]", notch);
|
||||
break;
|
||||
case PT_PXPUNCT:
|
||||
fprintf(f, "[:%spunct:]", notch);
|
||||
break;
|
||||
case PT_PXXDIGIT:
|
||||
fprintf(f, "[:%sxdigit:]", notch);
|
||||
break;
|
||||
default:
|
||||
s = get_ucpname(ptype, pvalue);
|
||||
fprintf(f, "\\%c{%c%s}", ((notch[0] == '^')? 'P':'p'),
|
||||
toupper(s[0]), s+1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
ccode += 1 + print_char(f, ccode, utf);
|
||||
if (ch == XCL_RANGE)
|
||||
{
|
||||
fprintf(f, "-");
|
||||
ccode += 1 + print_char(f, ccode, utf);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
PCRE2_ASSERT(ccode == code + (GET(code, 0) - 1));
|
||||
}
|
||||
|
||||
/* Indicate a non-UTF class which was created by negation */
|
||||
fprintf(f, "]%s", after);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print compiled pattern *
|
||||
*************************************************/
|
||||
|
||||
/* The print_lengths flag controls whether offsets and lengths of items are
|
||||
printed. Lenths can be turned off from pcre2test so that automatic tests on
|
||||
bytecode can be written that do not depend on the value of LINK_SIZE.
|
||||
|
||||
Arguments:
|
||||
re a compiled pattern
|
||||
f the file to write to
|
||||
print_lengths show various lengths
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
pcre2_printint(pcre2_code *re, FILE *f, BOOL print_lengths)
|
||||
{
|
||||
PCRE2_SPTR codestart, nametable, code;
|
||||
uint32_t nesize = re->name_entry_size;
|
||||
BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
|
||||
|
||||
nametable = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
|
||||
code = codestart = (PCRE2_SPTR)((uint8_t *)re + re->code_start);
|
||||
|
||||
for(;;)
|
||||
{
|
||||
PCRE2_SPTR ccode;
|
||||
uint32_t c;
|
||||
int i;
|
||||
const char *flag = " ";
|
||||
unsigned int extra = 0;
|
||||
|
||||
if (print_lengths)
|
||||
fprintf(f, "%3d ", (int)(code - codestart));
|
||||
else
|
||||
fprintf(f, " ");
|
||||
|
||||
switch(*code)
|
||||
{
|
||||
case OP_END:
|
||||
fprintf(f, " %s\n", OP_names[*code]);
|
||||
fprintf(f, "------------------------------------------------------------------\n");
|
||||
return;
|
||||
|
||||
case OP_CHAR:
|
||||
fprintf(f, " ");
|
||||
do
|
||||
{
|
||||
code++;
|
||||
code += 1 + print_char(f, code, utf);
|
||||
}
|
||||
while (*code == OP_CHAR);
|
||||
fprintf(f, "\n");
|
||||
continue;
|
||||
|
||||
case OP_CHARI:
|
||||
fprintf(f, " /i ");
|
||||
do
|
||||
{
|
||||
code++;
|
||||
code += 1 + print_char(f, code, utf);
|
||||
}
|
||||
while (*code == OP_CHARI);
|
||||
fprintf(f, "\n");
|
||||
continue;
|
||||
|
||||
case OP_CBRA:
|
||||
case OP_CBRAPOS:
|
||||
case OP_SCBRA:
|
||||
case OP_SCBRAPOS:
|
||||
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
|
||||
else fprintf(f, " ");
|
||||
fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
|
||||
break;
|
||||
|
||||
case OP_BRA:
|
||||
case OP_BRAPOS:
|
||||
case OP_SBRA:
|
||||
case OP_SBRAPOS:
|
||||
case OP_KETRMAX:
|
||||
case OP_KETRMIN:
|
||||
case OP_KETRPOS:
|
||||
case OP_ALT:
|
||||
case OP_KET:
|
||||
case OP_ASSERT:
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
case OP_ASSERT_NA:
|
||||
case OP_ASSERTBACK_NA:
|
||||
case OP_ASSERT_SCS:
|
||||
case OP_ONCE:
|
||||
case OP_SCRIPT_RUN:
|
||||
case OP_COND:
|
||||
case OP_SCOND:
|
||||
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
|
||||
else fprintf(f, " ");
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_REVERSE:
|
||||
if (print_lengths) fprintf(f, "%3d ", GET2(code, 1));
|
||||
else fprintf(f, " ");
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_VREVERSE:
|
||||
if (print_lengths) fprintf(f, "%3d %d ", GET2(code, 1),
|
||||
GET2(code, 1 + IMM2_SIZE));
|
||||
else fprintf(f, " ");
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_CLOSE:
|
||||
fprintf(f, " %s %d", OP_names[*code], GET2(code, 1));
|
||||
break;
|
||||
|
||||
case OP_CREF:
|
||||
fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_DNCREF:
|
||||
{
|
||||
PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
|
||||
fprintf(f, " %s Capture ref <", flag);
|
||||
print_custring(f, entry);
|
||||
fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_RREF:
|
||||
c = GET2(code, 1);
|
||||
if (c == RREF_ANY)
|
||||
fprintf(f, " Cond recurse any");
|
||||
else
|
||||
fprintf(f, " Cond recurse %d", c);
|
||||
break;
|
||||
|
||||
case OP_DNRREF:
|
||||
{
|
||||
PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
|
||||
fprintf(f, " %s Cond recurse <", flag);
|
||||
print_custring(f, entry);
|
||||
fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_FALSE:
|
||||
fprintf(f, " Cond false");
|
||||
break;
|
||||
|
||||
case OP_TRUE:
|
||||
fprintf(f, " Cond true");
|
||||
break;
|
||||
|
||||
case OP_STARI:
|
||||
case OP_MINSTARI:
|
||||
case OP_POSSTARI:
|
||||
case OP_PLUSI:
|
||||
case OP_MINPLUSI:
|
||||
case OP_POSPLUSI:
|
||||
case OP_QUERYI:
|
||||
case OP_MINQUERYI:
|
||||
case OP_POSQUERYI:
|
||||
flag = "/i";
|
||||
/* Fall through */
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_POSSTAR:
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_POSPLUS:
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_POSQUERY:
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEPOSPLUS:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSQUERY:
|
||||
fprintf(f, " %s ", flag);
|
||||
|
||||
if (*code >= OP_TYPESTAR)
|
||||
{
|
||||
if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
|
||||
{
|
||||
print_prop(f, code + 1, "", " ");
|
||||
extra = 2;
|
||||
}
|
||||
else fprintf(f, "%s", OP_names[code[1]]);
|
||||
}
|
||||
else extra = print_char(f, code+1, utf);
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_EXACTI:
|
||||
case OP_UPTOI:
|
||||
case OP_MINUPTOI:
|
||||
case OP_POSUPTOI:
|
||||
flag = "/i";
|
||||
/* Fall through */
|
||||
case OP_EXACT:
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
case OP_POSUPTO:
|
||||
fprintf(f, " %s ", flag);
|
||||
extra = print_char(f, code + 1 + IMM2_SIZE, utf);
|
||||
fprintf(f, "{");
|
||||
if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,");
|
||||
fprintf(f, "%d}", GET2(code,1));
|
||||
if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?");
|
||||
else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+");
|
||||
break;
|
||||
|
||||
case OP_TYPEEXACT:
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEPOSUPTO:
|
||||
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
|
||||
{
|
||||
print_prop(f, code + IMM2_SIZE + 1, " ", " ");
|
||||
extra = 2;
|
||||
}
|
||||
else fprintf(f, " %s", OP_names[code[1 + IMM2_SIZE]]);
|
||||
fprintf(f, "{");
|
||||
if (*code != OP_TYPEEXACT) fprintf(f, "0,");
|
||||
fprintf(f, "%d}", GET2(code,1));
|
||||
if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
|
||||
else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
|
||||
break;
|
||||
|
||||
case OP_NOTI:
|
||||
flag = "/i";
|
||||
/* Fall through */
|
||||
case OP_NOT:
|
||||
fprintf(f, " %s [^", flag);
|
||||
extra = print_char(f, code + 1, utf);
|
||||
fprintf(f, "] (not)");
|
||||
break;
|
||||
|
||||
case OP_NOTSTARI:
|
||||
case OP_NOTMINSTARI:
|
||||
case OP_NOTPOSSTARI:
|
||||
case OP_NOTPLUSI:
|
||||
case OP_NOTMINPLUSI:
|
||||
case OP_NOTPOSPLUSI:
|
||||
case OP_NOTQUERYI:
|
||||
case OP_NOTMINQUERYI:
|
||||
case OP_NOTPOSQUERYI:
|
||||
flag = "/i";
|
||||
/* Fall through */
|
||||
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTPOSSTAR:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTPOSPLUS:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_NOTPOSQUERY:
|
||||
fprintf(f, " %s [^", flag);
|
||||
extra = print_char(f, code + 1, utf);
|
||||
fprintf(f, "]%s (not)", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_NOTEXACTI:
|
||||
case OP_NOTUPTOI:
|
||||
case OP_NOTMINUPTOI:
|
||||
case OP_NOTPOSUPTOI:
|
||||
flag = "/i";
|
||||
/* Fall through */
|
||||
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_NOTPOSUPTO:
|
||||
fprintf(f, " %s [^", flag);
|
||||
extra = print_char(f, code + 1 + IMM2_SIZE, utf);
|
||||
fprintf(f, "]{");
|
||||
if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
|
||||
fprintf(f, "%d}", GET2(code,1));
|
||||
if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
|
||||
else
|
||||
if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+");
|
||||
fprintf(f, " (not)");
|
||||
break;
|
||||
|
||||
case OP_RECURSE:
|
||||
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
|
||||
else fprintf(f, " ");
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_REFI:
|
||||
flag = "/i";
|
||||
extra = code[1 + IMM2_SIZE];
|
||||
/* Fall through */
|
||||
case OP_REF:
|
||||
fprintf(f, " %s \\%d", flag, GET2(code,1));
|
||||
if (extra != 0) fprintf(f, " 0x%02x", extra);
|
||||
ccode = code + OP_lengths[*code];
|
||||
goto CLASS_REF_REPEAT;
|
||||
|
||||
case OP_DNREFI:
|
||||
flag = "/i";
|
||||
extra = code[1 + 2*IMM2_SIZE];
|
||||
/* Fall through */
|
||||
case OP_DNREF:
|
||||
{
|
||||
PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
|
||||
fprintf(f, " %s \\k<", flag);
|
||||
print_custring(f, entry);
|
||||
fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
|
||||
if (extra != 0) fprintf(f, " 0x%02x", extra);
|
||||
}
|
||||
ccode = code + OP_lengths[*code];
|
||||
goto CLASS_REF_REPEAT;
|
||||
|
||||
case OP_CALLOUT:
|
||||
fprintf(f, " %s %d %d %d", OP_names[*code], code[1 + 2*LINK_SIZE],
|
||||
GET(code, 1), GET(code, 1 + LINK_SIZE));
|
||||
break;
|
||||
|
||||
case OP_CALLOUT_STR:
|
||||
c = code[1 + 4*LINK_SIZE];
|
||||
fprintf(f, " %s %c", OP_names[*code], c);
|
||||
extra = GET(code, 1 + 2*LINK_SIZE);
|
||||
print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE);
|
||||
for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
|
||||
if (c == PRIV(callout_start_delims)[i])
|
||||
{
|
||||
c = PRIV(callout_end_delims)[i];
|
||||
break;
|
||||
}
|
||||
fprintf(f, "%c %d %d %d", c, GET(code, 1 + 3*LINK_SIZE), GET(code, 1),
|
||||
GET(code, 1 + LINK_SIZE));
|
||||
break;
|
||||
|
||||
case OP_PROP:
|
||||
case OP_NOTPROP:
|
||||
print_prop(f, code, " ", "");
|
||||
break;
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
case OP_ECLASS:
|
||||
extra = GET(code, 1);
|
||||
fprintf(f, " eclass[\n");
|
||||
/* We print the opcodes contained inside as well. */
|
||||
ccode = code + 1 + LINK_SIZE + 1;
|
||||
if ((ccode[-1] & ECL_MAP) != 0)
|
||||
{
|
||||
const uint8_t *map = (const uint8_t *)ccode;
|
||||
/* The first 6 ASCII characters (SOH...ACK) are totally, utterly useless.
|
||||
If they're set in the bitmap, then it's clearly been formed by negation.*/
|
||||
BOOL print_negated = (map[0] & 0x7e) == 0x7e;
|
||||
|
||||
fprintf(f, " bitmap: [%s", print_negated? "^" : "");
|
||||
print_map(f, map, print_negated);
|
||||
fprintf(f, "]\n");
|
||||
ccode += 32 / sizeof(PCRE2_UCHAR);
|
||||
}
|
||||
else
|
||||
fprintf(f, " no bitmap\n");
|
||||
while (ccode < code + extra)
|
||||
{
|
||||
if (print_lengths)
|
||||
fprintf(f, "%3d ", (int)(ccode - codestart));
|
||||
else
|
||||
fprintf(f, " ");
|
||||
|
||||
switch (*ccode)
|
||||
{
|
||||
case ECL_AND:
|
||||
fprintf(f, " AND\n");
|
||||
ccode += 1;
|
||||
break;
|
||||
case ECL_OR:
|
||||
fprintf(f, " OR\n");
|
||||
ccode += 1;
|
||||
break;
|
||||
case ECL_XOR:
|
||||
fprintf(f, " XOR\n");
|
||||
ccode += 1;
|
||||
break;
|
||||
case ECL_NOT:
|
||||
fprintf(f, " NOT\n");
|
||||
ccode += 1;
|
||||
break;
|
||||
|
||||
case ECL_XCLASS:
|
||||
print_class(f, OP_XCLASS, ccode+1, (uint8_t*)codestart, utf,
|
||||
" xclass: ", "\n");
|
||||
ccode += GET(ccode, 1);
|
||||
break;
|
||||
|
||||
default:
|
||||
fprintf(f, " UNEXPECTED\n");
|
||||
ccode += 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
fprintf(f, " ]");
|
||||
goto CLASS_REF_REPEAT;
|
||||
#endif /* SUPPORT_WIDE_CHARS */
|
||||
|
||||
case OP_CLASS:
|
||||
case OP_NCLASS:
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
case OP_XCLASS:
|
||||
if (*code == OP_XCLASS)
|
||||
extra = GET(code, 1);
|
||||
#endif
|
||||
print_class(f, *code, code+1, (uint8_t*)codestart, utf, " ", "");
|
||||
ccode = code + OP_lengths[*code] + extra;
|
||||
|
||||
/* Handle repeats after a class or a back reference */
|
||||
|
||||
CLASS_REF_REPEAT:
|
||||
switch(*ccode)
|
||||
{
|
||||
unsigned int min, max;
|
||||
|
||||
case OP_CRSTAR:
|
||||
case OP_CRMINSTAR:
|
||||
case OP_CRPLUS:
|
||||
case OP_CRMINPLUS:
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
case OP_CRPOSSTAR:
|
||||
case OP_CRPOSPLUS:
|
||||
case OP_CRPOSQUERY:
|
||||
fprintf(f, "%s", OP_names[*ccode]);
|
||||
extra += OP_lengths[*ccode];
|
||||
break;
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
case OP_CRPOSRANGE:
|
||||
min = GET2(ccode,1);
|
||||
max = GET2(ccode,1 + IMM2_SIZE);
|
||||
if (max == 0) fprintf(f, "{%u,}", min);
|
||||
else fprintf(f, "{%u,%u}", min, max);
|
||||
if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
|
||||
else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+");
|
||||
extra += OP_lengths[*ccode];
|
||||
break;
|
||||
|
||||
/* Do nothing if it's not a repeat; this code stops picky compilers
|
||||
warning about the lack of a default code path. */
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
fprintf(f, " %s ", OP_names[*code]);
|
||||
print_custring_bylen(f, code + 2, code[1]);
|
||||
extra += code[1];
|
||||
break;
|
||||
|
||||
case OP_THEN:
|
||||
fprintf(f, " %s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_CIRCM:
|
||||
case OP_DOLLM:
|
||||
flag = "/m";
|
||||
/* Fall through */
|
||||
|
||||
/* Anything else is just an item with no data, but possibly a flag. */
|
||||
|
||||
default:
|
||||
fprintf(f, " %s %s", flag, OP_names[*code]);
|
||||
break;
|
||||
}
|
||||
|
||||
code += OP_lengths[*code] + extra;
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_printint.c */
|
||||
@@ -1,344 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2021 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This module contains the function for checking a script run. */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Check script run *
|
||||
*************************************************/
|
||||
|
||||
/* A script run is conceptually a sequence of characters all in the same
|
||||
Unicode script. However, it isn't quite that simple. There are special rules
|
||||
for scripts that are commonly used together, and also special rules for digits.
|
||||
This function implements the appropriate checks, which is possible only when
|
||||
PCRE2 is compiled with Unicode support. The function returns TRUE if there is
|
||||
no Unicode support; however, it should never be called in that circumstance
|
||||
because an error is given by pcre2_compile() if a script run is called for in a
|
||||
version of PCRE2 compiled without Unicode support.
|
||||
|
||||
Arguments:
|
||||
ptr point to the first character
|
||||
endptr point after the last character
|
||||
utf TRUE if in UTF mode
|
||||
|
||||
Returns: TRUE if this is a valid script run
|
||||
*/
|
||||
|
||||
/* These are states in the checking process. */
|
||||
|
||||
enum { SCRIPT_UNSET, /* Requirement as yet unknown */
|
||||
SCRIPT_MAP, /* Bitmap contains acceptable scripts */
|
||||
SCRIPT_HANPENDING, /* Have had only Han characters */
|
||||
SCRIPT_HANHIRAKATA, /* Expect Han, Hiragana, or Katakana */
|
||||
SCRIPT_HANBOPOMOFO, /* Expect Han or Bopomofo */
|
||||
SCRIPT_HANHANGUL /* Expect Han or Hangul */
|
||||
};
|
||||
|
||||
/* Number of 32-bit words copied from a ucd_script_sets entry into a map. */
#define UCD_MAPSIZE (ucp_Unknown/32 + 1)
|
||||
/* Number of 32-bit words in the full-size map[] and require_map[] arrays. */
#define FULL_MAPSIZE (ucp_Script_Count/32 + 1)
|
||||
|
||||
BOOL
|
||||
PRIV(script_run)(PCRE2_SPTR ptr, PCRE2_SPTR endptr, BOOL utf)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
uint32_t require_state = SCRIPT_UNSET;
|
||||
uint32_t require_map[FULL_MAPSIZE];
|
||||
uint32_t map[FULL_MAPSIZE];
|
||||
uint32_t require_digitset = 0;
|
||||
uint32_t c;
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
(void)utf; /* Avoid compiler warning */
|
||||
#endif
|
||||
|
||||
/* Any string containing fewer than 2 characters is a valid script run. */
|
||||
|
||||
if (ptr >= endptr) return TRUE;
|
||||
GETCHARINCTEST(c, ptr);
|
||||
if (ptr >= endptr) return TRUE;
|
||||
|
||||
/* Initialize the require map. This is a full-size bitmap that has a bit for
|
||||
every script, as opposed to the maps in ucd_script_sets, which only have bits
|
||||
for scripts less than ucp_Unknown - those that appear in script extension
|
||||
lists. */
|
||||
|
||||
for (int i = 0; i < FULL_MAPSIZE; i++) require_map[i] = 0;
|
||||
|
||||
/* Scan strings of two or more characters, checking the Unicode characteristics
|
||||
of each code point. There is special code for scripts that can be combined with
|
||||
characters from the Han Chinese script. This may be used in conjunction with
|
||||
four other scripts in these combinations:
|
||||
|
||||
. Han with Hiragana and Katakana is allowed (for Japanese).
|
||||
. Han with Bopomofo is allowed (for Taiwanese Mandarin).
|
||||
. Han with Hangul is allowed (for Korean).
|
||||
|
||||
If the first significant character's script is one of the four, the required
|
||||
script type is immediately known. However, if the first significant
|
||||
character's script is Han, we have to keep checking for a non-Han character.
|
||||
Hence the SCRIPT_HANPENDING state. */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
const ucd_record *ucd = GET_UCD(c);
|
||||
uint32_t script = ucd->script;
|
||||
|
||||
/* If the script is Unknown, the string is not a valid script run. Such
|
||||
characters can only form script runs of length one (see test above). */
|
||||
|
||||
if (script == ucp_Unknown) return FALSE;
|
||||
|
||||
/* A character without any script extensions whose script is Inherited or
|
||||
Common is always accepted with any script. If there are extensions, the
|
||||
following processing happens for all scripts. */
|
||||
|
||||
if (UCD_SCRIPTX_PROP(ucd) != 0 || (script != ucp_Inherited && script != ucp_Common))
|
||||
{
|
||||
BOOL OK;
|
||||
|
||||
/* Set up a full-sized map for this character that can include bits for all
|
||||
scripts. Copy the scriptx map for this character (which covers those
|
||||
scripts that appear in script extension lists), set the remaining values to
|
||||
zero, and then, except for Common or Inherited, add this script's bit to
|
||||
the map. */
|
||||
|
||||
memcpy(map, PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(ucd), UCD_MAPSIZE * sizeof(uint32_t));
|
||||
memset(map + UCD_MAPSIZE, 0, (FULL_MAPSIZE - UCD_MAPSIZE) * sizeof(uint32_t));
|
||||
if (script != ucp_Common && script != ucp_Inherited) MAPSET(map, script);
|
||||
|
||||
/* Handle the different checking states */
|
||||
|
||||
switch(require_state)
|
||||
{
|
||||
/* First significant character - it might follow Common or Inherited
|
||||
characters that do not have any script extensions. */
|
||||
|
||||
case SCRIPT_UNSET:
|
||||
switch(script)
|
||||
{
|
||||
case ucp_Han:
|
||||
require_state = SCRIPT_HANPENDING;
|
||||
break;
|
||||
|
||||
case ucp_Hiragana:
|
||||
case ucp_Katakana:
|
||||
require_state = SCRIPT_HANHIRAKATA;
|
||||
break;
|
||||
|
||||
case ucp_Bopomofo:
|
||||
require_state = SCRIPT_HANBOPOMOFO;
|
||||
break;
|
||||
|
||||
case ucp_Hangul:
|
||||
require_state = SCRIPT_HANHANGUL;
|
||||
break;
|
||||
|
||||
default:
|
||||
memcpy(require_map, map, FULL_MAPSIZE * sizeof(uint32_t));
|
||||
require_state = SCRIPT_MAP;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
/* The first significant character was Han. An inspection of the Unicode
|
||||
11.0.0 files shows that there are the following types of Script Extension
|
||||
list that involve the Han, Bopomofo, Hiragana, Katakana, and Hangul
|
||||
scripts:
|
||||
|
||||
. Bopomofo + Han
|
||||
. Han + Hiragana + Katakana
|
||||
. Hiragana + Katakana
|
||||
. Bopopmofo + Hangul + Han + Hiragana + Katakana
|
||||
|
||||
The following code tries to make sense of this. */
|
||||
|
||||
#define FOUND_BOPOMOFO 1
|
||||
#define FOUND_HIRAGANA 2
|
||||
#define FOUND_KATAKANA 4
|
||||
#define FOUND_HANGUL 8
|
||||
|
||||
case SCRIPT_HANPENDING:
|
||||
if (script != ucp_Han) /* Another Han does nothing */
|
||||
{
|
||||
uint32_t chspecial = 0;
|
||||
|
||||
if (MAPBIT(map, ucp_Bopomofo) != 0) chspecial |= FOUND_BOPOMOFO;
|
||||
if (MAPBIT(map, ucp_Hiragana) != 0) chspecial |= FOUND_HIRAGANA;
|
||||
if (MAPBIT(map, ucp_Katakana) != 0) chspecial |= FOUND_KATAKANA;
|
||||
if (MAPBIT(map, ucp_Hangul) != 0) chspecial |= FOUND_HANGUL;
|
||||
|
||||
if (chspecial == 0) return FALSE; /* Not allowed with Han */
|
||||
|
||||
if (chspecial == FOUND_BOPOMOFO)
|
||||
require_state = SCRIPT_HANBOPOMOFO;
|
||||
else if (chspecial == (FOUND_HIRAGANA|FOUND_KATAKANA))
|
||||
require_state = SCRIPT_HANHIRAKATA;
|
||||
|
||||
/* Otherwise this character must be allowed with all of them, so remain
|
||||
in the pending state. */
|
||||
}
|
||||
break;
|
||||
|
||||
/* Previously encountered one of the "with Han" scripts. Check that
|
||||
this character is appropriate. */
|
||||
|
||||
case SCRIPT_HANHIRAKATA:
|
||||
if (MAPBIT(map, ucp_Han) + MAPBIT(map, ucp_Hiragana) +
|
||||
MAPBIT(map, ucp_Katakana) == 0) return FALSE;
|
||||
break;
|
||||
|
||||
case SCRIPT_HANBOPOMOFO:
|
||||
if (MAPBIT(map, ucp_Han) + MAPBIT(map, ucp_Bopomofo) == 0) return FALSE;
|
||||
break;
|
||||
|
||||
case SCRIPT_HANHANGUL:
|
||||
if (MAPBIT(map, ucp_Han) + MAPBIT(map, ucp_Hangul) == 0) return FALSE;
|
||||
break;
|
||||
|
||||
/* Previously encountered one or more characters that are allowed with a
|
||||
list of scripts. */
|
||||
|
||||
case SCRIPT_MAP:
|
||||
OK = FALSE;
|
||||
|
||||
for (int i = 0; i < FULL_MAPSIZE; i++)
|
||||
{
|
||||
if ((require_map[i] & map[i]) != 0)
|
||||
{
|
||||
OK = TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!OK) return FALSE;
|
||||
|
||||
/* The rest of the string must be in this script, but we have to
|
||||
allow for the Han complications. */
|
||||
|
||||
switch(script)
|
||||
{
|
||||
case ucp_Han:
|
||||
require_state = SCRIPT_HANPENDING;
|
||||
break;
|
||||
|
||||
case ucp_Hiragana:
|
||||
case ucp_Katakana:
|
||||
require_state = SCRIPT_HANHIRAKATA;
|
||||
break;
|
||||
|
||||
case ucp_Bopomofo:
|
||||
require_state = SCRIPT_HANBOPOMOFO;
|
||||
break;
|
||||
|
||||
case ucp_Hangul:
|
||||
require_state = SCRIPT_HANHANGUL;
|
||||
break;
|
||||
|
||||
/* Compute the intersection of the required list of scripts and the
|
||||
allowed scripts for this character. */
|
||||
|
||||
default:
|
||||
for (int i = 0; i < FULL_MAPSIZE; i++) require_map[i] &= map[i];
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
} /* End checking character's script and extensions. */
|
||||
|
||||
/* The character is in an acceptable script. We must now ensure that all
|
||||
decimal digits in the string come from the same set. Some scripts (e.g.
|
||||
Common, Arabic) have more than one set of decimal digits. This code does
|
||||
not allow mixing sets, even within the same script. The vector called
|
||||
PRIV(ucd_digit_sets)[] contains, in its first element, the number of
|
||||
following elements, and then, in ascending order, the code points of the
|
||||
'9' characters in every set of 10 digits. Each set is identified by the
|
||||
offset in the vector of its '9' character. An initial check of the first
|
||||
value picks up ASCII digits quickly. Otherwise, a binary chop is used. */
|
||||
|
||||
if (ucd->chartype == ucp_Nd)
|
||||
{
|
||||
uint32_t digitset;
|
||||
|
||||
if (c <= PRIV(ucd_digit_sets)[1]) digitset = 1; else
|
||||
{
|
||||
int mid;
|
||||
int bot = 1;
|
||||
int top = PRIV(ucd_digit_sets)[0];
|
||||
for (;;)
|
||||
{
|
||||
if (top <= bot + 1) /* <= rather than == is paranoia */
|
||||
{
|
||||
digitset = top;
|
||||
break;
|
||||
}
|
||||
mid = (top + bot) / 2;
|
||||
if (c <= PRIV(ucd_digit_sets)[mid]) top = mid; else bot = mid;
|
||||
}
|
||||
}
|
||||
|
||||
/* A required value of 0 means "unset". */
|
||||
|
||||
if (require_digitset == 0) require_digitset = digitset;
|
||||
else if (digitset != require_digitset) return FALSE;
|
||||
} /* End digit handling */
|
||||
|
||||
/* If we haven't yet got to the end, pick up the next character. */
|
||||
|
||||
if (ptr >= endptr) return TRUE;
|
||||
GETCHARINCTEST(c, ptr);
|
||||
} /* End checking loop */
|
||||
|
||||
#else /* NOT SUPPORT_UNICODE */
|
||||
(void)ptr;
|
||||
(void)endptr;
|
||||
(void)utf;
|
||||
return TRUE;
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
|
||||
/* End of pcre2_script_run.c */
|
||||
@@ -1,286 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This module contains functions for serializing and deserializing
|
||||
a sequence of compiled codes. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
/* Magic number to provide a small check against being handed junk. */
|
||||
|
||||
/* The four bytes of this value are the ASCII codes for "PR2S". */
#define SERIALIZED_DATA_MAGIC 0x50523253u
|
||||
|
||||
/* Deserialization is limited to the current PCRE version and
|
||||
character width. */
|
||||
|
||||
/* Version word: PCRE2_MAJOR in the low bits, PCRE2_MINOR shifted up by 16. */
#define SERIALIZED_DATA_VERSION \
|
||||
((PCRE2_MAJOR) | ((PCRE2_MINOR) << 16))
|
||||
|
||||
/* Config word: sizeof(PCRE2_UCHAR) in the low bits, sizeof(void*) shifted up
by 8 and sizeof(PCRE2_SIZE) shifted up by 16. */
#define SERIALIZED_DATA_CONFIG \
|
||||
(sizeof(PCRE2_UCHAR) | ((sizeof(void*)) << 8) | ((sizeof(PCRE2_SIZE)) << 16))
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Serialize compiled patterns *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
|
||||
pcre2_serialize_encode(const pcre2_code **codes, int32_t number_of_codes,
|
||||
uint8_t **serialized_bytes, PCRE2_SIZE *serialized_size,
|
||||
pcre2_general_context *gcontext)
|
||||
{
|
||||
uint8_t *bytes;
|
||||
uint8_t *dst_bytes;
|
||||
int32_t i;
|
||||
PCRE2_SIZE total_size;
|
||||
const pcre2_real_code *re;
|
||||
const uint8_t *tables;
|
||||
pcre2_serialized_data *data;
|
||||
|
||||
const pcre2_memctl *memctl = (gcontext != NULL) ?
|
||||
&gcontext->memctl : &PRIV(default_compile_context).memctl;
|
||||
|
||||
if (codes == NULL || serialized_bytes == NULL || serialized_size == NULL)
|
||||
return PCRE2_ERROR_NULL;
|
||||
|
||||
if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA;
|
||||
|
||||
/* Compute total size. */
|
||||
total_size = sizeof(pcre2_serialized_data) + TABLES_LENGTH;
|
||||
tables = NULL;
|
||||
|
||||
for (i = 0; i < number_of_codes; i++)
|
||||
{
|
||||
if (codes[i] == NULL) return PCRE2_ERROR_NULL;
|
||||
re = (const pcre2_real_code *)(codes[i]);
|
||||
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
|
||||
if (tables == NULL)
|
||||
tables = re->tables;
|
||||
else if (tables != re->tables)
|
||||
return PCRE2_ERROR_MIXEDTABLES;
|
||||
total_size += re->blocksize;
|
||||
}
|
||||
|
||||
/* Initialize the byte stream. */
|
||||
bytes = memctl->malloc(total_size + sizeof(pcre2_memctl), memctl->memory_data);
|
||||
if (bytes == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
/* The controller is stored as a hidden parameter. */
|
||||
memcpy(bytes, memctl, sizeof(pcre2_memctl));
|
||||
bytes += sizeof(pcre2_memctl);
|
||||
|
||||
data = (pcre2_serialized_data *)bytes;
|
||||
data->magic = SERIALIZED_DATA_MAGIC;
|
||||
data->version = SERIALIZED_DATA_VERSION;
|
||||
data->config = SERIALIZED_DATA_CONFIG;
|
||||
data->number_of_codes = number_of_codes;
|
||||
|
||||
/* Copy all compiled code data. */
|
||||
dst_bytes = bytes + sizeof(pcre2_serialized_data);
|
||||
memcpy(dst_bytes, tables, TABLES_LENGTH);
|
||||
dst_bytes += TABLES_LENGTH;
|
||||
|
||||
for (i = 0; i < number_of_codes; i++)
|
||||
{
|
||||
re = (const pcre2_real_code *)(codes[i]);
|
||||
(void)memcpy(dst_bytes, (const char *)re, re->blocksize);
|
||||
|
||||
/* Certain fields in the compiled code block are re-set during
|
||||
deserialization. In order to ensure that the serialized data stream is always
|
||||
the same for the same pattern, set them to zero here. We can't assume the
|
||||
copy of the pattern is correctly aligned for accessing the fields as part of
|
||||
a structure. Note the use of sizeof(void *) in the second of these, to
|
||||
specify the size of a pointer. If sizeof(uint8_t *) is used (tables is a
|
||||
pointer to uint8_t), gcc gives a warning because the first argument is also a
|
||||
pointer to uint8_t. Casting the first argument to (void *) can stop this, but
|
||||
it didn't stop Coverity giving the same complaint. */
|
||||
|
||||
(void)memset(dst_bytes + offsetof(pcre2_real_code, memctl), 0,
|
||||
sizeof(pcre2_memctl));
|
||||
(void)memset(dst_bytes + offsetof(pcre2_real_code, tables), 0,
|
||||
sizeof(void *));
|
||||
(void)memset(dst_bytes + offsetof(pcre2_real_code, executable_jit), 0,
|
||||
sizeof(void *));
|
||||
|
||||
dst_bytes += re->blocksize;
|
||||
}
|
||||
|
||||
*serialized_bytes = bytes;
|
||||
*serialized_size = total_size;
|
||||
return number_of_codes;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Deserialize compiled patterns *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
|
||||
pcre2_serialize_decode(pcre2_code **codes, int32_t number_of_codes,
|
||||
const uint8_t *bytes, pcre2_general_context *gcontext)
|
||||
{
|
||||
const pcre2_serialized_data *data = (const pcre2_serialized_data *)bytes;
|
||||
const pcre2_memctl *memctl = (gcontext != NULL) ?
|
||||
&gcontext->memctl : &PRIV(default_compile_context).memctl;
|
||||
|
||||
const uint8_t *src_bytes;
|
||||
pcre2_real_code *dst_re;
|
||||
uint8_t *tables;
|
||||
int32_t i, j;
|
||||
|
||||
/* Sanity checks. */
|
||||
|
||||
if (data == NULL || codes == NULL) return PCRE2_ERROR_NULL;
|
||||
if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA;
|
||||
if (data->number_of_codes <= 0) return PCRE2_ERROR_BADSERIALIZEDDATA;
|
||||
if (data->magic != SERIALIZED_DATA_MAGIC) return PCRE2_ERROR_BADMAGIC;
|
||||
if (data->version != SERIALIZED_DATA_VERSION) return PCRE2_ERROR_BADMODE;
|
||||
if (data->config != SERIALIZED_DATA_CONFIG) return PCRE2_ERROR_BADMODE;
|
||||
|
||||
if (number_of_codes > data->number_of_codes)
|
||||
number_of_codes = data->number_of_codes;
|
||||
|
||||
src_bytes = bytes + sizeof(pcre2_serialized_data);
|
||||
|
||||
/* Decode tables. The reference count for the tables is stored immediately
|
||||
following them. */
|
||||
|
||||
tables = memctl->malloc(TABLES_LENGTH + sizeof(PCRE2_SIZE), memctl->memory_data);
|
||||
if (tables == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
memcpy(tables, src_bytes, TABLES_LENGTH);
|
||||
*(PCRE2_SIZE *)(tables + TABLES_LENGTH) = number_of_codes;
|
||||
src_bytes += TABLES_LENGTH;
|
||||
|
||||
/* Decode the byte stream. We must not try to read the size from the compiled
|
||||
code block in the stream, because it might be unaligned, which causes errors on
|
||||
hardware such as Sparc-64 that doesn't like unaligned memory accesses. The type
|
||||
of the blocksize field is given its own name to ensure that it is the same here
|
||||
as in the block. */
|
||||
|
||||
for (i = 0; i < number_of_codes; i++)
|
||||
{
|
||||
CODE_BLOCKSIZE_TYPE blocksize;
|
||||
memcpy(&blocksize, src_bytes + offsetof(pcre2_real_code, blocksize),
|
||||
sizeof(CODE_BLOCKSIZE_TYPE));
|
||||
if (blocksize <= sizeof(pcre2_real_code))
|
||||
return PCRE2_ERROR_BADSERIALIZEDDATA;
|
||||
|
||||
/* The allocator provided by gcontext replaces the original one. */
|
||||
|
||||
dst_re = (pcre2_real_code *)PRIV(memctl_malloc)(blocksize,
|
||||
(pcre2_memctl *)gcontext);
|
||||
if (dst_re == NULL)
|
||||
{
|
||||
memctl->free(tables, memctl->memory_data);
|
||||
for (j = 0; j < i; j++)
|
||||
{
|
||||
memctl->free(codes[j], memctl->memory_data);
|
||||
codes[j] = NULL;
|
||||
}
|
||||
return PCRE2_ERROR_NOMEMORY;
|
||||
}
|
||||
|
||||
/* The new allocator must be preserved. */
|
||||
|
||||
memcpy(((uint8_t *)dst_re) + sizeof(pcre2_memctl),
|
||||
src_bytes + sizeof(pcre2_memctl), blocksize - sizeof(pcre2_memctl));
|
||||
if (dst_re->magic_number != MAGIC_NUMBER ||
|
||||
dst_re->name_entry_size > MAX_NAME_SIZE + IMM2_SIZE + 1 ||
|
||||
dst_re->name_count > MAX_NAME_COUNT)
|
||||
{
|
||||
memctl->free(dst_re, memctl->memory_data);
|
||||
return PCRE2_ERROR_BADSERIALIZEDDATA;
|
||||
}
|
||||
|
||||
/* At the moment only one table is supported. */
|
||||
|
||||
dst_re->tables = tables;
|
||||
dst_re->executable_jit = NULL;
|
||||
dst_re->flags |= PCRE2_DEREF_TABLES;
|
||||
|
||||
codes[i] = dst_re;
|
||||
src_bytes += blocksize;
|
||||
}
|
||||
|
||||
return number_of_codes;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get the number of serialized patterns *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
|
||||
pcre2_serialize_get_number_of_codes(const uint8_t *bytes)
|
||||
{
|
||||
const pcre2_serialized_data *data = (const pcre2_serialized_data *)bytes;
|
||||
|
||||
if (data == NULL) return PCRE2_ERROR_NULL;
|
||||
if (data->magic != SERIALIZED_DATA_MAGIC) return PCRE2_ERROR_BADMAGIC;
|
||||
if (data->version != SERIALIZED_DATA_VERSION) return PCRE2_ERROR_BADMODE;
|
||||
if (data->config != SERIALIZED_DATA_CONFIG) return PCRE2_ERROR_BADMODE;
|
||||
|
||||
return data->number_of_codes;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free the allocated stream *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_serialize_free(uint8_t *bytes)
|
||||
{
|
||||
if (bytes != NULL)
|
||||
{
|
||||
pcre2_memctl *memctl = (pcre2_memctl *)(bytes - sizeof(pcre2_memctl));
|
||||
memctl->free(memctl, memctl->memory_data);
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_serialize.c */
|
||||
@@ -1,237 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2018-2021 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This module contains internal functions for comparing and finding the length
|
||||
of strings. These are used instead of strcmp() etc because the standard
|
||||
functions work only on 8-bit data. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Emulated memmove() for systems without it *
|
||||
*************************************************/
|
||||
|
||||
/* This function can make use of bcopy() if it is available. Otherwise do it by
|
||||
steam, as there are some non-Unix environments that lack both memmove() and
|
||||
bcopy(). */
|
||||
|
||||
#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
|
||||
void *
|
||||
PRIV(memmove)(void *d, const void *s, size_t n)
|
||||
{
|
||||
#ifdef HAVE_BCOPY
|
||||
bcopy(s, d, n);
|
||||
return d;
|
||||
#else
|
||||
size_t i;
|
||||
unsigned char *dest = (unsigned char *)d;
|
||||
const unsigned char *src = (const unsigned char *)s;
|
||||
if (dest > src)
|
||||
{
|
||||
dest += n;
|
||||
src += n;
|
||||
for (i = 0; i < n; ++i) *(--dest) = *(--src);
|
||||
return (void *)dest;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < n; ++i) *dest++ = *src++;
|
||||
return (void *)(dest - n);
|
||||
}
|
||||
#endif /* not HAVE_BCOPY */
|
||||
}
|
||||
#endif /* not VPCOMPAT && not HAVE_MEMMOVE */
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compare two zero-terminated PCRE2 strings *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
|
||||
Returns: 0, 1, or -1
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strcmp)(PCRE2_SPTR str1, PCRE2_SPTR str2)
|
||||
{
|
||||
PCRE2_UCHAR c1, c2;
|
||||
while (*str1 != '\0' || *str2 != '\0')
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
||||
if (c1 != c2) return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compare zero-terminated PCRE2 & 8-bit strings *
|
||||
*************************************************/
|
||||
|
||||
/* As the 8-bit string is almost always a literal, its type is specified as
|
||||
const char *.
|
||||
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
|
||||
Returns: 0, 1, or -1
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strcmp_c8)(PCRE2_SPTR str1, const char *str2)
|
||||
{
|
||||
PCRE2_UCHAR c1, c2;
|
||||
while (*str1 != '\0' || *str2 != '\0')
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
||||
if (c1 != c2) return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compare two PCRE2 strings, given a length *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
len the length
|
||||
|
||||
Returns: 0, 1, or -1
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strncmp)(PCRE2_SPTR str1, PCRE2_SPTR str2, size_t len)
|
||||
{
|
||||
PCRE2_UCHAR c1, c2;
|
||||
for (; len > 0; len--)
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
||||
if (c1 != c2) return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compare PCRE2 string to 8-bit string by length *
|
||||
*************************************************/
|
||||
|
||||
/* As the 8-bit string is almost always a literal, its type is specified as
|
||||
const char *.
|
||||
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
len the length
|
||||
|
||||
Returns: 0, 1, or -1
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strncmp_c8)(PCRE2_SPTR str1, const char *str2, size_t len)
|
||||
{
|
||||
PCRE2_UCHAR c1, c2;
|
||||
for (; len > 0; len--)
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
||||
if (c1 != c2) return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find the length of a PCRE2 string *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Argument: the string
|
||||
Returns: the length
|
||||
*/
|
||||
|
||||
PCRE2_SIZE
|
||||
PRIV(strlen)(PCRE2_SPTR str)
|
||||
{
|
||||
PCRE2_SIZE c = 0;
|
||||
while (*str++ != 0) c++;
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy 8-bit 0-terminated string to PCRE2 string *
|
||||
*************************************************/
|
||||
|
||||
/* Arguments:
|
||||
str1 buffer to receive the string
|
||||
str2 8-bit string to be copied
|
||||
|
||||
Returns: the number of code units used (excluding trailing zero)
|
||||
*/
|
||||
|
||||
PCRE2_SIZE
|
||||
PRIV(strcpy_c8)(PCRE2_UCHAR *str1, const char *str2)
|
||||
{
|
||||
PCRE2_UCHAR *t = str1;
|
||||
while (*str2 != 0) *t++ = *str2++;
|
||||
*t = 0;
|
||||
return t - str1;
|
||||
}
|
||||
|
||||
/* End of pcre2_string_utils.c */
|
||||
@@ -1,2069 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This module contains functions for scanning a compiled pattern and
|
||||
collecting data (e.g. minimum matching length). */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
/* The maximum remembered capturing brackets minimum. */
|
||||
|
||||
#define MAX_CACHE_BACKREF 128
|
||||
|
||||
/* Set a bit in the starting code unit bit map. */
|
||||
|
||||
#define SET_BIT(c) re->start_bitmap[(c)/8] |= (1u << ((c)&7))
|
||||
|
||||
/* Returns from set_start_bits() */
|
||||
|
||||
enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN, SSB_TOODEEP };
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find the minimum subject length for a group *
|
||||
*************************************************/
|
||||
|
||||
/* Scan a parenthesized group and compute the minimum length of subject that
|
||||
is needed to match it. This is a lower bound; it does not mean there is a
|
||||
string of that length that matches. In UTF mode, the result is in characters
|
||||
rather than code units. The field in a compiled pattern for storing the minimum
|
||||
length is 16-bits long (on the grounds that anything longer than that is
|
||||
pathological), so we give up when we reach that amount. This also means that
|
||||
integer overflow for really crazy patterns cannot happen.
|
||||
|
||||
Backreference minimum lengths are cached to speed up multiple references. This
|
||||
function is called only when the highest back reference in the pattern is less
|
||||
than or equal to MAX_CACHE_BACKREF, which is one less than the size of the
|
||||
caching vector. The zeroth element contains the number of the highest set
|
||||
value.
|
||||
|
||||
Arguments:
|
||||
re compiled pattern block
|
||||
code pointer to start of group (the bracket)
|
||||
startcode pointer to start of the whole pattern's code
|
||||
utf UTF flag
|
||||
recurses chain of recurse_check to catch mutual recursion
|
||||
countptr pointer to call count (to catch over complexity)
|
||||
backref_cache vector for caching back references.
|
||||
|
||||
This function is no longer called when the pattern contains (*ACCEPT); however,
|
||||
the old code for returning -1 is retained, just in case.
|
||||
|
||||
Returns: the minimum length
|
||||
-1 \C in UTF-8 mode
|
||||
or (*ACCEPT)
|
||||
or pattern too complicated
|
||||
-2 internal error (missing capturing bracket)
|
||||
-3 internal error (opcode not listed)
|
||||
*/
|
||||
|
||||
/* Compute a lower bound on the subject length needed to match the group that
starts at code. A negative return value means that no bound could be computed
(-1) or that an internal inconsistency was detected (-2, -3). */
static int
|
||||
find_minlength(const pcre2_real_code *re, PCRE2_SPTR code,
|
||||
PCRE2_SPTR startcode, BOOL utf, recurse_check *recurses, int *countptr,
|
||||
int *backref_cache)
|
||||
{
|
||||
/* length stays at -1 until the first branch of the group has been measured. */
int length = -1;
|
||||
int branchlength = 0;
|
||||
int prev_cap_recno = -1;
|
||||
int prev_cap_d = 0;
|
||||
int prev_recurse_recno = -1;
|
||||
int prev_recurse_d = 0;
|
||||
uint32_t once_fudge = 0;
|
||||
BOOL had_recurse = FALSE;
|
||||
BOOL dupcapused = (re->flags & PCRE2_DUPCAPUSED) != 0;
|
||||
PCRE2_SPTR nextbranch = code + GET(code, 1);
|
||||
PCRE2_SPTR cc = code + 1 + LINK_SIZE;
|
||||
recurse_check this_recurse;
|
||||
|
||||
/* If this is a "could be empty" group, its minimum length is 0. */
|
||||
|
||||
if (*code >= OP_SBRA && *code <= OP_SCOND) return 0;
|
||||
|
||||
/* Skip over capturing bracket number */
|
||||
|
||||
if (*code == OP_CBRA || *code == OP_CBRAPOS) cc += IMM2_SIZE;
|
||||
|
||||
/* A large and/or complex regex can take too long to process. */
|
||||
|
||||
if ((*countptr)++ > 1000) return -1;
|
||||
|
||||
/* Scan along the opcodes for this branch. If we get to the end of the branch,
|
||||
check the length against that of the other branches. If the accumulated length
|
||||
passes 16-bits, reset to that value and skip the rest of the branch. */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
int d, min, recno;
|
||||
PCRE2_UCHAR op;
|
||||
PCRE2_SPTR cs, ce;
|
||||
|
||||
if (branchlength >= UINT16_MAX)
|
||||
{
|
||||
branchlength = UINT16_MAX;
|
||||
cc = nextbranch;
|
||||
}
|
||||
|
||||
op = *cc;
|
||||
switch (op)
|
||||
{
|
||||
case OP_COND:
|
||||
case OP_SCOND:
|
||||
|
||||
/* If there is only one branch in a condition, the implied branch has zero
|
||||
length, so we don't add anything. This covers the DEFINE "condition"
|
||||
automatically. If there are two branches we can treat it the same as any
|
||||
other non-capturing subpattern. */
|
||||
|
||||
cs = cc + GET(cc, 1);
|
||||
if (*cs != OP_ALT)
|
||||
{
|
||||
cc = cs + 1 + LINK_SIZE;
|
||||
break;
|
||||
}
|
||||
goto PROCESS_NON_CAPTURE;
|
||||
|
||||
case OP_BRA:
|
||||
/* There's a special case of OP_BRA, when it is wrapped round a repeated
|
||||
OP_RECURSE. We'd like to process the latter at this level so that
|
||||
remembering the value works for repeated cases. So we do nothing, but
|
||||
set a fudge value to skip over the OP_KET after the recurse. */
|
||||
|
||||
if (cc[1+LINK_SIZE] == OP_RECURSE && cc[2*(1+LINK_SIZE)] == OP_KET)
|
||||
{
|
||||
once_fudge = 1 + LINK_SIZE;
|
||||
cc += 1 + LINK_SIZE;
|
||||
break;
|
||||
}
|
||||
/* Fall through */
|
||||
|
||||
case OP_ONCE:
|
||||
case OP_SCRIPT_RUN:
|
||||
case OP_SBRA:
|
||||
case OP_BRAPOS:
|
||||
case OP_SBRAPOS:
|
||||
PROCESS_NON_CAPTURE:
|
||||
d = find_minlength(re, cc, startcode, utf, recurses, countptr,
|
||||
backref_cache);
|
||||
if (d < 0) return d;
|
||||
branchlength += d;
|
||||
/* Step over every alternative of the nested group and its closing ket. */
do cc += GET(cc, 1); while (*cc == OP_ALT);
|
||||
cc += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* To save time for repeated capturing subpatterns, we remember the
|
||||
length of the previous one. Unfortunately we can't do the same for
|
||||
the unnumbered ones above. Nor can we do this if (?| is present in the
|
||||
pattern because captures with the same number are not then identical. */
|
||||
|
||||
case OP_CBRA:
|
||||
case OP_SCBRA:
|
||||
case OP_CBRAPOS:
|
||||
case OP_SCBRAPOS:
|
||||
recno = (int)GET2(cc, 1+LINK_SIZE);
|
||||
if (dupcapused || recno != prev_cap_recno)
|
||||
{
|
||||
prev_cap_recno = recno;
|
||||
prev_cap_d = find_minlength(re, cc, startcode, utf, recurses, countptr,
|
||||
backref_cache);
|
||||
if (prev_cap_d < 0) return prev_cap_d;
|
||||
}
|
||||
branchlength += prev_cap_d;
|
||||
do cc += GET(cc, 1); while (*cc == OP_ALT);
|
||||
cc += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* ACCEPT makes things far too complicated; we have to give up. In fact,
|
||||
from 10.34 onwards, if a pattern contains (*ACCEPT), this function is not
|
||||
used. However, leave the code in place, just in case. */
|
||||
|
||||
case OP_ACCEPT:
|
||||
case OP_ASSERT_ACCEPT:
|
||||
return -1;
|
||||
|
||||
/* Reached end of a branch; if it's a ket it is the end of a nested
|
||||
call. If it's ALT it is an alternation in a nested call. If it is END it's
|
||||
the end of the outer call. All can be handled by the same code. If the
|
||||
length of any branch is zero, there is no need to scan any subsequent
|
||||
branches. */
|
||||
|
||||
case OP_ALT:
|
||||
case OP_KET:
|
||||
case OP_KETRMAX:
|
||||
case OP_KETRMIN:
|
||||
case OP_KETRPOS:
|
||||
case OP_END:
|
||||
/* The first completed branch always sets length; a later branch lowers it
only if that branch did not involve a recursion. */
if (length < 0 || (!had_recurse && branchlength < length))
|
||||
length = branchlength;
|
||||
if (op != OP_ALT || length == 0) return length;
|
||||
nextbranch = cc + GET(cc, 1);
|
||||
cc += 1 + LINK_SIZE;
|
||||
branchlength = 0;
|
||||
had_recurse = FALSE;
|
||||
break;
|
||||
|
||||
/* Skip over assertive subpatterns */
|
||||
|
||||
case OP_ASSERT:
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
case OP_ASSERT_NA:
|
||||
case OP_ASSERT_SCS:
|
||||
case OP_ASSERTBACK_NA:
|
||||
do cc += GET(cc, 1); while (*cc == OP_ALT);
|
||||
/* Fall through */
|
||||
|
||||
/* Skip over things that don't match chars */
|
||||
|
||||
case OP_REVERSE:
|
||||
case OP_VREVERSE:
|
||||
case OP_CREF:
|
||||
case OP_DNCREF:
|
||||
case OP_RREF:
|
||||
case OP_DNRREF:
|
||||
case OP_FALSE:
|
||||
case OP_TRUE:
|
||||
case OP_CALLOUT:
|
||||
case OP_SOD:
|
||||
case OP_SOM:
|
||||
case OP_EOD:
|
||||
case OP_EODN:
|
||||
case OP_CIRC:
|
||||
case OP_CIRCM:
|
||||
case OP_DOLL:
|
||||
case OP_DOLLM:
|
||||
case OP_NOT_WORD_BOUNDARY:
|
||||
case OP_WORD_BOUNDARY:
|
||||
case OP_NOT_UCP_WORD_BOUNDARY:
|
||||
case OP_UCP_WORD_BOUNDARY:
|
||||
cc += PRIV(OP_lengths)[*cc];
|
||||
break;
|
||||
|
||||
case OP_CALLOUT_STR:
|
||||
cc += GET(cc, 1 + 2*LINK_SIZE);
|
||||
break;
|
||||
|
||||
/* Skip over a subpattern that has a {0} or {0,x} quantifier */
|
||||
|
||||
case OP_BRAZERO:
|
||||
case OP_BRAMINZERO:
|
||||
case OP_BRAPOSZERO:
|
||||
case OP_SKIPZERO:
|
||||
cc += PRIV(OP_lengths)[*cc];
|
||||
do cc += GET(cc, 1); while (*cc == OP_ALT);
|
||||
cc += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Handle literal characters and + repetitions */
|
||||
|
||||
case OP_CHAR:
|
||||
case OP_CHARI:
|
||||
case OP_NOT:
|
||||
case OP_NOTI:
|
||||
case OP_PLUS:
|
||||
case OP_PLUSI:
|
||||
case OP_MINPLUS:
|
||||
case OP_MINPLUSI:
|
||||
case OP_POSPLUS:
|
||||
case OP_POSPLUSI:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTPLUSI:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTMINPLUSI:
|
||||
case OP_NOTPOSPLUS:
|
||||
case OP_NOTPOSPLUSI:
|
||||
branchlength++;
|
||||
cc += 2;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
|
||||
#endif
|
||||
break;
|
||||
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEPOSPLUS:
|
||||
branchlength++;
|
||||
cc += (cc[1] == OP_PROP || cc[1] == OP_NOTPROP)? 4 : 2;
|
||||
break;
|
||||
|
||||
/* Handle exact repetitions. The count is already in characters, but we
|
||||
may need to skip over a multibyte character in UTF mode. */
|
||||
|
||||
case OP_EXACT:
|
||||
case OP_EXACTI:
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTEXACTI:
|
||||
branchlength += GET2(cc,1);
|
||||
cc += 2 + IMM2_SIZE;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
|
||||
#endif
|
||||
break;
|
||||
|
||||
case OP_TYPEEXACT:
|
||||
branchlength += GET2(cc,1);
|
||||
cc += 2 + IMM2_SIZE + ((cc[1 + IMM2_SIZE] == OP_PROP
|
||||
|| cc[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0);
|
||||
break;
|
||||
|
||||
/* Handle single-char non-literal matchers */
|
||||
|
||||
case OP_PROP:
|
||||
case OP_NOTPROP:
|
||||
cc += 2;
|
||||
/* Fall through */
|
||||
|
||||
case OP_NOT_DIGIT:
|
||||
case OP_DIGIT:
|
||||
case OP_NOT_WHITESPACE:
|
||||
case OP_WHITESPACE:
|
||||
case OP_NOT_WORDCHAR:
|
||||
case OP_WORDCHAR:
|
||||
case OP_ANY:
|
||||
case OP_ALLANY:
|
||||
case OP_EXTUNI:
|
||||
case OP_HSPACE:
|
||||
case OP_NOT_HSPACE:
|
||||
case OP_VSPACE:
|
||||
case OP_NOT_VSPACE:
|
||||
branchlength++;
|
||||
cc++;
|
||||
break;
|
||||
|
||||
/* "Any newline" might match two characters, but it also might match just
|
||||
one. */
|
||||
|
||||
case OP_ANYNL:
|
||||
branchlength += 1;
|
||||
cc++;
|
||||
break;
|
||||
|
||||
/* The single-byte matcher means we can't proceed in UTF mode. (In
|
||||
non-UTF mode \C will actually be turned into OP_ALLANY, so won't ever
|
||||
appear, but leave the code, just in case.) */
|
||||
|
||||
case OP_ANYBYTE:
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) return -1;
|
||||
#endif
|
||||
branchlength++;
|
||||
cc++;
|
||||
break;
|
||||
|
||||
/* For repeated character types, we have to test for \p and \P, which have
|
||||
an extra two bytes of parameters. */
|
||||
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEPOSQUERY:
|
||||
if (cc[1] == OP_PROP || cc[1] == OP_NOTPROP) cc += 2;
|
||||
cc += PRIV(OP_lengths)[op];
|
||||
break;
|
||||
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEPOSUPTO:
|
||||
if (cc[1 + IMM2_SIZE] == OP_PROP
|
||||
|| cc[1 + IMM2_SIZE] == OP_NOTPROP) cc += 2;
|
||||
cc += PRIV(OP_lengths)[op];
|
||||
break;
|
||||
|
||||
/* Check a class for variable quantification */
|
||||
|
||||
case OP_CLASS:
|
||||
case OP_NCLASS:
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
case OP_XCLASS:
|
||||
case OP_ECLASS:
|
||||
/* The original code caused an unsigned overflow in 64 bit systems,
|
||||
so now we use a conditional statement. */
|
||||
if (op == OP_XCLASS || op == OP_ECLASS)
|
||||
cc += GET(cc, 1);
|
||||
else
|
||||
#endif
|
||||
cc += PRIV(OP_lengths)[OP_CLASS];
|
||||
|
||||
switch (*cc)
|
||||
{
|
||||
case OP_CRPLUS:
|
||||
case OP_CRMINPLUS:
|
||||
case OP_CRPOSPLUS:
|
||||
branchlength++;
|
||||
/* Fall through */
|
||||
|
||||
case OP_CRSTAR:
|
||||
case OP_CRMINSTAR:
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
case OP_CRPOSSTAR:
|
||||
case OP_CRPOSQUERY:
|
||||
cc++;
|
||||
break;
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
case OP_CRPOSRANGE:
|
||||
branchlength += GET2(cc,1);
|
||||
cc += 1 + 2 * IMM2_SIZE;
|
||||
break;
|
||||
|
||||
default:
|
||||
branchlength++;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
/* Backreferences and subroutine calls (OP_RECURSE) are treated in the same
|
||||
way: we find the minimum length for the subpattern. A recursion
|
||||
(backreference or subroutine) causes a flag to be set that causes the
|
||||
length of this branch to be ignored. The logic is that a recursion can only
|
||||
make sense if there is another alternative that stops the recursing. That
|
||||
will provide the minimum length (when no recursion happens).
|
||||
|
||||
If PCRE2_MATCH_UNSET_BACKREF is set, a backreference to an unset bracket
|
||||
matches an empty string (by default it causes a matching failure), so in
|
||||
that case we must set the minimum length to zero.
|
||||
|
||||
For backreferences, if duplicate numbers are present in the pattern we check
|
||||
for a reference to a duplicate. If it is, we don't know which version will
|
||||
be referenced, so we have to set the minimum length to zero. */
|
||||
|
||||
/* Duplicate named pattern back reference. */
|
||||
|
||||
case OP_DNREF:
|
||||
case OP_DNREFI:
|
||||
if (!dupcapused && (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
|
||||
{
|
||||
int count = GET2(cc, 1+IMM2_SIZE);
|
||||
PCRE2_SPTR slot =
|
||||
(PCRE2_SPTR)((const uint8_t *)re + sizeof(pcre2_real_code)) +
|
||||
GET2(cc, 1) * re->name_entry_size;
|
||||
|
||||
d = INT_MAX;
|
||||
|
||||
/* Scan all groups with the same name; find the shortest. */
|
||||
|
||||
while (count-- > 0)
|
||||
{
|
||||
int dd, i;
|
||||
recno = GET2(slot, 0);
|
||||
|
||||
if (recno <= backref_cache[0] && backref_cache[recno] >= 0)
|
||||
dd = backref_cache[recno];
|
||||
else
|
||||
{
|
||||
ce = cs = PRIV(find_bracket)(startcode, utf, recno);
|
||||
if (cs == NULL) return -2;
|
||||
do ce += GET(ce, 1); while (*ce == OP_ALT);
|
||||
|
||||
dd = 0;
|
||||
if (!dupcapused || PRIV(find_bracket)(ce, utf, recno) == NULL)
|
||||
{
|
||||
if (cc > cs && cc < ce) /* Simple recursion */
|
||||
{
|
||||
had_recurse = TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
recurse_check *r = recurses;
|
||||
for (r = recurses; r != NULL; r = r->prev)
|
||||
if (r->group == cs) break;
|
||||
if (r != NULL) /* Mutual recursion */
|
||||
{
|
||||
had_recurse = TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
this_recurse.prev = recurses; /* No recursion */
|
||||
this_recurse.group = cs;
|
||||
dd = find_minlength(re, cs, startcode, utf, &this_recurse,
|
||||
countptr, backref_cache);
|
||||
if (dd < 0) return dd;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Store the result, mark any skipped-over slots between the old highest
entry and this one as unset (-1), and record this slot number in element 0. */
backref_cache[recno] = dd;
|
||||
for (i = backref_cache[0] + 1; i < recno; i++) backref_cache[i] = -1;
|
||||
backref_cache[0] = recno;
|
||||
}
|
||||
|
||||
if (dd < d) d = dd;
|
||||
if (d <= 0) break; /* No point looking at any more */
|
||||
slot += re->name_entry_size;
|
||||
}
|
||||
}
|
||||
else d = 0;
|
||||
cc += PRIV(OP_lengths)[*cc];
|
||||
goto REPEAT_BACK_REFERENCE;
|
||||
|
||||
/* Single back reference by number. References by name are converted to by
|
||||
number when there is no duplication. */
|
||||
|
||||
case OP_REF:
|
||||
case OP_REFI:
|
||||
recno = GET2(cc, 1);
|
||||
if (recno <= backref_cache[0] && backref_cache[recno] >= 0)
|
||||
d = backref_cache[recno];
|
||||
else
|
||||
{
|
||||
int i;
|
||||
d = 0;
|
||||
|
||||
if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
|
||||
{
|
||||
ce = cs = PRIV(find_bracket)(startcode, utf, recno);
|
||||
if (cs == NULL) return -2;
|
||||
do ce += GET(ce, 1); while (*ce == OP_ALT);
|
||||
|
||||
if (!dupcapused || PRIV(find_bracket)(ce, utf, recno) == NULL)
|
||||
{
|
||||
if (cc > cs && cc < ce) /* Simple recursion */
|
||||
{
|
||||
had_recurse = TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
recurse_check *r = recurses;
|
||||
for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
|
||||
if (r != NULL) /* Mutual recursion */
|
||||
{
|
||||
had_recurse = TRUE;
|
||||
}
|
||||
else /* No recursion */
|
||||
{
|
||||
this_recurse.prev = recurses;
|
||||
this_recurse.group = cs;
|
||||
d = find_minlength(re, cs, startcode, utf, &this_recurse, countptr,
|
||||
backref_cache);
|
||||
if (d < 0) return d;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Same cache bookkeeping as for duplicate-named references. */
backref_cache[recno] = d;
|
||||
for (i = backref_cache[0] + 1; i < recno; i++) backref_cache[i] = -1;
|
||||
backref_cache[0] = recno;
|
||||
}
|
||||
|
||||
cc += PRIV(OP_lengths)[*cc];
|
||||
|
||||
/* Handle repeated back references */
|
||||
|
||||
REPEAT_BACK_REFERENCE:
|
||||
switch (*cc)
|
||||
{
|
||||
case OP_CRSTAR:
|
||||
case OP_CRMINSTAR:
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
case OP_CRPOSSTAR:
|
||||
case OP_CRPOSQUERY:
|
||||
min = 0;
|
||||
cc++;
|
||||
break;
|
||||
|
||||
case OP_CRPLUS:
|
||||
case OP_CRMINPLUS:
|
||||
case OP_CRPOSPLUS:
|
||||
min = 1;
|
||||
cc++;
|
||||
break;
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
case OP_CRPOSRANGE:
|
||||
min = GET2(cc, 1);
|
||||
cc += 1 + 2 * IMM2_SIZE;
|
||||
break;
|
||||
|
||||
default:
|
||||
min = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Take care not to overflow: (1) min and d are ints, so check that their
|
||||
product is not greater than INT_MAX. (2) branchlength is limited to
|
||||
UINT16_MAX (checked at the top of the loop). */
|
||||
|
||||
if ((d > 0 && (INT_MAX/d) < min) || UINT16_MAX - branchlength < min*d)
|
||||
branchlength = UINT16_MAX;
|
||||
else branchlength += min * d;
|
||||
break;
|
||||
|
||||
/* Recursion always refers to the first occurrence of a subpattern with a
|
||||
given number. Therefore, we can always make use of caching, even when the
|
||||
pattern contains multiple subpatterns with the same number. */
|
||||
|
||||
case OP_RECURSE:
|
||||
cs = ce = startcode + GET(cc, 1);
|
||||
recno = GET2(cs, 1+LINK_SIZE);
|
||||
if (recno == prev_recurse_recno)
|
||||
{
|
||||
branchlength += prev_recurse_d;
|
||||
}
|
||||
else
|
||||
{
|
||||
do ce += GET(ce, 1); while (*ce == OP_ALT);
|
||||
if (cc > cs && cc < ce) /* Simple recursion */
|
||||
had_recurse = TRUE;
|
||||
else
|
||||
{
|
||||
recurse_check *r = recurses;
|
||||
for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
|
||||
if (r != NULL) /* Mutual recursion */
|
||||
had_recurse = TRUE;
|
||||
else
|
||||
{
|
||||
this_recurse.prev = recurses;
|
||||
this_recurse.group = cs;
|
||||
prev_recurse_d = find_minlength(re, cs, startcode, utf, &this_recurse,
|
||||
countptr, backref_cache);
|
||||
if (prev_recurse_d < 0) return prev_recurse_d;
|
||||
prev_recurse_recno = recno;
|
||||
branchlength += prev_recurse_d;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* once_fudge is non-zero only when the OP_BRA case found a bracket wrapped
round this recurse; it then also steps over that bracket's OP_KET. */
cc += 1 + LINK_SIZE + once_fudge;
|
||||
once_fudge = 0;
|
||||
break;
|
||||
|
||||
/* Anything else does not or need not match a character. We can get the
|
||||
item's length from the table, but for those that can match zero occurrences
|
||||
of a character, we must take special action for UTF-8 characters. As it
|
||||
happens, the "NOT" versions of these opcodes are used at present only for
|
||||
ASCII characters, so they could be omitted from this list. However, in
|
||||
future that may change, so we include them here so as not to leave a
|
||||
gotcha for a future maintainer. */
|
||||
|
||||
case OP_UPTO:
|
||||
case OP_UPTOI:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTUPTOI:
|
||||
case OP_MINUPTO:
|
||||
case OP_MINUPTOI:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_NOTMINUPTOI:
|
||||
case OP_POSUPTO:
|
||||
case OP_POSUPTOI:
|
||||
case OP_NOTPOSUPTO:
|
||||
case OP_NOTPOSUPTOI:
|
||||
|
||||
case OP_STAR:
|
||||
case OP_STARI:
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTSTARI:
|
||||
case OP_MINSTAR:
|
||||
case OP_MINSTARI:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTMINSTARI:
|
||||
case OP_POSSTAR:
|
||||
case OP_POSSTARI:
|
||||
case OP_NOTPOSSTAR:
|
||||
case OP_NOTPOSSTARI:
|
||||
|
||||
case OP_QUERY:
|
||||
case OP_QUERYI:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTQUERYI:
|
||||
case OP_MINQUERY:
|
||||
case OP_MINQUERYI:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_NOTMINQUERYI:
|
||||
case OP_POSQUERY:
|
||||
case OP_POSQUERYI:
|
||||
case OP_NOTPOSQUERY:
|
||||
case OP_NOTPOSQUERYI:
|
||||
|
||||
cc += PRIV(OP_lengths)[op];
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
|
||||
#endif
|
||||
break;
|
||||
|
||||
/* Skip these, but we need to add in the name length. */
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
cc += PRIV(OP_lengths)[op] + cc[1];
|
||||
break;
|
||||
|
||||
/* The remaining opcodes are just skipped over. */
|
||||
|
||||
case OP_CLOSE:
|
||||
case OP_COMMIT:
|
||||
case OP_FAIL:
|
||||
case OP_PRUNE:
|
||||
case OP_SET_SOM:
|
||||
case OP_SKIP:
|
||||
case OP_THEN:
|
||||
cc += PRIV(OP_lengths)[op];
|
||||
break;
|
||||
|
||||
/* This should not occur: we list all opcodes explicitly so that when
|
||||
new ones get added they are properly considered. */
|
||||
|
||||
default:
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
return -3;
|
||||
}
|
||||
}
|
||||
|
||||
PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
|
||||
return -3; /* Avoid compiler warnings */
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Set a bit and maybe its alternate case *
|
||||
*************************************************/
|
||||
|
||||
/* Given a character, set its first code unit's bit in the table, and also the
|
||||
corresponding bit for the other version of a letter if we are caseless.
|
||||
|
||||
Arguments:
|
||||
re points to the regex block
|
||||
p points to the first code unit of the character
|
||||
caseless TRUE if caseless
|
||||
utf TRUE for UTF mode
|
||||
ucp TRUE for UCP mode
|
||||
|
||||
Returns: pointer after the character
|
||||
*/
|
||||
|
||||
static PCRE2_SPTR
|
||||
set_table_bit(pcre2_real_code *re, PCRE2_SPTR p, BOOL caseless, BOOL utf,
|
||||
BOOL ucp)
|
||||
{
|
||||
uint32_t c = *p++; /* First code unit */
|
||||
|
||||
(void)utf; /* Stop compiler warnings when UTF not supported */
|
||||
(void)ucp;
|
||||
|
||||
/* In 16-bit and 32-bit modes, code units greater than 0xff set the bit for
|
||||
0xff. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (c > 0xff) SET_BIT(0xff); else
|
||||
#endif
|
||||
|
||||
SET_BIT(c);
|
||||
|
||||
/* In UTF-8 or UTF-16 mode, pick up the remaining code units in order to find
|
||||
the end of the character, even when caseless. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (c >= 0xc0) GETUTF8INC(c, p);
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
if ((c & 0xfc00) == 0xd800) GETUTF16INC(c, p);
|
||||
#endif
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* If caseless, handle the other case of the character. */
|
||||
|
||||
if (caseless)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf || ucp)
|
||||
{
|
||||
c = UCD_OTHERCASE(c);
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (utf)
|
||||
{
|
||||
PCRE2_UCHAR buff[6];
|
||||
(void)PRIV(ord2utf)(c, buff);
|
||||
SET_BIT(buff[0]);
|
||||
}
|
||||
else if (c < 256) SET_BIT(c);
|
||||
#else /* 16-bit or 32-bit mode */
|
||||
if (c > 0xff) SET_BIT(0xff); else SET_BIT(c);
|
||||
#endif
|
||||
}
|
||||
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Not UTF or UCP */
|
||||
|
||||
if (MAX_255(c)) SET_BIT(re->tables[fcc_offset + c]);
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Set bits for a positive character type *
|
||||
*************************************************/
|
||||
|
||||
/* This function sets starting bits for a character type. In UTF-8 mode, we can
|
||||
only do a direct setting for bytes less than 128, as otherwise there can be
|
||||
confusion with bytes in the middle of UTF-8 characters. In a "traditional"
|
||||
environment, the tables will only recognize ASCII characters anyway, but in at
|
||||
least one Windows environment, some higher bytes bits were set in the tables.
|
||||
So we deal with that case by considering the UTF-8 encoding.
|
||||
|
||||
Arguments:
|
||||
re the regex block
|
||||
cbit type the type of character wanted
|
||||
table_limit 32 for non-UTF-8; 16 for UTF-8
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
set_type_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit)
|
||||
{
|
||||
uint32_t c;
|
||||
for (c = 0; c < table_limit; c++)
|
||||
re->start_bitmap[c] |= re->tables[c+cbits_offset+cbit_type];
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (table_limit == 32) return;
|
||||
for (c = 128; c < 256; c++)
|
||||
{
|
||||
if ((re->tables[cbits_offset + c/8] & (1u << (c&7))) != 0)
|
||||
{
|
||||
PCRE2_UCHAR buff[6];
|
||||
(void)PRIV(ord2utf)(c, buff);
|
||||
SET_BIT(buff[0]);
|
||||
}
|
||||
}
|
||||
#endif /* UTF-8 */
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Set bits for a negative character type *
|
||||
*************************************************/
|
||||
|
||||
/* This function sets starting bits for a negative character type such as \D.
|
||||
In UTF-8 mode, we can only do a direct setting for bytes less than 128, as
|
||||
otherwise there can be confusion with bytes in the middle of UTF-8 characters.
|
||||
Unlike in the positive case, where we can set appropriate starting bits for
|
||||
specific high-valued UTF-8 characters, in this case we have to set the bits for
|
||||
all high-valued characters. The lowest is 0xc2, but we overkill by starting at
|
||||
0xc0 (192) for simplicity.
|
||||
|
||||
Arguments:
|
||||
re the regex block
|
||||
cbit type the type of character wanted
|
||||
table_limit 32 for non-UTF-8; 16 for UTF-8
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
set_nottype_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit)
|
||||
{
|
||||
uint32_t c;
|
||||
for (c = 0; c < table_limit; c++)
|
||||
re->start_bitmap[c] |= (uint8_t)(~(re->tables[c+cbits_offset+cbit_type]));
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
/*************************************************
|
||||
* Set starting bits for a character list. *
|
||||
*************************************************/
|
||||
|
||||
/* This function sets starting bits for a character list. It enumerates
|
||||
all characters and character ranges in the character list, and sets
|
||||
the starting bits accordingly.
|
||||
|
||||
Arguments:
|
||||
code pointer to the code
|
||||
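start_bitmap pointer to the starting bitmap
|
||||
char_lists_end pointer to the end of the character lists (list offsets are measured back from it)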
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
static void
|
||||
study_char_list(PCRE2_SPTR code, uint8_t *start_bitmap,
|
||||
const uint8_t *char_lists_end)
|
||||
{
|
||||
uint32_t type, list_ind;
|
||||
uint32_t char_list_add = XCL_CHAR_LIST_LOW_16_ADD;
|
||||
uint32_t range_start = ~(uint32_t)0, range_end = 0;
|
||||
const uint8_t *next_char;
|
||||
PCRE2_UCHAR start_buffer[6], end_buffer[6];
|
||||
PCRE2_UCHAR start, end;
|
||||
|
||||
/* Only needed in 8-bit mode at the moment. */
|
||||
type = (uint32_t)(code[0] << 8) | code[1];
|
||||
code += 2;
|
||||
|
||||
/* Align characters. */
|
||||
next_char = char_lists_end - (GET(code, 0) << 1);
|
||||
type &= XCL_TYPE_MASK;
|
||||
list_ind = 0;
|
||||
|
||||
if ((type & XCL_BEGIN_WITH_RANGE) != 0)
|
||||
range_start = XCL_CHAR_LIST_LOW_16_START;
|
||||
|
||||
while (type > 0)
|
||||
{
|
||||
uint32_t item_count = type & XCL_ITEM_COUNT_MASK;
|
||||
|
||||
if (item_count == XCL_ITEM_COUNT_MASK)
|
||||
{
|
||||
if (list_ind <= 1)
|
||||
{
|
||||
item_count = *(const uint16_t*)next_char;
|
||||
next_char += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
item_count = *(const uint32_t*)next_char;
|
||||
next_char += 4;
|
||||
}
|
||||
}
|
||||
|
||||
while (item_count > 0)
|
||||
{
|
||||
if (list_ind <= 1)
|
||||
{
|
||||
range_end = *(const uint16_t*)next_char;
|
||||
next_char += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
range_end = *(const uint32_t*)next_char;
|
||||
next_char += 4;
|
||||
}
|
||||
|
||||
if ((range_end & XCL_CHAR_END) != 0)
|
||||
{
|
||||
range_end = char_list_add + (range_end >> XCL_CHAR_SHIFT);
|
||||
|
||||
PRIV(ord2utf)(range_end, end_buffer);
|
||||
end = end_buffer[0];
|
||||
|
||||
if (range_start < range_end)
|
||||
{
|
||||
PRIV(ord2utf)(range_start, start_buffer);
|
||||
for (start = start_buffer[0]; start <= end; start++)
|
||||
start_bitmap[start / 8] |= (1u << (start & 7));
|
||||
}
|
||||
else
|
||||
start_bitmap[end / 8] |= (1u << (end & 7));
|
||||
|
||||
range_start = ~(uint32_t)0;
|
||||
}
|
||||
else
|
||||
range_start = char_list_add + (range_end >> XCL_CHAR_SHIFT);
|
||||
|
||||
item_count--;
|
||||
}
|
||||
|
||||
list_ind++;
|
||||
type >>= XCL_TYPE_BIT_LEN;
|
||||
|
||||
if (range_start == ~(uint32_t)0)
|
||||
{
|
||||
if ((type & XCL_BEGIN_WITH_RANGE) != 0)
|
||||
{
|
||||
/* In 8 bit mode XCL_CHAR_LIST_HIGH_32_START is not possible. */
|
||||
if (list_ind == 1) range_start = XCL_CHAR_LIST_HIGH_16_START;
|
||||
else range_start = XCL_CHAR_LIST_LOW_32_START;
|
||||
}
|
||||
}
|
||||
else if ((type & XCL_BEGIN_WITH_RANGE) == 0)
|
||||
{
|
||||
PRIV(ord2utf)(range_start, start_buffer);
|
||||
|
||||
/* In 8 bit mode XCL_CHAR_LIST_LOW_32_END and
|
||||
XCL_CHAR_LIST_HIGH_32_END are not possible. */
|
||||
if (list_ind == 1) range_end = XCL_CHAR_LIST_LOW_16_END;
|
||||
else range_end = XCL_CHAR_LIST_HIGH_16_END;
|
||||
|
||||
PRIV(ord2utf)(range_end, end_buffer);
|
||||
end = end_buffer[0];
|
||||
|
||||
for (start = start_buffer[0]; start <= end; start++)
|
||||
start_bitmap[start / 8] |= (1u << (start & 7));
|
||||
|
||||
range_start = ~(uint32_t)0;
|
||||
}
|
||||
|
||||
/* In 8 bit mode XCL_CHAR_LIST_HIGH_32_ADD is not possible. */
|
||||
if (list_ind == 1) char_list_add = XCL_CHAR_LIST_HIGH_16_ADD;
|
||||
else char_list_add = XCL_CHAR_LIST_LOW_32_ADD;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create bitmap of starting code units *
|
||||
*************************************************/
|
||||
|
||||
/* This function scans a compiled unanchored expression recursively and
|
||||
attempts to build a bitmap of the set of possible starting code units whose
|
||||
values are less than 256. In 16-bit and 32-bit mode, values above 255 all cause
|
||||
the 255 bit to be set. When calling set_type_bits() or set_nottype_bits() in UTF-8 (sic) mode
|
||||
we pass a value of 16 rather than 32 as the final argument. (See comments in
|
||||
those functions for the reason.)
|
||||
|
||||
The SSB_CONTINUE return is useful for parenthesized groups in patterns such as
|
||||
(a*)b where the group provides some optional starting code units but scanning
|
||||
must continue at the outer level to find at least one mandatory code unit. At
|
||||
the outermost level, this function fails unless the result is SSB_DONE.
|
||||
|
||||
We restrict recursion (for nested groups) to 1000 to avoid stack overflow
|
||||
issues.
|
||||
|
||||
Arguments:
|
||||
re points to the compiled regex block
|
||||
code points to an expression
|
||||
utf TRUE if in UTF mode
|
||||
ucp TRUE if in UCP mode
|
||||
depthptr pointer to recurse depth
|
||||
|
||||
Returns: SSB_FAIL => Failed to find any starting code units
|
||||
SSB_DONE => Found mandatory starting code units
|
||||
SSB_CONTINUE => Found optional starting code units
|
||||
SSB_UNKNOWN => Hit an unrecognized opcode
|
||||
SSB_TOODEEP => Recursion is too deep
|
||||
*/
|
||||
|
||||
static int
|
||||
set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf, BOOL ucp,
|
||||
int *depthptr)
|
||||
{
|
||||
/* c is scratch for character values and bitmap byte indexes. yield starts as
SSB_DONE and is downgraded to SSB_CONTINUE when a branch reaches OP_ALT without
a mandatory item having been found. */
uint32_t c;
|
||||
int yield = SSB_DONE;
|
||||
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
int table_limit = utf? 16:32;
|
||||
#else
|
||||
int table_limit = 32;
|
||||
#endif
|
||||
|
||||
/* The counter is only ever incremented in this function, never decremented on
return, so the limit of 1000 applies to the total number of calls made below
one top-level call, not just to the current nesting depth. */
*depthptr += 1;
|
||||
if (*depthptr > 1000) return SSB_TOODEEP;
|
||||
|
||||
do
|
||||
{
|
||||
BOOL try_next = TRUE;
|
||||
/* Start after the bracket opcode and its LINK_SIZE field; the capturing
bracket opcodes tested below have a further IMM2_SIZE field to skip. */
PCRE2_SPTR tcode = code + 1 + LINK_SIZE;
|
||||
|
||||
if (*code == OP_CBRA || *code == OP_SCBRA ||
|
||||
*code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += IMM2_SIZE;
|
||||
|
||||
while (try_next) /* Loop for items in this branch */
|
||||
{
|
||||
int rc;
|
||||
PCRE2_SPTR ncode;
|
||||
const uint8_t *classmap = NULL;
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
PCRE2_UCHAR xclassflags;
|
||||
#endif
|
||||
|
||||
switch(*tcode)
|
||||
{
|
||||
/* If we reach something we don't understand, it means a new opcode has
|
||||
been created that hasn't been added to this function. Hopefully this
|
||||
problem will be discovered during testing. */
|
||||
|
||||
default:
|
||||
return SSB_UNKNOWN;
|
||||
|
||||
/* Fail for a valid opcode that implies no starting bits. */
|
||||
|
||||
case OP_ACCEPT:
|
||||
case OP_ASSERT_ACCEPT:
|
||||
case OP_ALLANY:
|
||||
case OP_ANY:
|
||||
case OP_ANYBYTE:
|
||||
case OP_CIRCM:
|
||||
case OP_CLOSE:
|
||||
case OP_COMMIT:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_COND:
|
||||
case OP_CREF:
|
||||
case OP_FALSE:
|
||||
case OP_TRUE:
|
||||
case OP_DNCREF:
|
||||
case OP_DNREF:
|
||||
case OP_DNREFI:
|
||||
case OP_DNRREF:
|
||||
case OP_DOLL:
|
||||
case OP_DOLLM:
|
||||
case OP_END:
|
||||
case OP_EOD:
|
||||
case OP_EODN:
|
||||
case OP_EXTUNI:
|
||||
case OP_FAIL:
|
||||
case OP_MARK:
|
||||
case OP_NOT:
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTEXACTI:
|
||||
case OP_NOTI:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTMINPLUSI:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_NOTMINQUERYI:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTMINSTARI:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_NOTMINUPTOI:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTPLUSI:
|
||||
case OP_NOTPOSPLUS:
|
||||
case OP_NOTPOSPLUSI:
|
||||
case OP_NOTPOSQUERY:
|
||||
case OP_NOTPOSQUERYI:
|
||||
case OP_NOTPOSSTAR:
|
||||
case OP_NOTPOSSTARI:
|
||||
case OP_NOTPOSUPTO:
|
||||
case OP_NOTPOSUPTOI:
|
||||
case OP_NOTPROP:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTQUERYI:
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTSTARI:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTUPTOI:
|
||||
case OP_NOT_HSPACE:
|
||||
case OP_NOT_VSPACE:
|
||||
case OP_PRUNE:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_RECURSE:
|
||||
case OP_REF:
|
||||
case OP_REFI:
|
||||
case OP_REVERSE:
|
||||
case OP_VREVERSE:
|
||||
case OP_RREF:
|
||||
case OP_SCOND:
|
||||
case OP_SET_SOM:
|
||||
case OP_SKIP:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_SOD:
|
||||
case OP_SOM:
|
||||
case OP_THEN:
|
||||
case OP_THEN_ARG:
|
||||
return SSB_FAIL;
|
||||
|
||||
/* OP_CIRC happens only at the start of an anchored branch (multiline ^
|
||||
uses OP_CIRCM). Skip over it. */
|
||||
|
||||
case OP_CIRC:
|
||||
tcode += PRIV(OP_lengths)[OP_CIRC];
|
||||
break;
|
||||
|
||||
/* A "real" property test implies no starting bits, but the fake property
|
||||
PT_CLIST identifies a list of characters. These lists are short, as they
|
||||
are used for characters with more than one "other case", so there is no
|
||||
point in recognizing them for OP_NOTPROP. */
|
||||
|
||||
case OP_PROP:
|
||||
if (tcode[1] != PT_CLIST) return SSB_FAIL;
|
||||
{
|
||||
const uint32_t *p = PRIV(ucd_caseless_sets) + tcode[2];
|
||||
while ((c = *p++) < NOTACHAR)
|
||||
{
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (utf)
|
||||
{
|
||||
PCRE2_UCHAR buff[6];
|
||||
(void)PRIV(ord2utf)(c, buff);
|
||||
c = buff[0];
|
||||
}
|
||||
#endif
|
||||
if (c > 0xff) SET_BIT(0xff); else SET_BIT(c);
|
||||
}
|
||||
}
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* We can ignore word boundary tests. */
|
||||
|
||||
case OP_WORD_BOUNDARY:
|
||||
case OP_NOT_WORD_BOUNDARY:
|
||||
case OP_UCP_WORD_BOUNDARY:
|
||||
case OP_NOT_UCP_WORD_BOUNDARY:
|
||||
tcode++;
|
||||
break;
|
||||
|
||||
/* For a positive lookahead assertion, inspect what immediately follows,
|
||||
ignoring intermediate assertions and callouts. If the next item is one
|
||||
that sets a mandatory character, skip this assertion. Otherwise, treat it
|
||||
the same as other bracket groups. */
|
||||
|
||||
case OP_ASSERT:
|
||||
case OP_ASSERT_NA:
|
||||
ncode = tcode + GET(tcode, 1);
|
||||
while (*ncode == OP_ALT) ncode += GET(ncode, 1);
|
||||
ncode += 1 + LINK_SIZE;
|
||||
|
||||
/* Skip irrelevant items */
|
||||
|
||||
for (BOOL done = FALSE; !done;)
|
||||
{
|
||||
switch (*ncode)
|
||||
{
|
||||
case OP_ASSERT:
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
case OP_ASSERT_NA:
|
||||
case OP_ASSERTBACK_NA:
|
||||
case OP_ASSERT_SCS:
|
||||
ncode += GET(ncode, 1);
|
||||
while (*ncode == OP_ALT) ncode += GET(ncode, 1);
|
||||
ncode += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
case OP_WORD_BOUNDARY:
|
||||
case OP_NOT_WORD_BOUNDARY:
|
||||
case OP_UCP_WORD_BOUNDARY:
|
||||
case OP_NOT_UCP_WORD_BOUNDARY:
|
||||
ncode++;
|
||||
break;
|
||||
|
||||
case OP_CALLOUT:
|
||||
ncode += PRIV(OP_lengths)[OP_CALLOUT];
|
||||
break;
|
||||
|
||||
case OP_CALLOUT_STR:
|
||||
ncode += GET(ncode, 1 + 2*LINK_SIZE);
|
||||
break;
|
||||
|
||||
default:
|
||||
done = TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Now check the next significant item. */
|
||||
|
||||
switch(*ncode)
|
||||
{
|
||||
default:
|
||||
break;
|
||||
|
||||
case OP_PROP:
|
||||
if (ncode[1] != PT_CLIST) break;
|
||||
/* Fall through */
|
||||
case OP_ANYNL:
|
||||
case OP_CHAR:
|
||||
case OP_CHARI:
|
||||
case OP_EXACT:
|
||||
case OP_EXACTI:
|
||||
case OP_HSPACE:
|
||||
case OP_MINPLUS:
|
||||
case OP_MINPLUSI:
|
||||
case OP_PLUS:
|
||||
case OP_PLUSI:
|
||||
case OP_POSPLUS:
|
||||
case OP_POSPLUSI:
|
||||
case OP_VSPACE:
|
||||
/* Note that these types will only be present in non-UCP mode. */
|
||||
case OP_DIGIT:
|
||||
case OP_NOT_DIGIT:
|
||||
case OP_WORDCHAR:
|
||||
case OP_NOT_WORDCHAR:
|
||||
case OP_WHITESPACE:
|
||||
case OP_NOT_WHITESPACE:
|
||||
tcode = ncode;
|
||||
continue; /* With the following significant opcode */
|
||||
}
|
||||
/* Fall through */
|
||||
|
||||
/* For a group bracket or a positive assertion without an immediately
|
||||
following mandatory setting, recurse to set bits from within the
|
||||
subpattern. If it can't find anything, we have to give up. If it finds
|
||||
some mandatory character(s), we are done for this branch. Otherwise,
|
||||
carry on scanning after the subpattern. */
|
||||
|
||||
case OP_BRA:
|
||||
case OP_SBRA:
|
||||
case OP_CBRA:
|
||||
case OP_SCBRA:
|
||||
case OP_BRAPOS:
|
||||
case OP_SBRAPOS:
|
||||
case OP_CBRAPOS:
|
||||
case OP_SCBRAPOS:
|
||||
case OP_ONCE:
|
||||
case OP_SCRIPT_RUN:
|
||||
rc = set_start_bits(re, tcode, utf, ucp, depthptr);
|
||||
if (rc == SSB_DONE)
|
||||
{
|
||||
try_next = FALSE;
|
||||
}
|
||||
else if (rc == SSB_CONTINUE)
|
||||
{
|
||||
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
|
||||
tcode += 1 + LINK_SIZE;
|
||||
}
|
||||
else return rc; /* FAIL, UNKNOWN, or TOODEEP */
|
||||
break;
|
||||
|
||||
/* If we hit ALT or KET, it means we haven't found anything mandatory in
|
||||
this branch, though we might have found something optional. For ALT, we
|
||||
continue with the next alternative, but we have to arrange that the final
|
||||
result from subpattern is SSB_CONTINUE rather than SSB_DONE. For KET,
|
||||
return SSB_CONTINUE: if this is the top level, that indicates failure,
|
||||
but after a nested subpattern, it causes scanning to continue. */
|
||||
|
||||
case OP_ALT:
|
||||
yield = SSB_CONTINUE;
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_KET:
|
||||
case OP_KETRMAX:
|
||||
case OP_KETRMIN:
|
||||
case OP_KETRPOS:
|
||||
return SSB_CONTINUE;
|
||||
|
||||
/* Skip over callout */
|
||||
|
||||
case OP_CALLOUT:
|
||||
tcode += PRIV(OP_lengths)[OP_CALLOUT];
|
||||
break;
|
||||
|
||||
case OP_CALLOUT_STR:
|
||||
tcode += GET(tcode, 1 + 2*LINK_SIZE);
|
||||
break;
|
||||
|
||||
/* Skip over lookbehind, negative lookahead, and scan substring
|
||||
assertions */
|
||||
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
case OP_ASSERTBACK_NA:
|
||||
case OP_ASSERT_SCS:
|
||||
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
|
||||
tcode += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* BRAZERO does the bracket, but carries on. */
|
||||
|
||||
case OP_BRAZERO:
|
||||
case OP_BRAMINZERO:
|
||||
case OP_BRAPOSZERO:
|
||||
rc = set_start_bits(re, ++tcode, utf, ucp, depthptr);
|
||||
if (rc == SSB_FAIL || rc == SSB_UNKNOWN || rc == SSB_TOODEEP) return rc;
|
||||
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
|
||||
tcode += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* SKIPZERO skips the bracket. */
|
||||
|
||||
case OP_SKIPZERO:
|
||||
tcode++;
|
||||
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
|
||||
tcode += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Single-char * or ? sets the bit and tries the next item */
|
||||
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_POSSTAR:
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_POSQUERY:
|
||||
tcode = set_table_bit(re, tcode + 1, FALSE, utf, ucp);
|
||||
break;
|
||||
|
||||
case OP_STARI:
|
||||
case OP_MINSTARI:
|
||||
case OP_POSSTARI:
|
||||
case OP_QUERYI:
|
||||
case OP_MINQUERYI:
|
||||
case OP_POSQUERYI:
|
||||
tcode = set_table_bit(re, tcode + 1, TRUE, utf, ucp);
|
||||
break;
|
||||
|
||||
/* Single-char upto sets the bit and tries the next */
|
||||
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
case OP_POSUPTO:
|
||||
tcode = set_table_bit(re, tcode + 1 + IMM2_SIZE, FALSE, utf, ucp);
|
||||
break;
|
||||
|
||||
case OP_UPTOI:
|
||||
case OP_MINUPTOI:
|
||||
case OP_POSUPTOI:
|
||||
tcode = set_table_bit(re, tcode + 1 + IMM2_SIZE, TRUE, utf, ucp);
|
||||
break;
|
||||
|
||||
/* At least one single char sets the bit and stops */
|
||||
|
||||
case OP_EXACT:
|
||||
tcode += IMM2_SIZE;
|
||||
/* Fall through */
|
||||
case OP_CHAR:
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_POSPLUS:
|
||||
(void)set_table_bit(re, tcode + 1, FALSE, utf, ucp);
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_EXACTI:
|
||||
tcode += IMM2_SIZE;
|
||||
/* Fall through */
|
||||
case OP_CHARI:
|
||||
case OP_PLUSI:
|
||||
case OP_MINPLUSI:
|
||||
case OP_POSPLUSI:
|
||||
(void)set_table_bit(re, tcode + 1, TRUE, utf, ucp);
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* Special spacing and line-terminating items. These recognize specific
|
||||
lists of characters. The difference between VSPACE and ANYNL is that the
|
||||
latter can match the two-character CRLF sequence, but that is not
|
||||
relevant for finding the first character, so their code here is
|
||||
identical. */
|
||||
|
||||
case OP_HSPACE:
|
||||
SET_BIT(CHAR_HT);
|
||||
SET_BIT(CHAR_SPACE);
|
||||
|
||||
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
|
||||
the bits for 0xA0 and for code units >= 255, independently of UTF. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
SET_BIT(0xA0);
|
||||
SET_BIT(0xFF);
|
||||
#else
|
||||
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
||||
units of horizontal space characters. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
SET_BIT(0xC2); /* For U+00A0 */
|
||||
SET_BIT(0xE1); /* For U+1680, U+180E */
|
||||
SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */
|
||||
SET_BIT(0xE3); /* For U+3000 */
|
||||
}
|
||||
else
|
||||
#endif
|
||||
/* For the 8-bit library not in UTF-8 mode, set the bit for 0xA0, unless
|
||||
the code is EBCDIC. */
|
||||
{
|
||||
#ifndef EBCDIC
|
||||
SET_BIT(0xA0);
|
||||
#endif /* Not EBCDIC */
|
||||
}
|
||||
#endif /* 8-bit support */
|
||||
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_ANYNL:
|
||||
case OP_VSPACE:
|
||||
SET_BIT(CHAR_LF);
|
||||
SET_BIT(CHAR_VT);
|
||||
SET_BIT(CHAR_FF);
|
||||
SET_BIT(CHAR_CR);
|
||||
|
||||
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
|
||||
the bits for NEL and for code units >= 255, independently of UTF. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
SET_BIT(CHAR_NEL);
|
||||
SET_BIT(0xFF);
|
||||
#else
|
||||
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
||||
units of vertical space characters. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
SET_BIT(0xC2); /* For U+0085 (NEL) */
|
||||
SET_BIT(0xE2); /* For U+2028, U+2029 */
|
||||
}
|
||||
else
|
||||
#endif
|
||||
/* For the 8-bit library not in UTF-8 mode, set the bit for NEL. */
|
||||
{
|
||||
SET_BIT(CHAR_NEL);
|
||||
}
|
||||
#endif /* 8-bit support */
|
||||
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* Single character types set the bits and stop. Note that if PCRE2_UCP
|
||||
is set, we do not see these opcodes because \d etc are converted to
|
||||
properties. Therefore, these apply in the case when only characters less
|
||||
than 256 are recognized to match the types. */
|
||||
|
||||
case OP_NOT_DIGIT:
|
||||
set_nottype_bits(re, cbit_digit, table_limit);
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_DIGIT:
|
||||
set_type_bits(re, cbit_digit, table_limit);
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
set_nottype_bits(re, cbit_space, table_limit);
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_WHITESPACE:
|
||||
set_type_bits(re, cbit_space, table_limit);
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_NOT_WORDCHAR:
|
||||
set_nottype_bits(re, cbit_word, table_limit);
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_WORDCHAR:
|
||||
set_type_bits(re, cbit_word, table_limit);
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* One or more character type fudges the pointer and restarts, knowing
|
||||
it will hit a single character type and stop there. */
|
||||
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEPOSPLUS:
|
||||
tcode++;
|
||||
break;
|
||||
|
||||
case OP_TYPEEXACT:
|
||||
tcode += 1 + IMM2_SIZE;
|
||||
break;
|
||||
|
||||
/* Zero or more repeats of character types set the bits and then
|
||||
try again. */
|
||||
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEPOSUPTO:
|
||||
tcode += IMM2_SIZE; /* Fall through */
|
||||
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSQUERY:
|
||||
switch(tcode[1])
|
||||
{
|
||||
default:
|
||||
case OP_ANY:
|
||||
case OP_ALLANY:
|
||||
return SSB_FAIL;
|
||||
|
||||
case OP_HSPACE:
|
||||
SET_BIT(CHAR_HT);
|
||||
SET_BIT(CHAR_SPACE);
|
||||
|
||||
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
|
||||
the bits for 0xA0 and for code units >= 255, independently of UTF. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
SET_BIT(0xA0);
|
||||
SET_BIT(0xFF);
|
||||
#else
|
||||
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
||||
units of horizontal space characters. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
SET_BIT(0xC2); /* For U+00A0 */
|
||||
SET_BIT(0xE1); /* For U+1680, U+180E */
|
||||
SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */
|
||||
SET_BIT(0xE3); /* For U+3000 */
|
||||
}
|
||||
else
|
||||
#endif
|
||||
/* For the 8-bit library not in UTF-8 mode, set the bit for 0xA0, unless
|
||||
the code is EBCDIC. */
|
||||
{
|
||||
#ifndef EBCDIC
|
||||
SET_BIT(0xA0);
|
||||
#endif /* Not EBCDIC */
|
||||
}
|
||||
#endif /* 8-bit support */
|
||||
break;
|
||||
|
||||
case OP_ANYNL:
|
||||
case OP_VSPACE:
|
||||
SET_BIT(CHAR_LF);
|
||||
SET_BIT(CHAR_VT);
|
||||
SET_BIT(CHAR_FF);
|
||||
SET_BIT(CHAR_CR);
|
||||
|
||||
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
|
||||
the bits for NEL and for code units >= 255, independently of UTF. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
SET_BIT(CHAR_NEL);
|
||||
SET_BIT(0xFF);
|
||||
#else
|
||||
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
||||
units of vertical space characters. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
SET_BIT(0xC2); /* For U+0085 (NEL) */
|
||||
SET_BIT(0xE2); /* For U+2028, U+2029 */
|
||||
}
|
||||
else
|
||||
#endif
|
||||
/* For the 8-bit library not in UTF-8 mode, set the bit for NEL. */
|
||||
{
|
||||
SET_BIT(CHAR_NEL);
|
||||
}
|
||||
#endif /* 8-bit support */
|
||||
break;
|
||||
|
||||
case OP_NOT_DIGIT:
|
||||
set_nottype_bits(re, cbit_digit, table_limit);
|
||||
break;
|
||||
|
||||
case OP_DIGIT:
|
||||
set_type_bits(re, cbit_digit, table_limit);
|
||||
break;
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
set_nottype_bits(re, cbit_space, table_limit);
|
||||
break;
|
||||
|
||||
case OP_WHITESPACE:
|
||||
set_type_bits(re, cbit_space, table_limit);
|
||||
break;
|
||||
|
||||
case OP_NOT_WORDCHAR:
|
||||
set_nottype_bits(re, cbit_word, table_limit);
|
||||
break;
|
||||
|
||||
case OP_WORDCHAR:
|
||||
set_type_bits(re, cbit_word, table_limit);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Step over the repeat opcode and the type opcode that was just examined as
tcode[1]; any IMM2_SIZE count field was already skipped above. */
tcode += 2;
|
||||
break;
|
||||
|
||||
/* Set-based ECLASS: treat it the same as a "complex" XCLASS; give up. */
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
case OP_ECLASS:
|
||||
return SSB_FAIL;
|
||||
#endif
|
||||
|
||||
/* Extended class: if there are any property checks, or if this is a
|
||||
negative XCLASS without a map, give up. If there are no property checks,
|
||||
there must be wide characters on the XCLASS list, because otherwise an
|
||||
XCLASS would not have been created. This means that code points >= 255
|
||||
are potential starters. In the UTF-8 case we can scan them and set bits
|
||||
for the relevant leading bytes. */
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
case OP_XCLASS:
|
||||
xclassflags = tcode[1 + LINK_SIZE];
|
||||
if ((xclassflags & XCL_HASPROP) != 0 ||
|
||||
(xclassflags & (XCL_MAP|XCL_NOT)) == XCL_NOT)
|
||||
return SSB_FAIL;
|
||||
|
||||
/* We have a positive XCLASS or a negative one with a map (a negative one
|
||||
without a map was rejected above). Set up the map pointer if there is one, and fall through. */
|
||||
|
||||
classmap = ((xclassflags & XCL_MAP) == 0)? NULL :
|
||||
(const uint8_t *)(tcode + 1 + LINK_SIZE + 1);
|
||||
|
||||
/* In UTF-8 mode, scan the character list and set bits for leading bytes,
|
||||
then jump to handle the map. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (utf && (xclassflags & XCL_NOT) == 0)
|
||||
{
|
||||
PCRE2_UCHAR b, e;
|
||||
PCRE2_SPTR p = tcode + 1 + LINK_SIZE + 1 + ((classmap == NULL)? 0:32);
|
||||
tcode += GET(tcode, 1);
|
||||
|
||||
if (*p >= XCL_LIST)
|
||||
{
|
||||
study_char_list(p, re->start_bitmap,
|
||||
((const uint8_t *)re + re->code_start));
|
||||
goto HANDLE_CLASSMAP;
|
||||
}
|
||||
|
||||
for (;;) switch (*p++)
|
||||
{
|
||||
case XCL_SINGLE:
|
||||
b = *p++;
|
||||
while ((*p & 0xc0) == 0x80) p++;
|
||||
re->start_bitmap[b/8] |= (1u << (b&7));
|
||||
break;
|
||||
|
||||
case XCL_RANGE:
|
||||
b = *p++;
|
||||
while ((*p & 0xc0) == 0x80) p++;
|
||||
e = *p++;
|
||||
while ((*p & 0xc0) == 0x80) p++;
|
||||
for (; b <= e; b++)
|
||||
re->start_bitmap[b/8] |= (1u << (b&7));
|
||||
break;
|
||||
|
||||
case XCL_END:
|
||||
goto HANDLE_CLASSMAP;
|
||||
|
||||
default:
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
return SSB_UNKNOWN; /* Internal error, should not occur */
|
||||
}
|
||||
}
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||
#endif /* SUPPORT_WIDE_CHARS */
|
||||
|
||||
/* It seems that the fall through comment must be outside the #ifdef if
|
||||
it is to avoid the gcc compiler warning. */
|
||||
|
||||
/* Fall through */
|
||||
|
||||
/* Enter here for a negative non-XCLASS. In the 8-bit library, if we are
|
||||
in UTF mode, any byte with a value >= 0xc4 is a potentially valid starter
|
||||
because it starts a character with a value > 255. In 8-bit non-UTF mode,
|
||||
there is no difference between CLASS and NCLASS. In all other wide
|
||||
character modes, set the 0xFF bit to indicate code units >= 255. */
|
||||
|
||||
case OP_NCLASS:
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (utf)
|
||||
{
|
||||
re->start_bitmap[24] |= 0xf0; /* Bits for 0xc4 - 0xc7 */
|
||||
memset(re->start_bitmap+25, 0xff, 7); /* Bits for 0xc8 - 0xff */
|
||||
}
|
||||
#elif PCRE2_CODE_UNIT_WIDTH != 8
|
||||
SET_BIT(0xFF); /* For characters >= 255 */
|
||||
#endif
|
||||
/* Fall through */
|
||||
|
||||
/* Enter here for a positive non-XCLASS. If we have fallen through from
|
||||
an XCLASS, classmap will already be set; just advance the code pointer.
|
||||
Otherwise, set up classmap for a non-XCLASS and advance past it. */
|
||||
|
||||
case OP_CLASS:
|
||||
if (*tcode == OP_XCLASS) tcode += GET(tcode, 1); else
|
||||
{
|
||||
classmap = (const uint8_t *)(++tcode);
|
||||
tcode += 32 / sizeof(PCRE2_UCHAR);
|
||||
}
|
||||
|
||||
/* When wide characters are supported, classmap may be NULL. In UTF-8
|
||||
(sic) mode, the bits in a class bit map correspond to character values,
|
||||
not to byte values. However, the bit map we are constructing is for byte
|
||||
values. So we have to do a conversion for characters whose code point is
|
||||
greater than 127. In fact, there are only two possible starting bytes for
|
||||
characters in the range 128 - 255. */
|
||||
|
||||
#if defined SUPPORT_WIDE_CHARS && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
HANDLE_CLASSMAP:
|
||||
#endif
|
||||
if (classmap != NULL)
|
||||
{
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (utf)
|
||||
{
|
||||
for (c = 0; c < 16; c++) re->start_bitmap[c] |= classmap[c];
|
||||
for (c = 128; c < 256; c++)
|
||||
{
|
||||
if ((classmap[c/8] & (1u << (c&7))) != 0)
|
||||
{
|
||||
int d = (c >> 6) | 0xc0; /* Set bit for this starter */
|
||||
re->start_bitmap[d/8] |= (1u << (d&7)); /* and then skip on to the */
|
||||
c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
/* In all modes except UTF-8, the two bit maps are compatible. */
|
||||
|
||||
{
|
||||
for (c = 0; c < 32; c++) re->start_bitmap[c] |= classmap[c];
|
||||
}
|
||||
}
|
||||
|
||||
/* Act on what follows the class. For a zero minimum repeat, continue;
|
||||
otherwise stop processing. */
|
||||
|
||||
switch (*tcode)
|
||||
{
|
||||
case OP_CRSTAR:
|
||||
case OP_CRMINSTAR:
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
case OP_CRPOSSTAR:
|
||||
case OP_CRPOSQUERY:
|
||||
tcode++;
|
||||
break;
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
case OP_CRPOSRANGE:
|
||||
if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE;
|
||||
else try_next = FALSE;
|
||||
break;
|
||||
|
||||
default:
|
||||
try_next = FALSE;
|
||||
break;
|
||||
}
|
||||
break; /* End of class handling case */
|
||||
} /* End of switch for opcodes */
|
||||
} /* End of try_next loop */
|
||||
|
||||
code += GET(code, 1); /* Advance to next branch */
|
||||
}
|
||||
while (*code == OP_ALT);
|
||||
|
||||
return yield;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Study a compiled expression *
|
||||
*************************************************/
|
||||
|
||||
/* This function is handed a compiled expression that it must study to produce
|
||||
information that will speed up the matching.
|
||||
|
||||
Argument:
|
||||
re points to the compiled expression
|
||||
|
||||
Returns: 0 normally; non-zero should never normally occur
|
||||
1 unknown opcode in set_start_bits
|
||||
2 missing capturing bracket
|
||||
3 unknown opcode in find_minlength
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(study)(pcre2_real_code *re)
|
||||
{
|
||||
int count = 0;
|
||||
PCRE2_UCHAR *code;
|
||||
BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
|
||||
BOOL ucp = (re->overall_options & PCRE2_UCP) != 0;
|
||||
|
||||
/* Find start of compiled code */
|
||||
|
||||
code = (PCRE2_UCHAR *)((uint8_t *)re + re->code_start);
|
||||
|
||||
/* For a pattern that has a first code unit, or a multiline pattern that
|
||||
matches only at "line start", there is no point in seeking a list of starting
|
||||
code units. */
|
||||
|
||||
if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
|
||||
{
|
||||
int depth = 0;
|
||||
int rc = set_start_bits(re, code, utf, ucp, &depth);
|
||||
if (rc == SSB_UNKNOWN)
|
||||
{
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* If a list of starting code units was set up, scan the list to see if only
|
||||
one or two were listed. Having only one listed is rare because usually a
|
||||
single starting code unit will have been recognized and PCRE2_FIRSTSET set.
|
||||
If two are listed, see if they are caseless versions of the same character;
|
||||
if so we can replace the list with a caseless first code unit. This gives
|
||||
better performance and is plausibly worth doing for patterns such as [Ww]ord
|
||||
or (word|WORD). */
|
||||
|
||||
if (rc == SSB_DONE)
|
||||
{
|
||||
int i;
|
||||
int a = -1;
|
||||
int b = -1;
|
||||
uint8_t *p = re->start_bitmap;
|
||||
uint32_t flags = PCRE2_FIRSTMAPSET;
|
||||
|
||||
for (i = 0; i < 256; p++, i += 8)
|
||||
{
|
||||
uint8_t x = *p;
|
||||
if (x != 0)
|
||||
{
|
||||
int c;
|
||||
uint8_t y = x & (~x + 1); /* Least significant bit */
|
||||
if (y != x) goto DONE; /* More than one bit set */
|
||||
|
||||
/* In the 16-bit and 32-bit libraries, the bit for 0xff means "0xff and
|
||||
all wide characters", so we cannot use it here. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (i == 248 && x == 0x80) goto DONE;
|
||||
#endif
|
||||
|
||||
/* Compute the character value */
|
||||
|
||||
c = i;
|
||||
switch (x)
|
||||
{
|
||||
case 1: break;
|
||||
case 2: c += 1; break; case 4: c += 2; break;
|
||||
case 8: c += 3; break; case 16: c += 4; break;
|
||||
case 32: c += 5; break; case 64: c += 6; break;
|
||||
case 128: c += 7; break;
|
||||
}
|
||||
|
||||
/* c contains the code unit value, in the range 0-255. In 8-bit UTF
|
||||
mode, only values < 128 can be used. In all the other cases, c is a
|
||||
character value. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (utf && c > 127) goto DONE;
|
||||
#endif
|
||||
if (a < 0) a = c; /* First one found, save in a */
|
||||
else if (b < 0) /* Second one found */
|
||||
{
|
||||
int d = TABLE_GET((unsigned int)c, re->tables + fcc_offset, c);
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf || ucp)
|
||||
{
|
||||
if (UCD_CASESET(c) != 0) goto DONE; /* Multiple case set */
|
||||
if (c > 127) d = UCD_OTHERCASE(c);
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
if (d != a) goto DONE; /* Not the other case of a */
|
||||
b = c; /* Save second in b */
|
||||
}
|
||||
else goto DONE; /* More than two characters found */
|
||||
}
|
||||
}
|
||||
|
||||
/* Replace the start code unit bits with a first code unit. If it is the
|
||||
same as a required later code unit, then clear the required later code
|
||||
unit. This is because a search for a required code unit starts after an
|
||||
explicit first code unit, but at a code unit found from the bitmap.
|
||||
Patterns such as /a*a/ don't work if both the start unit and required
|
||||
unit are the same. */
|
||||
|
||||
if (a >= 0) {
|
||||
if ((re->flags & PCRE2_LASTSET) && (re->last_codeunit == (uint32_t)a || (b >= 0 && re->last_codeunit == (uint32_t)b))) {
|
||||
re->flags &= ~(PCRE2_LASTSET | PCRE2_LASTCASELESS);
|
||||
re->last_codeunit = 0;
|
||||
}
|
||||
re->first_codeunit = a;
|
||||
flags = PCRE2_FIRSTSET;
|
||||
if (b >= 0) flags |= PCRE2_FIRSTCASELESS;
|
||||
}
|
||||
|
||||
DONE:
|
||||
re->flags |= flags;
|
||||
}
|
||||
}
|
||||
|
||||
/* Find the minimum length of subject string. If the pattern can match an empty
|
||||
string, the minimum length is already known. If the pattern contains (*ACCEPT)
|
||||
all bets are off, and we don't even try to find a minimum length. If there are
|
||||
more back references than the size of the vector we are going to cache them in,
|
||||
do nothing. A pattern that complicated will probably take a long time to
|
||||
analyze and may in any case turn out to be too complicated. Note that back
|
||||
reference minima are held as 16-bit numbers. */
|
||||
|
||||
if ((re->flags & (PCRE2_MATCH_EMPTY|PCRE2_HASACCEPT)) == 0 &&
|
||||
re->top_backref <= MAX_CACHE_BACKREF)
|
||||
{
|
||||
int min;
|
||||
int backref_cache[MAX_CACHE_BACKREF+1];
|
||||
backref_cache[0] = 0; /* Highest one that is set */
|
||||
min = find_minlength(re, code, code, utf, NULL, &count, backref_cache);
|
||||
switch(min)
|
||||
{
|
||||
case -1: /* \C in UTF mode or over-complex regex */
|
||||
break; /* Leave minlength unchanged (will be zero) */
|
||||
|
||||
case -2:
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
return 2; /* missing capturing bracket */
|
||||
|
||||
case -3:
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
return 3; /* unrecognized opcode */
|
||||
|
||||
default:
|
||||
re->minlength = (min > UINT16_MAX)? UINT16_MAX : min;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre2_study.c */
|
||||
@@ -1,1707 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
#define PTR_STACK_SIZE 20
|
||||
|
||||
#define SUBSTITUTE_OPTIONS \
|
||||
(PCRE2_SUBSTITUTE_EXTENDED|PCRE2_SUBSTITUTE_GLOBAL| \
|
||||
PCRE2_SUBSTITUTE_LITERAL|PCRE2_SUBSTITUTE_MATCHED| \
|
||||
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_REPLACEMENT_ONLY| \
|
||||
PCRE2_SUBSTITUTE_UNKNOWN_UNSET|PCRE2_SUBSTITUTE_UNSET_EMPTY)
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find end of substitute text *
|
||||
*************************************************/
|
||||
|
||||
/* In extended mode, we recognize ${name:+set text:unset text} and similar
|
||||
constructions. This requires the identification of unescaped : and }
|
||||
characters. This function scans for such. It must deal with nested ${
|
||||
constructions. The pointer to the text is updated, either to the required end
|
||||
character, or to where an error was detected.
|
||||
|
||||
Arguments:
|
||||
code points to the compiled expression (for options)
|
||||
ptrptr points to the pointer to the start of the text (updated)
|
||||
ptrend end of the whole string
|
||||
last TRUE if the last expected string (only } recognized)
|
||||
|
||||
Returns: 0 on success
|
||||
negative error code on failure
|
||||
*/
|
||||
|
||||
static int
|
||||
find_text_end(const pcre2_code *code, PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend,
|
||||
BOOL last)
|
||||
{
|
||||
int rc = 0;
|
||||
uint32_t nestlevel = 0;
|
||||
BOOL literal = FALSE;
|
||||
PCRE2_SPTR ptr = *ptrptr;
|
||||
|
||||
for (; ptr < ptrend; ptr++)
|
||||
{
|
||||
if (literal)
|
||||
{
|
||||
if (ptr[0] == CHAR_BACKSLASH && ptr < ptrend - 1 && ptr[1] == CHAR_E)
|
||||
{
|
||||
literal = FALSE;
|
||||
ptr += 1;
|
||||
}
|
||||
}
|
||||
|
||||
else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
|
||||
{
|
||||
if (nestlevel == 0) goto EXIT;
|
||||
nestlevel--;
|
||||
}
|
||||
|
||||
else if (*ptr == CHAR_COLON && !last && nestlevel == 0) goto EXIT;
|
||||
|
||||
else if (*ptr == CHAR_DOLLAR_SIGN)
|
||||
{
|
||||
if (ptr < ptrend - 1 && ptr[1] == CHAR_LEFT_CURLY_BRACKET)
|
||||
{
|
||||
nestlevel++;
|
||||
ptr += 1;
|
||||
}
|
||||
}
|
||||
|
||||
else if (*ptr == CHAR_BACKSLASH)
|
||||
{
|
||||
int erc;
|
||||
int errorcode;
|
||||
uint32_t ch;
|
||||
|
||||
if (ptr < ptrend - 1) switch (ptr[1])
|
||||
{
|
||||
case CHAR_L:
|
||||
case CHAR_l:
|
||||
case CHAR_U:
|
||||
case CHAR_u:
|
||||
ptr += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
ptr += 1; /* Must point after \ */
|
||||
erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
|
||||
code->overall_options, code->extra_options, code->top_bracket, FALSE, NULL);
|
||||
ptr -= 1; /* Back to last code unit of escape */
|
||||
if (errorcode != 0)
|
||||
{
|
||||
/* errorcode from check_escape is positive, so must not be returned by
|
||||
pcre2_substitute(). */
|
||||
rc = PCRE2_ERROR_BADREPESCAPE;
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
switch(erc)
|
||||
{
|
||||
case 0: /* Data character */
|
||||
case ESC_b: /* Data character */
|
||||
case ESC_v: /* Data character */
|
||||
case ESC_E: /* Isolated \E is ignored */
|
||||
break;
|
||||
|
||||
case ESC_Q:
|
||||
literal = TRUE;
|
||||
break;
|
||||
|
||||
case ESC_g:
|
||||
/* The \g<name> form (\g<number> already handled by check_escape)
|
||||
|
||||
Don't worry about finding the matching ">". We are super, super lenient
|
||||
about validating ${} replacements inside find_text_end(), so we certainly
|
||||
don't need to worry about other syntax. Importantly, a \g<..> or $<...>
|
||||
sequence can't contain a '}' character. */
|
||||
break;
|
||||
|
||||
default:
|
||||
if (erc < 0)
|
||||
break; /* capture group reference */
|
||||
rc = PCRE2_ERROR_BADREPESCAPE;
|
||||
goto EXIT;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rc = PCRE2_ERROR_REPMISSINGBRACE; /* Terminator not found */
|
||||
|
||||
EXIT:
|
||||
*ptrptr = ptr;
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Validate group name *
|
||||
*************************************************/
|
||||
|
||||
/* This function scans for a capture group name, validating it
|
||||
consists of legal characters, is not empty, and does not exceed
|
||||
MAX_NAME_SIZE.
|
||||
|
||||
Arguments:
|
||||
ptrptr points to the pointer to the start of the text (updated)
|
||||
ptrend end of the whole string
|
||||
utf true if the input is UTF-encoded
|
||||
ctypes pointer to the character types table
|
||||
|
||||
Returns: TRUE if a name was read
|
||||
FALSE otherwise
|
||||
*/
|
||||
|
||||
static BOOL
|
||||
read_name_subst(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, BOOL utf,
|
||||
const uint8_t* ctypes)
|
||||
{
|
||||
PCRE2_SPTR ptr = *ptrptr;
|
||||
PCRE2_SPTR nameptr = ptr;
|
||||
|
||||
if (ptr >= ptrend) /* No characters in name */
|
||||
goto FAILED;
|
||||
|
||||
/* We do not need to check whether the name starts with a non-digit.
|
||||
We are simply referencing names here, not defining them. */
|
||||
|
||||
/* See read_name in the pcre2_compile.c for the corresponding logic
|
||||
restricting group names inside the pattern itself. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
uint32_t c, type;
|
||||
|
||||
while (ptr < ptrend)
|
||||
{
|
||||
GETCHAR(c, ptr);
|
||||
type = UCD_CHARTYPE(c);
|
||||
if (type != ucp_Nd && PRIV(ucp_gentype)[type] != ucp_L &&
|
||||
c != CHAR_UNDERSCORE) break;
|
||||
ptr++;
|
||||
FORWARDCHARTEST(ptr, ptrend);
|
||||
}
|
||||
}
|
||||
else
|
||||
#else
|
||||
(void)utf; /* Avoid compiler warning */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Handle group names in non-UTF modes. */
|
||||
|
||||
{
|
||||
while (ptr < ptrend && MAX_255(*ptr) && (ctypes[*ptr] & ctype_word) != 0)
|
||||
{
|
||||
ptr++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Check name length */
|
||||
|
||||
if (ptr - nameptr > MAX_NAME_SIZE)
|
||||
goto FAILED;
|
||||
|
||||
/* Subpattern names must not be empty */
|
||||
if (ptr == nameptr)
|
||||
goto FAILED;
|
||||
|
||||
*ptrptr = ptr;
|
||||
return TRUE;
|
||||
|
||||
FAILED:
|
||||
*ptrptr = ptr;
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Case transformations *
|
||||
*************************************************/
|
||||
|
||||
#define PCRE2_SUBSTITUTE_CASE_NONE 0
|
||||
// 1, 2, 3 are PCRE2_SUBSTITUTE_CASE_LOWER, UPPER, TITLE_FIRST.
|
||||
#define PCRE2_SUBSTITUTE_CASE_REVERSE_TITLE_FIRST 4
|
||||
|
||||
/* Pending case-forcing state that is handed to the case transformation
helpers, which update it as text is processed. */
typedef struct {
|
||||
int to_case; /* One of PCRE2_SUBSTITUTE_CASE_xyz */
|
||||
/* When TRUE only the first character is transformed; the remainder is copied
unchanged and to_case is then reset to PCRE2_SUBSTITUTE_CASE_NONE. */
BOOL single_char;
|
||||
} case_state;
|
||||
|
||||
/* Helper to guess how much a string is likely to increase in size when
|
||||
case-transformed. Usually, strings don't change size at all, but some rare
|
||||
characters do grow. Estimate +12.5% (one eighth), plus another ten characters.
|
||||
|
||||
Performing this estimation is unfortunate, but inevitable, since we can't call
|
||||
the callout if we ran out of buffer space to prepare its input.
|
||||
|
||||
Because this estimate is inexact (and in pathological cases, underestimates the
|
||||
required buffer size) we must document that when you have a
|
||||
substitute_case_callout, and you are using PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, you
|
||||
may need more than two calls to determine the final buffer size. */
|
||||
|
||||
static PCRE2_SIZE
|
||||
pessimistic_case_inflation(PCRE2_SIZE len)
|
||||
{
|
||||
return (len >> 3u) + 10;
|
||||
}
|
||||
|
||||
/* Case transformation behaviour if no callout is passed. */
|
||||
|
||||
static PCRE2_SIZE
|
||||
default_substitute_case_callout(
|
||||
PCRE2_SPTR input, PCRE2_SIZE input_len,
|
||||
PCRE2_UCHAR *output, PCRE2_SIZE output_cap,
|
||||
case_state *state, const pcre2_code *code)
|
||||
{
|
||||
PCRE2_SPTR input_end = input + input_len;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
BOOL utf;
|
||||
BOOL ucp;
|
||||
#endif
|
||||
PCRE2_UCHAR temp[6];
|
||||
BOOL next_to_upper;
|
||||
BOOL rest_to_upper;
|
||||
BOOL single_char;
|
||||
BOOL overflow = FALSE;
|
||||
PCRE2_SIZE written = 0;
|
||||
|
||||
/* Helpful simplifying invariant: input and output are disjoint buffers.
|
||||
I believe that this code is technically undefined behaviour, because the two
|
||||
pointers input/output are "unrelated" pointers and hence not comparable. Casting
|
||||
via char* bypasses some but not all of those technical rules. It is not included
|
||||
in release builds, in any case. */
|
||||
PCRE2_ASSERT((char *)(input + input_len) <= (char *)output ||
|
||||
(char *)(output + output_cap) <= (char *)input);
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
utf = (code->overall_options & PCRE2_UTF) != 0;
|
||||
ucp = (code->overall_options & PCRE2_UCP) != 0;
|
||||
#endif
|
||||
|
||||
if (input_len == 0) return 0;
|
||||
|
||||
switch (state->to_case)
|
||||
{
|
||||
default:
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
return 0;
|
||||
|
||||
case PCRE2_SUBSTITUTE_CASE_LOWER: // Can be single_char TRUE or FALSE
|
||||
case PCRE2_SUBSTITUTE_CASE_UPPER: // Can only be single_char FALSE
|
||||
next_to_upper = rest_to_upper = (state->to_case == PCRE2_SUBSTITUTE_CASE_UPPER);
|
||||
break;
|
||||
|
||||
case PCRE2_SUBSTITUTE_CASE_TITLE_FIRST: // Can be single_char TRUE or FALSE
|
||||
next_to_upper = TRUE;
|
||||
rest_to_upper = FALSE;
|
||||
state->to_case = PCRE2_SUBSTITUTE_CASE_LOWER;
|
||||
break;
|
||||
|
||||
case PCRE2_SUBSTITUTE_CASE_REVERSE_TITLE_FIRST: // Can only be single_char FALSE
|
||||
next_to_upper = FALSE;
|
||||
rest_to_upper = TRUE;
|
||||
state->to_case = PCRE2_SUBSTITUTE_CASE_UPPER;
|
||||
break;
|
||||
}
|
||||
|
||||
single_char = state->single_char;
|
||||
if (single_char)
|
||||
state->to_case = PCRE2_SUBSTITUTE_CASE_NONE;
|
||||
|
||||
while (input < input_end)
|
||||
{
|
||||
uint32_t ch;
|
||||
unsigned int chlen;
|
||||
|
||||
GETCHARINCTEST(ch, input);
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if ((utf || ucp) && ch >= 128)
|
||||
{
|
||||
uint32_t type = UCD_CHARTYPE(ch);
|
||||
if (PRIV(ucp_gentype)[type] == ucp_L &&
|
||||
type != (next_to_upper? ucp_Lu : ucp_Ll))
|
||||
ch = UCD_OTHERCASE(ch);
|
||||
|
||||
/* TODO This is far from correct... it doesn't support the SpecialCasing.txt
|
||||
mappings, but worse, it's not even correct for all the ordinary case
|
||||
mappings. We should add support for those (at least), and then add the
|
||||
SpecialCasing.txt mappings for Esszet and ligatures, and finally use the
|
||||
Turkish casing flag on the match context. */
|
||||
}
|
||||
else
|
||||
#endif
|
||||
if (MAX_255(ch))
|
||||
{
|
||||
if (((code->tables + cbits_offset +
|
||||
(next_to_upper? cbit_upper:cbit_lower)
|
||||
)[ch/8] & (1u << (ch%8))) == 0)
|
||||
ch = (code->tables + fcc_offset)[ch];
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) chlen = PRIV(ord2utf)(ch, temp); else
|
||||
#endif
|
||||
{
|
||||
temp[0] = ch;
|
||||
chlen = 1;
|
||||
}
|
||||
|
||||
if (!overflow && chlen <= output_cap)
|
||||
{
|
||||
memcpy(output, temp, CU2BYTES(chlen));
|
||||
output += chlen;
|
||||
output_cap -= chlen;
|
||||
}
|
||||
else
|
||||
{
|
||||
overflow = TRUE;
|
||||
}
|
||||
|
||||
if (chlen > ~(PCRE2_SIZE)0 - written) /* Integer overflow */
|
||||
return ~(PCRE2_SIZE)0;
|
||||
written += chlen;
|
||||
|
||||
next_to_upper = rest_to_upper;
|
||||
|
||||
/* memcpy the remainder, if only transforming a single character. */
|
||||
|
||||
if (single_char)
|
||||
{
|
||||
PCRE2_SIZE rest_len = input_end - input;
|
||||
|
||||
if (!overflow && rest_len <= output_cap)
|
||||
memcpy(output, input, CU2BYTES(rest_len));
|
||||
|
||||
if (rest_len > ~(PCRE2_SIZE)0 - written) /* Integer overflow */
|
||||
return ~(PCRE2_SIZE)0;
|
||||
written += rest_len;
|
||||
|
||||
return written;
|
||||
}
|
||||
}
|
||||
|
||||
return written;
|
||||
}
|
||||
|
||||
/* Helper to perform the call to the substitute_case_callout. We wrap the
|
||||
user-provided callout because our internal arguments are slightly extended. We
|
||||
don't want the user callout to handle the case of "\l" (first character only to
|
||||
lowercase) or "\l\U" (first character to lowercase, rest to uppercase) because
|
||||
those are not operations defined by Unicode. Instead the user callout simply
|
||||
needs to provide the three Unicode primitives: lower, upper, titlecase. */
|
||||
|
||||
static PCRE2_SIZE
|
||||
do_case_copy(
|
||||
PCRE2_UCHAR *input_output, PCRE2_SIZE input_len, PCRE2_SIZE output_cap,
|
||||
case_state *state, BOOL utf,
|
||||
PCRE2_SIZE (*substitute_case_callout)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *,
|
||||
PCRE2_SIZE, int, void *),
|
||||
void *substitute_case_callout_data)
|
||||
{
|
||||
PCRE2_SPTR input = input_output;
|
||||
PCRE2_UCHAR *output = input_output;
|
||||
PCRE2_SIZE rc;
|
||||
PCRE2_SIZE rc2;
|
||||
int ch1_to_case;
|
||||
int rest_to_case;
|
||||
PCRE2_UCHAR ch1[6];
|
||||
PCRE2_SIZE ch1_len;
|
||||
PCRE2_SPTR rest;
|
||||
PCRE2_SIZE rest_len;
|
||||
BOOL ch1_overflow = FALSE;
|
||||
BOOL rest_overflow = FALSE;
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32 || !defined(SUPPORT_UNICODE)
|
||||
(void)utf; /* Avoid compiler warning. */
|
||||
#endif
|
||||
|
||||
PCRE2_ASSERT(input_len != 0);
|
||||
|
||||
switch (state->to_case)
|
||||
{
|
||||
default:
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
return 0;
|
||||
|
||||
case PCRE2_SUBSTITUTE_CASE_LOWER: // Can be single_char TRUE or FALSE
|
||||
case PCRE2_SUBSTITUTE_CASE_UPPER: // Can only be single_char FALSE
|
||||
case PCRE2_SUBSTITUTE_CASE_TITLE_FIRST: // Can be single_char TRUE or FALSE
|
||||
|
||||
/* The easy case, where our internal casing operations align with those of
|
||||
the callout. */
|
||||
|
||||
if (state->single_char == FALSE)
|
||||
{
|
||||
rc = substitute_case_callout(input, input_len, output, output_cap,
|
||||
state->to_case, substitute_case_callout_data);
|
||||
|
||||
if (state->to_case == PCRE2_SUBSTITUTE_CASE_TITLE_FIRST)
|
||||
state->to_case = PCRE2_SUBSTITUTE_CASE_LOWER;
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
ch1_to_case = state->to_case;
|
||||
rest_to_case = PCRE2_SUBSTITUTE_CASE_NONE;
|
||||
break;
|
||||
|
||||
case PCRE2_SUBSTITUTE_CASE_REVERSE_TITLE_FIRST: // Can only be single_char FALSE
|
||||
ch1_to_case = PCRE2_SUBSTITUTE_CASE_LOWER;
|
||||
rest_to_case = PCRE2_SUBSTITUTE_CASE_UPPER;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Identify the leading character. Take copy, because its storage overlaps with
|
||||
`output`, and hence may be scrambled by the callout. */
|
||||
|
||||
{
|
||||
PCRE2_SPTR ch_end = input;
|
||||
uint32_t ch;
|
||||
|
||||
GETCHARINCTEST(ch, ch_end);
|
||||
(void) ch;
|
||||
PCRE2_ASSERT(ch_end <= input + input_len && ch_end - input <= 6);
|
||||
ch1_len = ch_end - input;
|
||||
memcpy(ch1, input, CU2BYTES(ch1_len));
|
||||
}
|
||||
|
||||
rest = input + ch1_len;
|
||||
rest_len = input_len - ch1_len;
|
||||
|
||||
/* Transform just ch1. The buffers are always in-place (input == output). With a
|
||||
custom callout, we need a loop to discover its required buffer size. The loop
|
||||
wouldn't be required if the callout were well-behaved, but it might be naughty
|
||||
and return "5" the first time, then "10" the next time we call it using the
|
||||
exact same input! */
|
||||
|
||||
{
|
||||
PCRE2_SIZE ch1_cap;
|
||||
PCRE2_SIZE max_ch1_cap;
|
||||
|
||||
ch1_cap = ch1_len; /* First attempt uses the space vacated by ch1. */
|
||||
PCRE2_ASSERT(output_cap >= input_len && input_len >= rest_len);
|
||||
max_ch1_cap = output_cap - rest_len;
|
||||
|
||||
while (TRUE)
|
||||
{
|
||||
rc = substitute_case_callout(ch1, ch1_len, output, ch1_cap, ch1_to_case,
|
||||
substitute_case_callout_data);
|
||||
if (rc == ~(PCRE2_SIZE)0) return rc;
|
||||
|
||||
if (rc <= ch1_cap) break;
|
||||
|
||||
if (rc > max_ch1_cap)
|
||||
{
|
||||
ch1_overflow = TRUE;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Move the rest to the right, to make room for expanding ch1. */
|
||||
|
||||
memmove(input_output + rc, rest, CU2BYTES(rest_len));
|
||||
rest = input + rc;
|
||||
|
||||
ch1_cap = rc;
|
||||
|
||||
/* Proof of loop termination: `ch1_cap` is growing on each iteration, but
|
||||
the loop ends if `rc` reaches the (unchanging) upper bound of output_cap. */
|
||||
}
|
||||
}
|
||||
|
||||
if (rest_to_case == PCRE2_SUBSTITUTE_CASE_NONE)
|
||||
{
|
||||
if (!ch1_overflow)
|
||||
{
|
||||
PCRE2_ASSERT(rest_len <= output_cap - rc);
|
||||
memmove(output + rc, rest, CU2BYTES(rest_len));
|
||||
}
|
||||
rc2 = rest_len;
|
||||
|
||||
state->to_case = PCRE2_SUBSTITUTE_CASE_NONE;
|
||||
}
|
||||
else
|
||||
{
|
||||
PCRE2_UCHAR dummy[1];
|
||||
|
||||
rc2 = substitute_case_callout(rest, rest_len,
|
||||
ch1_overflow? dummy : output + rc,
|
||||
ch1_overflow? 0u : output_cap - rc,
|
||||
rest_to_case, substitute_case_callout_data);
|
||||
if (rc2 == ~(PCRE2_SIZE)0) return rc2;
|
||||
|
||||
if (!ch1_overflow && rc2 > output_cap - rc) rest_overflow = TRUE;
|
||||
|
||||
/* If ch1 grows so that `xform(ch1)+rest` can't fit in the buffer, but then
|
||||
`rest` shrinks, it's actually possible for the total calculated length of
|
||||
`xform(ch1)+xform(rest)` to come out at less than output_cap. But we can't
|
||||
report that, because it would make it seem that the operation succeeded.
|
||||
If either of xform(ch1) or xform(rest) won't fit in the buffer, our final
|
||||
result must be > output_cap. */
|
||||
if (ch1_overflow && rc2 < rest_len)
|
||||
rc2 = rest_len;
|
||||
|
||||
state->to_case = PCRE2_SUBSTITUTE_CASE_UPPER;
|
||||
}
|
||||
|
||||
if (rc2 > ~(PCRE2_SIZE)0 - rc) /* Integer overflow */
|
||||
return ~(PCRE2_SIZE)0;
|
||||
|
||||
PCRE2_ASSERT(!(ch1_overflow || rest_overflow) || rc + rc2 > output_cap);
|
||||
(void)rest_overflow;
|
||||
|
||||
return rc + rc2;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Match and substitute *
|
||||
*************************************************/
|
||||
|
||||
/* This function applies a compiled re to a subject string and creates a new
|
||||
string with substitutions. The first 7 arguments are the same as for
|
||||
pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED.
|
||||
|
||||
Arguments:
|
||||
code points to the compiled expression
|
||||
subject points to the subject string
|
||||
length length of subject string (may contain binary zeros)
|
||||
start_offset where to start in the subject string
|
||||
options option bits
|
||||
match_data points to a match_data block, or is NULL
|
||||
context points to a PCRE2 match context
|
||||
replacement points to the replacement string
|
||||
rlength length of replacement string
|
||||
buffer where to put the substituted string
|
||||
blength points to length of buffer; updated to length of string
|
||||
|
||||
Returns: >= 0 number of substitutions made
|
||||
< 0 an error code
|
||||
PCRE2_ERROR_BADREPLACEMENT means invalid use of $
|
||||
*/
|
||||
|
||||
/* This macro checks for space in the buffer before copying into it. On
|
||||
overflow, either give an error immediately, or keep on, accumulating the
|
||||
length. */
|
||||
|
||||
/* Copy `length_` code units from `from` to the output buffer if they fit.
If they do not fit, either jump to NOROOM or, when
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set, switch to overflow mode, in which only
the extra length needed is accumulated. Uses the caller's locals `overflowed`,
`extra_needed`, `lengthleft`, `buff_offset`, `buffer` and `suboptions`, and the
labels NOROOM and TOOLARGEREPLACE. */

#define CHECKMEMCPY(from, length_) \
  do { \
    PCRE2_SIZE chkmc_length = length_; \
    if (!overflowed && chkmc_length <= lengthleft) \
      { \
      /* It fits: copy and advance */ \
      memcpy(buffer + buff_offset, from, CU2BYTES(chkmc_length)); \
      buff_offset += chkmc_length; \
      lengthleft -= chkmc_length; \
      } \
    else if (!overflowed) \
      { \
      /* First overflow: fail now, or start counting the shortfall */ \
      if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \
      overflowed = TRUE; \
      extra_needed = chkmc_length - lengthleft; \
      } \
    else \
      { \
      /* Already overflowed: just accumulate the length */ \
      if (chkmc_length > ~(PCRE2_SIZE)0 - extra_needed) /* Integer overflow */ \
        goto TOOLARGEREPLACE; \
      extra_needed += chkmc_length; \
      } \
  } \
  while (0)
|
||||
|
||||
/* This macro checks for space and copies characters with casing modifications.
|
||||
On overflow, it behaves as for CHECKMEMCPY().
|
||||
|
||||
When substitute_case_callout is NULL, the source and destination buffers must
|
||||
not overlap, because our default handler does not support this. */
|
||||
|
||||
/* Shared skeleton for the case-copying macros. `do_call` is a statement block
that must set chkcc_rc to the number of code units the transformed text needs;
it may read chkcc_length and may use `break` to leave the macro early. The
result is then accounted for in the same way as in CHECKMEMCPY(), except that
no copying is done here because `do_call` has already written the output. */

#define CHECKCASECPY_BASE(length_, do_call) \
  do { \
    PCRE2_SIZE chkcc_length = (PCRE2_SIZE)(length_); \
    PCRE2_SIZE chkcc_rc; \
    do_call \
    if (chkcc_rc <= lengthleft) \
      { \
      /* The transformed text fitted: advance past it */ \
      buff_offset += chkcc_rc; \
      lengthleft -= chkcc_rc; \
      } \
    else \
      { \
      /* Fail now, or start counting the shortfall */ \
      if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \
      overflowed = TRUE; \
      extra_needed = chkcc_rc - lengthleft; \
      } \
  } \
  while (0)
|
||||
|
||||
/* Case-copy `length_` code units from `from` with the built-in transformation.
Once the output has overflowed, the transformation is given zero capacity, the
length it reports is added to extra_needed, and the `break` leaves the enclosing
CHECKCASECPY_BASE loop so that the normal accounting is skipped. */

#define CHECKCASECPY_DEFAULT(from, length_) \
  CHECKCASECPY_BASE(length_, { \
    PCRE2_SIZE chkcc_cap = overflowed? 0 : lengthleft; \
    chkcc_rc = default_substitute_case_callout(from, chkcc_length, \
                                               buffer + buff_offset, \
                                               chkcc_cap, \
                                               &forcecase, code); \
    if (overflowed) \
      { \
      if (chkcc_rc > ~(PCRE2_SIZE)0 - extra_needed) /* Integer overflow */ \
        goto TOOLARGEREPLACE; \
      extra_needed += chkcc_rc; \
      break; \
      } \
  })
|
||||
|
||||
/* Case-transform, in place, the `length_` code units that start at the current
buffer offset, using the user-supplied callout via do_case_copy(). A return of
~(PCRE2_SIZE)0 from do_case_copy() is treated as an error and jumps to
CASEERROR. */

#define CHECKCASECPY_CALLOUT(length_) \
  CHECKCASECPY_BASE(length_, { \
    chkcc_rc = do_case_copy( \
      buffer + buff_offset, chkcc_length, lengthleft, \
      &forcecase, utf, \
      substitute_case_callout, substitute_case_callout_data); \
    if (chkcc_rc == ~(PCRE2_SIZE)0) \
      goto CASEERROR; \
  })
|
||||
|
||||
/* This macro does a delayed case transformation, for the situation when we have
|
||||
a case-forcing callout. */
|
||||
|
||||
/* Apply a pending case transformation to the text emitted since
casestart_offset / casestart_extra_needed. If the output has already
overflowed, the text cannot be passed to the callout, so an estimate of its
possible growth is added to extra_needed instead. Otherwise the buffer is
rewound to the start of that text and it is transformed in place. */

#define DELAYEDFORCECASE() \
  do { \
    PCRE2_SIZE dfc_pending = (buff_offset - casestart_offset) + \
                             (extra_needed - casestart_extra_needed); \
    if (dfc_pending == 0) break;   /* Nothing to transform */ \
    if (!overflowed) \
      { \
      /* Rewind the buffer */ \
      lengthleft += (buff_offset - casestart_offset); \
      buff_offset = casestart_offset; \
      /* Care! In-place case transformation */ \
      CHECKCASECPY_CALLOUT(dfc_pending); \
      } \
    else \
      { \
      PCRE2_SIZE dfc_guess = pessimistic_case_inflation(dfc_pending); \
      if (dfc_guess > ~(PCRE2_SIZE)0 - extra_needed) /* Integer overflow */ \
        goto TOOLARGEREPLACE; \
      extra_needed += dfc_guess; \
      } \
  } \
  while (0)
|
||||
|
||||
|
||||
/* Here's the function */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
|
||||
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
|
||||
pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength,
|
||||
PCRE2_UCHAR *buffer, PCRE2_SIZE *blength)
|
||||
{
|
||||
int rc;
|
||||
int subs;
|
||||
uint32_t ovector_count;
|
||||
uint32_t goptions = 0;
|
||||
uint32_t suboptions;
|
||||
pcre2_match_data *internal_match_data = NULL;
|
||||
BOOL escaped_literal = FALSE;
|
||||
BOOL overflowed = FALSE;
|
||||
BOOL use_existing_match;
|
||||
BOOL replacement_only;
|
||||
BOOL utf = (code->overall_options & PCRE2_UTF) != 0;
|
||||
PCRE2_UCHAR temp[6];
|
||||
PCRE2_SPTR ptr;
|
||||
PCRE2_SPTR repend = NULL;
|
||||
PCRE2_SIZE extra_needed = 0;
|
||||
PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
|
||||
PCRE2_SIZE *ovector;
|
||||
PCRE2_SIZE ovecsave[3];
|
||||
pcre2_substitute_callout_block scb;
|
||||
PCRE2_SIZE sub_start_extra_needed;
|
||||
PCRE2_SIZE (*substitute_case_callout)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *,
|
||||
PCRE2_SIZE, int, void *) = NULL;
|
||||
void *substitute_case_callout_data = NULL;
|
||||
|
||||
/* General initialization */
|
||||
|
||||
buff_offset = 0;
|
||||
lengthleft = buff_length = *blength;
|
||||
*blength = PCRE2_UNSET;
|
||||
ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
|
||||
|
||||
if (mcontext != NULL)
|
||||
{
|
||||
substitute_case_callout = mcontext->substitute_case_callout;
|
||||
substitute_case_callout_data = mcontext->substitute_case_callout_data;
|
||||
}
|
||||
|
||||
/* Partial matching is not valid. This must come after setting *blength to
|
||||
PCRE2_UNSET, so as not to imply an offset in the replacement. */
|
||||
|
||||
if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
|
||||
/* Validate length and find the end of the replacement. A NULL replacement of
|
||||
zero length is interpreted as an empty string. */
|
||||
|
||||
if (replacement == NULL)
|
||||
{
|
||||
if (rlength != 0) return PCRE2_ERROR_NULL;
|
||||
replacement = (PCRE2_SPTR)"";
|
||||
}
|
||||
|
||||
if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
|
||||
repend = replacement + rlength;
|
||||
|
||||
/* Check for using a match that has already happened. Note that the subject
|
||||
pointer in the match data may be NULL after a no-match. */
|
||||
|
||||
use_existing_match = ((options & PCRE2_SUBSTITUTE_MATCHED) != 0);
|
||||
replacement_only = ((options & PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) != 0);
|
||||
|
||||
/* If starting from an existing match, there must be an externally provided
|
||||
match data block. We create an internal match_data block in two cases: (a) an
|
||||
external one is not supplied (and we are not starting from an existing match);
|
||||
(b) an existing match is to be used for the first substitution. In the latter
|
||||
case, we copy the existing match into the internal block, except for any cached
|
||||
heap frame size and pointer. This ensures that no changes are made to the
|
||||
external match data block. */
|
||||
|
||||
/* WARNING: In both cases below a general context is constructed "by hand"
|
||||
because calling pcre2_general_context_create() involves a memory allocation. If
|
||||
the contents of a general context control block are ever changed there will
|
||||
have to be changes below. */
|
||||
|
||||
if (match_data == NULL)
|
||||
{
|
||||
pcre2_general_context gcontext;
|
||||
if (use_existing_match) return PCRE2_ERROR_NULL;
|
||||
gcontext.memctl = (mcontext == NULL)?
|
||||
((const pcre2_real_code *)code)->memctl :
|
||||
((pcre2_real_match_context *)mcontext)->memctl;
|
||||
match_data = internal_match_data =
|
||||
pcre2_match_data_create_from_pattern(code, &gcontext);
|
||||
if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
}
|
||||
|
||||
else if (use_existing_match)
|
||||
{
|
||||
int pairs;
|
||||
pcre2_general_context gcontext;
|
||||
gcontext.memctl = (mcontext == NULL)?
|
||||
((const pcre2_real_code *)code)->memctl :
|
||||
((pcre2_real_match_context *)mcontext)->memctl;
|
||||
pairs = (code->top_bracket + 1 < match_data->oveccount)?
|
||||
code->top_bracket + 1 : match_data->oveccount;
|
||||
internal_match_data = pcre2_match_data_create(match_data->oveccount,
|
||||
&gcontext);
|
||||
if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
memcpy(internal_match_data, match_data, offsetof(pcre2_match_data, ovector)
|
||||
+ 2*pairs*sizeof(PCRE2_SIZE));
|
||||
internal_match_data->heapframes = NULL;
|
||||
internal_match_data->heapframes_size = 0;
|
||||
match_data = internal_match_data;
|
||||
}
|
||||
|
||||
/* Remember ovector details */
|
||||
|
||||
ovector = pcre2_get_ovector_pointer(match_data);
|
||||
ovector_count = pcre2_get_ovector_count(match_data);
|
||||
|
||||
/* Fixed things in the callout block */
|
||||
|
||||
scb.version = 0;
|
||||
scb.input = subject;
|
||||
scb.output = (PCRE2_SPTR)buffer;
|
||||
scb.ovector = ovector;
|
||||
|
||||
/* A NULL subject of zero length is treated as an empty string. */
|
||||
|
||||
if (subject == NULL)
|
||||
{
|
||||
if (length != 0) return PCRE2_ERROR_NULL;
|
||||
subject = (PCRE2_SPTR)"";
|
||||
}
|
||||
|
||||
/* Find length of zero-terminated subject */
|
||||
|
||||
if (length == PCRE2_ZERO_TERMINATED)
|
||||
length = subject? PRIV(strlen)(subject) : 0;
|
||||
|
||||
/* Check UTF replacement string if necessary. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
||||
{
|
||||
rc = PRIV(valid_utf)(replacement, rlength, &(match_data->startchar));
|
||||
if (rc != 0)
|
||||
{
|
||||
match_data->leftchar = 0;
|
||||
goto EXIT;
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Save the substitute options and remove them from the match options. */
|
||||
|
||||
suboptions = options & SUBSTITUTE_OPTIONS;
|
||||
options &= ~SUBSTITUTE_OPTIONS;
|
||||
|
||||
/* Error if the start match offset is greater than the length of the subject. */
|
||||
|
||||
if (start_offset > length)
|
||||
{
|
||||
match_data->leftchar = 0;
|
||||
rc = PCRE2_ERROR_BADOFFSET;
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* Copy up to the start offset, unless only the replacement is required. */
|
||||
|
||||
if (!replacement_only) CHECKMEMCPY(subject, start_offset);
|
||||
|
||||
/* Loop for global substituting. If PCRE2_SUBSTITUTE_MATCHED is set, the first
|
||||
match is taken from the match_data that was passed in. */
|
||||
|
||||
subs = 0;
|
||||
do
|
||||
{
|
||||
PCRE2_SPTR ptrstack[PTR_STACK_SIZE];
|
||||
uint32_t ptrstackptr = 0;
|
||||
case_state forcecase = { PCRE2_SUBSTITUTE_CASE_NONE, FALSE };
|
||||
PCRE2_SIZE casestart_offset = 0;
|
||||
PCRE2_SIZE casestart_extra_needed = 0;
|
||||
|
||||
if (use_existing_match)
|
||||
{
|
||||
rc = match_data->rc;
|
||||
use_existing_match = FALSE;
|
||||
}
|
||||
else rc = pcre2_match(code, subject, length, start_offset, options|goptions,
|
||||
match_data, mcontext);
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) options |= PCRE2_NO_UTF_CHECK; /* Only need to check once */
|
||||
#endif
|
||||
|
||||
/* Any error other than no match returns the error code. No match when not
|
||||
doing the special after-empty-match global rematch, or when at the end of the
|
||||
subject, breaks the global loop. Otherwise, advance the starting point by one
|
||||
character, copying it to the output, and try again. */
|
||||
|
||||
if (rc < 0)
|
||||
{
|
||||
PCRE2_SIZE save_start;
|
||||
|
||||
if (rc != PCRE2_ERROR_NOMATCH) goto EXIT;
|
||||
if (goptions == 0 || start_offset >= length) break;
|
||||
|
||||
/* Advance by one code point. Then, if CRLF is a valid newline sequence and
|
||||
we have advanced into the middle of it, advance one more code point. In
|
||||
other words, do not start in the middle of CRLF, even if CR and LF on their
|
||||
own are valid newlines. */
|
||||
|
||||
save_start = start_offset++;
|
||||
if (subject[start_offset-1] == CHAR_CR &&
|
||||
(code->newline_convention == PCRE2_NEWLINE_CRLF ||
|
||||
code->newline_convention == PCRE2_NEWLINE_ANY ||
|
||||
code->newline_convention == PCRE2_NEWLINE_ANYCRLF) &&
|
||||
start_offset < length &&
|
||||
subject[start_offset] == CHAR_LF)
|
||||
start_offset++;
|
||||
|
||||
/* Otherwise, in UTF mode, advance past any secondary code points. */
|
||||
|
||||
else if ((code->overall_options & PCRE2_UTF) != 0)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
while (start_offset < length && (subject[start_offset] & 0xc0) == 0x80)
|
||||
start_offset++;
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
while (start_offset < length &&
|
||||
(subject[start_offset] & 0xfc00) == 0xdc00)
|
||||
start_offset++;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Copy what we have advanced past (unless not required), reset the special
|
||||
global options, and continue to the next match. */
|
||||
|
||||
fraglength = start_offset - save_start;
|
||||
if (!replacement_only) CHECKMEMCPY(subject + save_start, fraglength);
|
||||
goptions = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Handle a successful match. Matches that use \K to end before they start
|
||||
or start before the current point in the subject are not supported. */
|
||||
|
||||
if (ovector[1] < ovector[0] || ovector[0] < start_offset)
|
||||
{
|
||||
rc = PCRE2_ERROR_BADSUBSPATTERN;
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* Check for the same match as previous. This is legitimate after matching an
|
||||
empty string that starts after the initial match offset. We have tried again
|
||||
at the match point in case the pattern is one like /(?<=\G.)/ which can never
|
||||
match at its starting point, so running the match achieves the bumpalong. If
|
||||
we do get the same (null) match at the original match point, it isn't such a
|
||||
pattern, so we now do the empty string magic. In all other cases, a repeat
|
||||
match should never occur. */
|
||||
|
||||
if (ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
|
||||
{
|
||||
if (ovector[0] == ovector[1] && ovecsave[2] != start_offset)
|
||||
{
|
||||
goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
|
||||
ovecsave[2] = start_offset;
|
||||
continue; /* Back to the top of the loop */
|
||||
}
|
||||
rc = PCRE2_ERROR_INTERNAL_DUPMATCH;
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* Count substitutions with a paranoid check for integer overflow; surely no
|
||||
real call to this function would ever hit this! */
|
||||
|
||||
if (subs == INT_MAX)
|
||||
{
|
||||
rc = PCRE2_ERROR_TOOMANYREPLACE;
|
||||
goto EXIT;
|
||||
}
|
||||
subs++;
|
||||
|
||||
/* Copy the text leading up to the match (unless not required); remember
|
||||
where the insert begins and how many ovector pairs are set; and remember how
|
||||
much space we have requested in extra_needed. */
|
||||
|
||||
if (rc == 0) rc = ovector_count;
|
||||
fraglength = ovector[0] - start_offset;
|
||||
if (!replacement_only) CHECKMEMCPY(subject + start_offset, fraglength);
|
||||
scb.output_offsets[0] = buff_offset;
|
||||
scb.oveccount = rc;
|
||||
sub_start_extra_needed = extra_needed;
|
||||
|
||||
/* Process the replacement string. If the entire replacement is literal, just
|
||||
copy it with length check. */
|
||||
|
||||
ptr = replacement;
|
||||
if ((suboptions & PCRE2_SUBSTITUTE_LITERAL) != 0)
|
||||
{
|
||||
CHECKMEMCPY(ptr, rlength);
|
||||
}
|
||||
|
||||
/* Within a non-literal replacement, which must be scanned character by
|
||||
character, local literal mode can be set by \Q, but only in extended mode
|
||||
when backslashes are being interpreted. In extended mode we must handle
|
||||
nested substrings that are to be reprocessed. */
|
||||
|
||||
else for (;;)
|
||||
{
|
||||
uint32_t ch;
|
||||
unsigned int chlen;
|
||||
int group;
|
||||
uint32_t special;
|
||||
PCRE2_SPTR text1_start = NULL;
|
||||
PCRE2_SPTR text1_end = NULL;
|
||||
PCRE2_SPTR text2_start = NULL;
|
||||
PCRE2_SPTR text2_end = NULL;
|
||||
PCRE2_UCHAR name[MAX_NAME_SIZE + 1];
|
||||
|
||||
/* If at the end of a nested substring, pop the stack. */
|
||||
|
||||
if (ptr >= repend)
|
||||
{
|
||||
if (ptrstackptr == 0) break; /* End of replacement string */
|
||||
repend = ptrstack[--ptrstackptr];
|
||||
ptr = ptrstack[--ptrstackptr];
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Handle the next character */
|
||||
|
||||
if (escaped_literal)
|
||||
{
|
||||
if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E)
|
||||
{
|
||||
escaped_literal = FALSE;
|
||||
ptr += 2;
|
||||
continue;
|
||||
}
|
||||
goto LOADLITERAL;
|
||||
}
|
||||
|
||||
/* Not in literal mode. */
|
||||
|
||||
if (*ptr == CHAR_DOLLAR_SIGN)
|
||||
{
|
||||
BOOL inparens;
|
||||
BOOL inangle;
|
||||
BOOL star;
|
||||
PCRE2_SIZE sublength;
|
||||
PCRE2_UCHAR next;
|
||||
PCRE2_SPTR subptr, subptrend;
|
||||
|
||||
if (++ptr >= repend) goto BAD;
|
||||
if ((next = *ptr) == CHAR_DOLLAR_SIGN) goto LOADLITERAL;
|
||||
|
||||
special = 0;
|
||||
text1_start = NULL;
|
||||
text1_end = NULL;
|
||||
text2_start = NULL;
|
||||
text2_end = NULL;
|
||||
group = -1;
|
||||
inparens = FALSE;
|
||||
inangle = FALSE;
|
||||
star = FALSE;
|
||||
subptr = NULL;
|
||||
subptrend = NULL;
|
||||
|
||||
/* Special $ sequences, as supported by Perl, JavaScript, .NET and others. */
|
||||
if (next == CHAR_AMPERSAND)
|
||||
{
|
||||
++ptr;
|
||||
group = 0;
|
||||
goto GROUP_SUBSTITUTE;
|
||||
}
|
||||
if (next == CHAR_GRAVE_ACCENT || next == CHAR_APOSTROPHE)
|
||||
{
|
||||
++ptr;
|
||||
rc = pcre2_substring_length_bynumber(match_data, 0, &sublength);
|
||||
if (rc < 0) goto PTREXIT; /* (Sanity-check ovector before reading from it.) */
|
||||
|
||||
if (next == CHAR_GRAVE_ACCENT)
|
||||
{
|
||||
subptr = subject;
|
||||
subptrend = subject + ovector[0];
|
||||
}
|
||||
else
|
||||
{
|
||||
subptr = subject + ovector[1];
|
||||
subptrend = subject + length;
|
||||
}
|
||||
|
||||
goto SUBPTR_SUBSTITUTE;
|
||||
}
|
||||
if (next == CHAR_UNDERSCORE)
|
||||
{
|
||||
/* Java, .NET support $_ for "entire input string". */
|
||||
++ptr;
|
||||
subptr = subject;
|
||||
subptrend = subject + length;
|
||||
goto SUBPTR_SUBSTITUTE;
|
||||
}
|
||||
|
||||
if (next == CHAR_LEFT_CURLY_BRACKET)
|
||||
{
|
||||
if (++ptr >= repend) goto BAD;
|
||||
next = *ptr;
|
||||
inparens = TRUE;
|
||||
}
|
||||
else if (next == CHAR_LESS_THAN_SIGN)
|
||||
{
|
||||
/* JavaScript compatibility syntax, $<name>. Processes only named
|
||||
groups (not numbered) and does not support extensions such as star
|
||||
(you can do ${name} and ${*name}, but not $<*name>). */
|
||||
if (++ptr >= repend) goto BAD;
|
||||
next = *ptr;
|
||||
inangle = TRUE;
|
||||
}
|
||||
|
||||
if (!inangle && next == CHAR_ASTERISK)
|
||||
{
|
||||
if (++ptr >= repend) goto BAD;
|
||||
next = *ptr;
|
||||
star = TRUE;
|
||||
}
|
||||
|
||||
if (!star && !inangle && next >= CHAR_0 && next <= CHAR_9)
|
||||
{
|
||||
group = next - CHAR_0;
|
||||
while (++ptr < repend)
|
||||
{
|
||||
next = *ptr;
|
||||
if (next < CHAR_0 || next > CHAR_9) break;
|
||||
group = group * 10 + (next - CHAR_0);
|
||||
|
||||
/* A check for a number greater than the highest captured group
|
||||
is sufficient here; no need for a separate overflow check. If unknown
|
||||
groups are to be treated as unset, just skip over any remaining
|
||||
digits and carry on. */
|
||||
|
||||
if (group > code->top_bracket)
|
||||
{
|
||||
if ((suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
|
||||
{
|
||||
while (++ptr < repend && *ptr >= CHAR_0 && *ptr <= CHAR_9);
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
rc = PCRE2_ERROR_NOSUBSTRING;
|
||||
goto PTREXIT;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
PCRE2_SIZE name_len;
|
||||
PCRE2_SPTR name_start = ptr;
|
||||
if (!read_name_subst(&ptr, repend, utf, code->tables + ctypes_offset))
|
||||
goto BAD;
|
||||
name_len = ptr - name_start;
|
||||
memcpy(name, name_start, CU2BYTES(name_len));
|
||||
name[name_len] = 0;
|
||||
}
|
||||
|
||||
next = 0; /* not used or updated after this point */
|
||||
(void)next;
|
||||
|
||||
/* In extended mode we recognize ${name:+set text:unset text} and
|
||||
${name:-default text}. */
|
||||
|
||||
if (inparens)
|
||||
{
|
||||
if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
|
||||
!star && ptr < repend - 2 && *ptr == CHAR_COLON)
|
||||
{
|
||||
special = *(++ptr);
|
||||
if (special != CHAR_PLUS && special != CHAR_MINUS)
|
||||
{
|
||||
rc = PCRE2_ERROR_BADSUBSTITUTION;
|
||||
goto PTREXIT;
|
||||
}
|
||||
|
||||
text1_start = ++ptr;
|
||||
rc = find_text_end(code, &ptr, repend, special == CHAR_MINUS);
|
||||
if (rc != 0) goto PTREXIT;
|
||||
text1_end = ptr;
|
||||
|
||||
if (special == CHAR_PLUS && *ptr == CHAR_COLON)
|
||||
{
|
||||
text2_start = ++ptr;
|
||||
rc = find_text_end(code, &ptr, repend, TRUE);
|
||||
if (rc != 0) goto PTREXIT;
|
||||
text2_end = ptr;
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
if (ptr >= repend || *ptr != CHAR_RIGHT_CURLY_BRACKET)
|
||||
{
|
||||
rc = PCRE2_ERROR_REPMISSINGBRACE;
|
||||
goto PTREXIT;
|
||||
}
|
||||
}
|
||||
|
||||
ptr++;
|
||||
}
|
||||
|
||||
if (inangle)
|
||||
{
|
||||
if (ptr >= repend || *ptr != CHAR_GREATER_THAN_SIGN)
|
||||
goto BAD;
|
||||
ptr++;
|
||||
}
|
||||
|
||||
/* Have found a syntactically correct group number or name, or *name.
|
||||
Only *MARK is currently recognized. */
|
||||
|
||||
if (star)
|
||||
{
|
||||
if (PRIV(strcmp_c8)(name, STRING_MARK) == 0)
|
||||
{
|
||||
PCRE2_SPTR mark = pcre2_get_mark(match_data);
|
||||
if (mark != NULL)
|
||||
{
|
||||
/* Peek backwards one code unit to obtain the length of the mark.
|
||||
It can (theoretically) contain an embedded NUL. */
|
||||
fraglength = mark[-1];
|
||||
if (forcecase.to_case != PCRE2_SUBSTITUTE_CASE_NONE &&
|
||||
substitute_case_callout == NULL)
|
||||
CHECKCASECPY_DEFAULT(mark, fraglength);
|
||||
else
|
||||
CHECKMEMCPY(mark, fraglength);
|
||||
}
|
||||
}
|
||||
else goto BAD;
|
||||
}
|
||||
|
||||
/* Substitute the contents of a group. We don't use substring_copy
|
||||
functions any more, in order to support case forcing. */
|
||||
|
||||
else
|
||||
{
|
||||
GROUP_SUBSTITUTE:
|
||||
/* Find a number for a named group. In case there are duplicate names,
|
||||
search for the first one that is set. If the name is not found when
|
||||
PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set, set the group number to a
|
||||
non-existent group. */
|
||||
|
||||
if (group < 0)
|
||||
{
|
||||
PCRE2_SPTR first, last, entry;
|
||||
rc = pcre2_substring_nametable_scan(code, name, &first, &last);
|
||||
if (rc == PCRE2_ERROR_NOSUBSTRING &&
|
||||
(suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
|
||||
{
|
||||
group = code->top_bracket + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (rc < 0) goto PTREXIT;
|
||||
for (entry = first; entry <= last; entry += rc)
|
||||
{
|
||||
uint32_t ng = GET2(entry, 0);
|
||||
if (ng < ovector_count)
|
||||
{
|
||||
if (group < 0) group = ng; /* First in ovector */
|
||||
if (ovector[ng*2] != PCRE2_UNSET)
|
||||
{
|
||||
group = ng; /* First that is set */
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* If group is still negative, it means we did not find a group
|
||||
that is in the ovector. Just set the first group. */
|
||||
|
||||
if (group < 0) group = GET2(first, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/* We now have a group that is identified by number. Find the length of
|
||||
the captured string. If a group in a non-special substitution is unset
|
||||
when PCRE2_SUBSTITUTE_UNSET_EMPTY is set, substitute nothing. */
|
||||
|
||||
rc = pcre2_substring_length_bynumber(match_data, group, &sublength);
|
||||
if (rc < 0)
|
||||
{
|
||||
if (rc == PCRE2_ERROR_NOSUBSTRING &&
|
||||
(suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
|
||||
{
|
||||
rc = PCRE2_ERROR_UNSET;
|
||||
}
|
||||
if (rc != PCRE2_ERROR_UNSET) goto PTREXIT; /* Non-unset errors */
|
||||
if (special == 0) /* Plain substitution */
|
||||
{
|
||||
if ((suboptions & PCRE2_SUBSTITUTE_UNSET_EMPTY) != 0) continue;
|
||||
goto PTREXIT; /* Else error */
|
||||
}
|
||||
}
|
||||
|
||||
/* If special is '+' we have a 'set' and possibly an 'unset' text,
|
||||
both of which are reprocessed when used. If special is '-' we have a
|
||||
default text for when the group is unset; it must be reprocessed. */
|
||||
|
||||
if (special != 0)
|
||||
{
|
||||
if (special == CHAR_MINUS)
|
||||
{
|
||||
if (rc == 0) goto LITERAL_SUBSTITUTE;
|
||||
text2_start = text1_start;
|
||||
text2_end = text1_end;
|
||||
}
|
||||
|
||||
if (ptrstackptr >= PTR_STACK_SIZE) goto BAD;
|
||||
ptrstack[ptrstackptr++] = ptr;
|
||||
ptrstack[ptrstackptr++] = repend;
|
||||
|
||||
if (rc == 0)
|
||||
{
|
||||
ptr = text1_start;
|
||||
repend = text1_end;
|
||||
}
|
||||
else
|
||||
{
|
||||
ptr = text2_start;
|
||||
repend = text2_end;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Otherwise we have a literal substitution of a group's contents. */
|
||||
|
||||
LITERAL_SUBSTITUTE:
|
||||
subptr = subject + ovector[group*2];
|
||||
subptrend = subject + ovector[group*2 + 1];
|
||||
|
||||
/* Substitute a literal string, possibly forcing alphabetic case. */
|
||||
|
||||
SUBPTR_SUBSTITUTE:
|
||||
if (forcecase.to_case != PCRE2_SUBSTITUTE_CASE_NONE &&
|
||||
substitute_case_callout == NULL)
|
||||
CHECKCASECPY_DEFAULT(subptr, subptrend - subptr);
|
||||
else
|
||||
CHECKMEMCPY(subptr, subptrend - subptr);
|
||||
}
|
||||
} /* End of $ processing */
|
||||
|
||||
/* Handle an escape sequence in extended mode. We can use check_escape()
|
||||
to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but
|
||||
the case-forcing escapes are not supported in pcre2_compile() so must be
|
||||
recognized here. */
|
||||
|
||||
else if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
|
||||
*ptr == CHAR_BACKSLASH)
|
||||
{
|
||||
int errorcode;
|
||||
case_state new_forcecase = { PCRE2_SUBSTITUTE_CASE_NONE, FALSE };
|
||||
|
||||
if (ptr < repend - 1) switch (ptr[1])
|
||||
{
|
||||
case CHAR_L:
|
||||
new_forcecase.to_case = PCRE2_SUBSTITUTE_CASE_LOWER;
|
||||
new_forcecase.single_char = FALSE;
|
||||
ptr += 2;
|
||||
break;
|
||||
|
||||
case CHAR_l:
|
||||
new_forcecase.to_case = PCRE2_SUBSTITUTE_CASE_LOWER;
|
||||
new_forcecase.single_char = TRUE;
|
||||
ptr += 2;
|
||||
if (ptr + 2 < repend && ptr[0] == CHAR_BACKSLASH && ptr[1] == CHAR_U)
|
||||
{
|
||||
/* Perl reverse-title-casing feature for \l\U */
|
||||
new_forcecase.to_case = PCRE2_SUBSTITUTE_CASE_REVERSE_TITLE_FIRST;
|
||||
new_forcecase.single_char = FALSE;
|
||||
ptr += 2;
|
||||
}
|
||||
break;
|
||||
|
||||
case CHAR_U:
|
||||
new_forcecase.to_case = PCRE2_SUBSTITUTE_CASE_UPPER;
|
||||
new_forcecase.single_char = FALSE;
|
||||
ptr += 2;
|
||||
break;
|
||||
|
||||
case CHAR_u:
|
||||
new_forcecase.to_case = PCRE2_SUBSTITUTE_CASE_TITLE_FIRST;
|
||||
new_forcecase.single_char = TRUE;
|
||||
ptr += 2;
|
||||
if (ptr + 2 < repend && ptr[0] == CHAR_BACKSLASH && ptr[1] == CHAR_L)
|
||||
{
|
||||
/* Perl title-casing feature for \u\L */
|
||||
new_forcecase.to_case = PCRE2_SUBSTITUTE_CASE_TITLE_FIRST;
|
||||
new_forcecase.single_char = FALSE;
|
||||
ptr += 2;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (new_forcecase.to_case != PCRE2_SUBSTITUTE_CASE_NONE)
|
||||
{
|
||||
SETFORCECASE:
|
||||
|
||||
/* If the substitute_case_callout is unset, our case-forcing is done
|
||||
immediately. If there is a callout however, then its action is delayed
|
||||
until all the characters have been collected.
|
||||
|
||||
Apply the callout now, before we set the new casing mode. */
|
||||
|
||||
if (substitute_case_callout != NULL &&
|
||||
forcecase.to_case != PCRE2_SUBSTITUTE_CASE_NONE)
|
||||
DELAYEDFORCECASE();
|
||||
|
||||
forcecase = new_forcecase;
|
||||
casestart_offset = buff_offset;
|
||||
casestart_extra_needed = extra_needed;
|
||||
continue;
|
||||
}
|
||||
|
||||
ptr++; /* Point after \ */
|
||||
rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
|
||||
code->overall_options, code->extra_options, code->top_bracket, FALSE, NULL);
|
||||
if (errorcode != 0) goto BADESCAPE;
|
||||
|
||||
switch(rc)
|
||||
{
|
||||
case ESC_E:
|
||||
goto SETFORCECASE;
|
||||
|
||||
case ESC_Q:
|
||||
escaped_literal = TRUE;
|
||||
continue;
|
||||
|
||||
case 0: /* Data character */
|
||||
case ESC_b: /* \b is backspace in a substitution */
|
||||
case ESC_v: /* \v is vertical tab in a substitution */
|
||||
|
||||
if (rc == ESC_b) ch = CHAR_BS;
|
||||
if (rc == ESC_v) ch = CHAR_VT;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) chlen = PRIV(ord2utf)(ch, temp); else
|
||||
#endif
|
||||
{
|
||||
temp[0] = ch;
|
||||
chlen = 1;
|
||||
}
|
||||
|
||||
if (forcecase.to_case != PCRE2_SUBSTITUTE_CASE_NONE &&
|
||||
substitute_case_callout == NULL)
|
||||
CHECKCASECPY_DEFAULT(temp, chlen);
|
||||
else
|
||||
CHECKMEMCPY(temp, chlen);
|
||||
continue;
|
||||
|
||||
case ESC_g:
|
||||
{
|
||||
PCRE2_SIZE name_len;
|
||||
PCRE2_SPTR name_start;
|
||||
|
||||
/* Parse the \g<name> form (\g<number> already handled by check_escape) */
|
||||
if (ptr >= repend || *ptr != CHAR_LESS_THAN_SIGN)
|
||||
goto BADESCAPE;
|
||||
++ptr;
|
||||
|
||||
name_start = ptr;
|
||||
if (!read_name_subst(&ptr, repend, utf, code->tables + ctypes_offset))
|
||||
goto BADESCAPE;
|
||||
name_len = ptr - name_start;
|
||||
|
||||
if (ptr >= repend || *ptr != CHAR_GREATER_THAN_SIGN)
|
||||
goto BADESCAPE;
|
||||
++ptr;
|
||||
|
||||
special = 0;
|
||||
group = -1;
|
||||
memcpy(name, name_start, CU2BYTES(name_len));
|
||||
name[name_len] = 0;
|
||||
goto GROUP_SUBSTITUTE;
|
||||
}
|
||||
|
||||
default:
|
||||
if (rc < 0)
|
||||
{
|
||||
special = 0;
|
||||
group = -rc - 1;
|
||||
goto GROUP_SUBSTITUTE;
|
||||
}
|
||||
goto BADESCAPE;
|
||||
}
|
||||
} /* End of backslash processing */
|
||||
|
||||
/* Handle a literal code unit */
|
||||
|
||||
else
|
||||
{
|
||||
PCRE2_SPTR ch_start;
|
||||
|
||||
LOADLITERAL:
|
||||
ch_start = ptr;
|
||||
GETCHARINCTEST(ch, ptr); /* Get character value, increment pointer */
|
||||
(void) ch;
|
||||
|
||||
if (forcecase.to_case != PCRE2_SUBSTITUTE_CASE_NONE &&
|
||||
substitute_case_callout == NULL)
|
||||
CHECKCASECPY_DEFAULT(ch_start, ptr - ch_start);
|
||||
else
|
||||
CHECKMEMCPY(ch_start, ptr - ch_start);
|
||||
} /* End handling a literal code unit */
|
||||
} /* End of loop for scanning the replacement. */
|
||||
|
||||
/* If the substitute_case_callout is unset, our case-forcing is done
|
||||
immediately. If there is a callout however, then its action is delayed
|
||||
until all the characters have been collected.
|
||||
|
||||
We now clean up any trailing section of the replacement for which we deferred
|
||||
the case-forcing. */
|
||||
|
||||
if (substitute_case_callout != NULL &&
|
||||
forcecase.to_case != PCRE2_SUBSTITUTE_CASE_NONE)
|
||||
DELAYEDFORCECASE();
|
||||
|
||||
/* The replacement has been copied to the output, or its size has been
|
||||
remembered. Handle the callout if there is one. */
|
||||
|
||||
if (mcontext != NULL && mcontext->substitute_callout != NULL)
|
||||
{
|
||||
/* If we have an actual (non-simulated) replacement, do the callout. */
|
||||
|
||||
if (!overflowed)
|
||||
{
|
||||
scb.subscount = subs;
|
||||
scb.output_offsets[1] = buff_offset;
|
||||
rc = mcontext->substitute_callout(&scb,
|
||||
mcontext->substitute_callout_data);
|
||||
|
||||
/* A non-zero return means cancel this substitution. Instead, copy the
|
||||
matched string fragment. */
|
||||
|
||||
if (rc != 0)
|
||||
{
|
||||
PCRE2_SIZE newlength = scb.output_offsets[1] - scb.output_offsets[0];
|
||||
PCRE2_SIZE oldlength = ovector[1] - ovector[0];
|
||||
|
||||
buff_offset -= newlength;
|
||||
lengthleft += newlength;
|
||||
if (!replacement_only) CHECKMEMCPY(subject + ovector[0], oldlength);
|
||||
|
||||
/* A negative return means do not do any more. */
|
||||
|
||||
if (rc < 0) suboptions &= (~PCRE2_SUBSTITUTE_GLOBAL);
|
||||
}
|
||||
}
|
||||
|
||||
/* In this interesting case, we cannot do the callout, so it's hard to
|
||||
estimate the required buffer size. What callers want is to be able to make
|
||||
two calls to pcre2_substitute(), once with PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
to discover the buffer size, and then a second and final call. Older
|
||||
versions of PCRE2 violated this assumption, by proceeding as if the callout
|
||||
had returned zero - but on the second call to pcre2_substitute() it could
|
||||
return non-zero and then overflow the buffer again. Callers probably don't
|
||||
want to keep on looping to incrementally discover the buffer size. */
|
||||
|
||||
else
|
||||
{
|
||||
PCRE2_SIZE newlength_buf = buff_offset - scb.output_offsets[0];
|
||||
PCRE2_SIZE newlength_extra = extra_needed - sub_start_extra_needed;
|
||||
PCRE2_SIZE newlength =
|
||||
(newlength_extra > ~(PCRE2_SIZE)0 - newlength_buf)? /* Integer overflow */
|
||||
~(PCRE2_SIZE)0 : newlength_buf + newlength_extra; /* Cap the addition */
|
||||
PCRE2_SIZE oldlength = ovector[1] - ovector[0];
|
||||
|
||||
/* Be pessimistic: request whichever buffer size is larger out of
|
||||
accepting or rejecting the substitution. */
|
||||
|
||||
if (oldlength > newlength)
|
||||
{
|
||||
PCRE2_SIZE additional = oldlength - newlength;
|
||||
if (additional > ~(PCRE2_SIZE)0 - extra_needed) /* Integer overflow */
|
||||
goto TOOLARGEREPLACE;
|
||||
extra_needed += additional;
|
||||
}
|
||||
|
||||
/* Proceed as if the callout did not return a negative. A negative
|
||||
effectively rejects all future substitutions, but we want to examine them
|
||||
pessimistically. */
|
||||
}
|
||||
}
|
||||
|
||||
/* Save the details of this match. See above for how this data is used. If we
|
||||
matched an empty string, do the magic for global matches. Update the start
|
||||
offset to point to the rest of the subject string. If we re-used an existing
|
||||
match for the first match, switch to the internal match data block. */
|
||||
|
||||
ovecsave[0] = ovector[0];
|
||||
ovecsave[1] = ovector[1];
|
||||
ovecsave[2] = start_offset;
|
||||
|
||||
goptions = (ovector[0] != ovector[1] || ovector[0] > start_offset)? 0 :
|
||||
PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
|
||||
start_offset = ovector[1];
|
||||
} while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0); /* Repeat "do" loop */
|
||||
|
||||
/* Copy the rest of the subject unless not required, and terminate the output
|
||||
with a binary zero. */
|
||||
|
||||
if (!replacement_only)
|
||||
{
|
||||
fraglength = length - start_offset;
|
||||
CHECKMEMCPY(subject + start_offset, fraglength);
|
||||
}
|
||||
|
||||
temp[0] = 0;
|
||||
CHECKMEMCPY(temp, 1);
|
||||
|
||||
/* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set,
|
||||
and matching has carried on after a full buffer, in order to compute the length
|
||||
needed. Otherwise, an overflow generates an immediate error return. */
|
||||
|
||||
if (overflowed)
|
||||
{
|
||||
rc = PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
if (extra_needed > ~(PCRE2_SIZE)0 - buff_length) /* Integer overflow */
|
||||
goto TOOLARGEREPLACE;
|
||||
*blength = buff_length + extra_needed;
|
||||
}
|
||||
|
||||
/* After a successful execution, return the number of substitutions and set the
|
||||
length of buffer used, excluding the trailing zero. */
|
||||
|
||||
else
|
||||
{
|
||||
rc = subs;
|
||||
*blength = buff_offset - 1;
|
||||
}
|
||||
|
||||
EXIT:
|
||||
if (internal_match_data != NULL) pcre2_match_data_free(internal_match_data);
|
||||
else match_data->rc = rc;
|
||||
return rc;
|
||||
|
||||
NOROOM:
|
||||
rc = PCRE2_ERROR_NOMEMORY;
|
||||
goto EXIT;
|
||||
|
||||
CASEERROR:
|
||||
rc = PCRE2_ERROR_REPLACECASE;
|
||||
goto EXIT;
|
||||
|
||||
TOOLARGEREPLACE:
|
||||
rc = PCRE2_ERROR_TOOLARGEREPLACE;
|
||||
goto EXIT;
|
||||
|
||||
BAD:
|
||||
rc = PCRE2_ERROR_BADREPLACEMENT;
|
||||
goto PTREXIT;
|
||||
|
||||
BADESCAPE:
|
||||
rc = PCRE2_ERROR_BADREPESCAPE;
|
||||
|
||||
PTREXIT:
|
||||
*blength = (PCRE2_SIZE)(ptr - replacement);
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* End of pcre2_substitute.c */
|
||||
@@ -1,550 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy named captured string to given buffer *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a given buffer,
|
||||
identifying it by name. If the regex permits duplicate names, the first
|
||||
substring that is set is chosen.
|
||||
|
||||
Arguments:
|
||||
match_data points to the match data
|
||||
stringname the name of the required substring
|
||||
buffer where to put the substring
|
||||
sizeptr the size of the buffer, updated to the size of the substring
|
||||
|
||||
Returns: if successful: zero
|
||||
if not successful, a negative error code:
|
||||
(1) an error from nametable_scan()
|
||||
(2) an error from copy_bynumber()
|
||||
(3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
|
||||
(4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_copy_byname(pcre2_match_data *match_data, PCRE2_SPTR stringname,
|
||||
PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
PCRE2_SPTR first, last, entry;
|
||||
int failrc, entrysize;
|
||||
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
|
||||
return PCRE2_ERROR_DFA_UFUNC;
|
||||
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
|
||||
&first, &last);
|
||||
if (entrysize < 0) return entrysize;
|
||||
failrc = PCRE2_ERROR_UNAVAILABLE;
|
||||
for (entry = first; entry <= last; entry += entrysize)
|
||||
{
|
||||
uint32_t n = GET2(entry, 0);
|
||||
if (n < match_data->oveccount)
|
||||
{
|
||||
if (match_data->ovector[n*2] != PCRE2_UNSET)
|
||||
return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr);
|
||||
failrc = PCRE2_ERROR_UNSET;
|
||||
}
|
||||
}
|
||||
return failrc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy numbered captured string to given buffer *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a given buffer,
|
||||
identifying it by number.
|
||||
|
||||
Arguments:
|
||||
match_data points to the match data
|
||||
stringnumber the number of the required substring
|
||||
buffer where to put the substring
|
||||
sizeptr the size of the buffer, updated to the size of the substring
|
||||
|
||||
Returns: if successful: 0
|
||||
if not successful, a negative error code:
|
||||
PCRE2_ERROR_NOMEMORY: buffer too small
|
||||
PCRE2_ERROR_NOSUBSTRING: no such substring
|
||||
PCRE2_ERROR_UNAVAILABLE: ovector too small
|
||||
PCRE2_ERROR_UNSET: substring is not set
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
|
||||
uint32_t stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
int rc;
|
||||
PCRE2_SIZE size;
|
||||
rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
|
||||
if (rc < 0) return rc;
|
||||
if (size + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY;
|
||||
memcpy(buffer, match_data->subject + match_data->ovector[stringnumber*2],
|
||||
CU2BYTES(size));
|
||||
buffer[size] = 0;
|
||||
*sizeptr = size;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Extract named captured string *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring, identified by name, into
|
||||
new memory. If the regex permits duplicate names, the first substring that is
|
||||
set is chosen.
|
||||
|
||||
Arguments:
|
||||
match_data pointer to match_data
|
||||
stringname the name of the required substring
|
||||
stringptr where to put the pointer to the new memory
|
||||
sizeptr where to put the length of the substring
|
||||
|
||||
Returns: if successful: zero
|
||||
if not successful, a negative value:
|
||||
(1) an error from nametable_scan()
|
||||
(2) an error from get_bynumber()
|
||||
(3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
|
||||
(4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_get_byname(pcre2_match_data *match_data,
|
||||
PCRE2_SPTR stringname, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
PCRE2_SPTR first, last, entry;
|
||||
int failrc, entrysize;
|
||||
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
|
||||
return PCRE2_ERROR_DFA_UFUNC;
|
||||
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
|
||||
&first, &last);
|
||||
if (entrysize < 0) return entrysize;
|
||||
failrc = PCRE2_ERROR_UNAVAILABLE;
|
||||
for (entry = first; entry <= last; entry += entrysize)
|
||||
{
|
||||
uint32_t n = GET2(entry, 0);
|
||||
if (n < match_data->oveccount)
|
||||
{
|
||||
if (match_data->ovector[n*2] != PCRE2_UNSET)
|
||||
return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr);
|
||||
failrc = PCRE2_ERROR_UNSET;
|
||||
}
|
||||
}
|
||||
return failrc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Extract captured string to new memory *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a piece of new
|
||||
memory.
|
||||
|
||||
Arguments:
|
||||
match_data points to match data
|
||||
stringnumber the number of the required substring
|
||||
stringptr where to put a pointer to the new memory
|
||||
sizeptr where to put the size of the substring
|
||||
|
||||
Returns: if successful: 0
|
||||
if not successful, a negative error code:
|
||||
PCRE2_ERROR_NOMEMORY: failed to get memory
|
||||
PCRE2_ERROR_NOSUBSTRING: no such substring
|
||||
PCRE2_ERROR_UNAVAILABLE: ovector too small
|
||||
PCRE2_ERROR_UNSET: substring is not set
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_get_bynumber(pcre2_match_data *match_data,
|
||||
uint32_t stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
int rc;
|
||||
PCRE2_SIZE size;
|
||||
PCRE2_UCHAR *yield;
|
||||
rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
|
||||
if (rc < 0) return rc;
|
||||
yield = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
|
||||
(size + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data);
|
||||
if (yield == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
yield = (PCRE2_UCHAR *)(((char *)yield) + sizeof(pcre2_memctl));
|
||||
memcpy(yield, match_data->subject + match_data->ovector[stringnumber*2],
|
||||
CU2BYTES(size));
|
||||
yield[size] = 0;
|
||||
*stringptr = yield;
|
||||
*sizeptr = size;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free memory obtained by get_substring *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Argument: the result of a previous pcre2_substring_get_byxxx()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_free(PCRE2_UCHAR *string)
|
||||
{
|
||||
if (string != NULL)
|
||||
{
|
||||
pcre2_memctl *memctl = (pcre2_memctl *)((char *)string - sizeof(pcre2_memctl));
|
||||
memctl->free(memctl, memctl->memory_data);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get length of a named substring *
|
||||
*************************************************/
|
||||
|
||||
/* This function returns the length of a named captured substring. If the regex
|
||||
permits duplicate names, the first substring that is set is chosen.
|
||||
|
||||
Arguments:
|
||||
match_data pointer to match data
|
||||
stringname the name of the required substring
|
||||
sizeptr where to put the length
|
||||
|
||||
Returns: 0 if successful, else a negative error number
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_length_byname(pcre2_match_data *match_data,
|
||||
PCRE2_SPTR stringname, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
PCRE2_SPTR first, last, entry;
|
||||
int failrc, entrysize;
|
||||
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
|
||||
return PCRE2_ERROR_DFA_UFUNC;
|
||||
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
|
||||
&first, &last);
|
||||
if (entrysize < 0) return entrysize;
|
||||
failrc = PCRE2_ERROR_UNAVAILABLE;
|
||||
for (entry = first; entry <= last; entry += entrysize)
|
||||
{
|
||||
uint32_t n = GET2(entry, 0);
|
||||
if (n < match_data->oveccount)
|
||||
{
|
||||
if (match_data->ovector[n*2] != PCRE2_UNSET)
|
||||
return pcre2_substring_length_bynumber(match_data, n, sizeptr);
|
||||
failrc = PCRE2_ERROR_UNSET;
|
||||
}
|
||||
}
|
||||
return failrc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get length of a numbered substring *
|
||||
*************************************************/
|
||||
|
||||
/* This function returns the length of a captured substring. If the start is
|
||||
beyond the end (which can happen when \K is used in an assertion), it sets the
|
||||
length to zero.
|
||||
|
||||
Arguments:
|
||||
match_data pointer to match data
|
||||
stringnumber the number of the required substring
|
||||
sizeptr where to put the length, if not NULL
|
||||
|
||||
Returns: if successful: 0
|
||||
if not successful, a negative error code:
|
||||
PCRE2_ERROR_NOSUBSTRING: no such substring
|
||||
PCRE2_ERROR_UNAVAILABLE: ovector is too small
|
||||
PCRE2_ERROR_UNSET: substring is not set
|
||||
PCRE2_ERROR_INVALIDOFFSET: internal error, should not occur
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_length_bynumber(pcre2_match_data *match_data,
|
||||
uint32_t stringnumber, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
PCRE2_SIZE left, right;
|
||||
int count = match_data->rc;
|
||||
if (count == PCRE2_ERROR_PARTIAL)
|
||||
{
|
||||
if (stringnumber > 0) return PCRE2_ERROR_PARTIAL;
|
||||
count = 0;
|
||||
}
|
||||
else if (count < 0) return count; /* Match failed */
|
||||
|
||||
if (match_data->matchedby != PCRE2_MATCHEDBY_DFA_INTERPRETER)
|
||||
{
|
||||
if (stringnumber > match_data->code->top_bracket)
|
||||
return PCRE2_ERROR_NOSUBSTRING;
|
||||
if (stringnumber >= match_data->oveccount)
|
||||
return PCRE2_ERROR_UNAVAILABLE;
|
||||
if (match_data->ovector[stringnumber*2] == PCRE2_UNSET)
|
||||
return PCRE2_ERROR_UNSET;
|
||||
}
|
||||
else /* Matched using pcre2_dfa_match() */
|
||||
{
|
||||
if (stringnumber >= match_data->oveccount) return PCRE2_ERROR_UNAVAILABLE;
|
||||
if (count != 0 && stringnumber >= (uint32_t)count) return PCRE2_ERROR_UNSET;
|
||||
}
|
||||
|
||||
left = match_data->ovector[stringnumber*2];
|
||||
right = match_data->ovector[stringnumber*2+1];
|
||||
if (left > match_data->subject_length || right > match_data->subject_length)
|
||||
return PCRE2_ERROR_INVALIDOFFSET;
|
||||
if (sizeptr != NULL) *sizeptr = (left > right)? 0 : right - left;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Extract all captured strings to new memory *
|
||||
*************************************************/
|
||||
|
||||
/* This function gets one chunk of memory and builds a list of pointers and all
|
||||
the captured substrings in it. A NULL pointer is put on the end of the list.
|
||||
The substrings are zero-terminated, but also, if the final argument is
|
||||
non-NULL, a list of lengths is also returned. This allows binary data to be
|
||||
handled.
|
||||
|
||||
Arguments:
|
||||
match_data points to the match data
|
||||
listptr set to point to the list of pointers
|
||||
lengthsptr set to point to the list of lengths (may be NULL)
|
||||
|
||||
Returns: if successful: 0
|
||||
if not successful, a negative error code:
|
||||
PCRE2_ERROR_NOMEMORY: failed to get memory,
|
||||
or a match failure code
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr,
|
||||
PCRE2_SIZE **lengthsptr)
|
||||
{
|
||||
int i, count, count2;
|
||||
PCRE2_SIZE size;
|
||||
PCRE2_SIZE *lensp;
|
||||
pcre2_memctl *memp;
|
||||
PCRE2_UCHAR **listp;
|
||||
PCRE2_UCHAR *sp;
|
||||
PCRE2_SIZE *ovector;
|
||||
|
||||
if ((count = match_data->rc) < 0) return count; /* Match failed */
|
||||
if (count == 0) count = match_data->oveccount; /* Ovector too small */
|
||||
|
||||
count2 = 2*count;
|
||||
ovector = match_data->ovector;
|
||||
size = sizeof(pcre2_memctl) + sizeof(PCRE2_UCHAR *); /* For final NULL */
|
||||
if (lengthsptr != NULL) size += sizeof(PCRE2_SIZE) * count; /* For lengths */
|
||||
|
||||
for (i = 0; i < count2; i += 2)
|
||||
{
|
||||
size += sizeof(PCRE2_UCHAR *) + CU2BYTES(1);
|
||||
if (ovector[i+1] > ovector[i]) size += CU2BYTES(ovector[i+1] - ovector[i]);
|
||||
}
|
||||
|
||||
memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data);
|
||||
if (memp == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
*listptr = listp = (PCRE2_UCHAR **)((char *)memp + sizeof(pcre2_memctl));
|
||||
lensp = (PCRE2_SIZE *)((char *)listp + sizeof(PCRE2_UCHAR *) * (count + 1));
|
||||
|
||||
if (lengthsptr == NULL)
|
||||
{
|
||||
sp = (PCRE2_UCHAR *)lensp;
|
||||
lensp = NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
*lengthsptr = lensp;
|
||||
sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count);
|
||||
}
|
||||
|
||||
for (i = 0; i < count2; i += 2)
|
||||
{
|
||||
size = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
|
||||
|
||||
/* Size == 0 includes the case when the capture is unset. Avoid adding
|
||||
PCRE2_UNSET to match_data->subject because it overflows, even though with
|
||||
zero size calling memcpy() is harmless. */
|
||||
|
||||
if (size != 0) memcpy(sp, match_data->subject + ovector[i], CU2BYTES(size));
|
||||
*listp++ = sp;
|
||||
if (lensp != NULL) *lensp++ = size;
|
||||
sp += size;
|
||||
*sp++ = 0;
|
||||
}
|
||||
|
||||
*listp = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free memory obtained by substring_list_get *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Argument: the result of a previous pcre2_substring_list_get()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_list_free(PCRE2_UCHAR **list)
|
||||
{
|
||||
if (list != NULL)
|
||||
{
|
||||
pcre2_memctl *memctl = (pcre2_memctl *)((char *)list - sizeof(pcre2_memctl));
|
||||
memctl->free(memctl, memctl->memory_data);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find (multiple) entries for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This function scans the nametable for a given name, using binary chop. It
|
||||
returns either two pointers to the entries in the table, or, if no pointers are
|
||||
given, the number of a unique group with the given name. If duplicate names are
|
||||
permitted, and the name is not unique, an error is generated.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name whose entries are required
|
||||
firstptr where to put the pointer to the first entry
|
||||
lastptr where to put the pointer to the last entry
|
||||
|
||||
Returns: PCRE2_ERROR_NOSUBSTRING if the name is not found
|
||||
otherwise, if firstptr and lastptr are NULL:
|
||||
a group number for a unique substring
|
||||
else PCRE2_ERROR_NOUNIQUESUBSTRING
|
||||
otherwise:
|
||||
the length of each entry, having set firstptr and lastptr
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_nametable_scan(const pcre2_code *code, PCRE2_SPTR stringname,
|
||||
PCRE2_SPTR *firstptr, PCRE2_SPTR *lastptr)
|
||||
{
|
||||
uint16_t bot = 0;
|
||||
uint16_t top = code->name_count;
|
||||
uint16_t entrysize = code->name_entry_size;
|
||||
PCRE2_SPTR nametable = (PCRE2_SPTR)((const char *)code + sizeof(pcre2_real_code));
|
||||
|
||||
while (top > bot)
|
||||
{
|
||||
uint16_t mid = (top + bot) / 2;
|
||||
PCRE2_SPTR entry = nametable + entrysize*mid;
|
||||
int c = PRIV(strcmp)(stringname, entry + IMM2_SIZE);
|
||||
if (c == 0)
|
||||
{
|
||||
PCRE2_SPTR first;
|
||||
PCRE2_SPTR last;
|
||||
PCRE2_SPTR lastentry;
|
||||
lastentry = nametable + entrysize * (code->name_count - 1);
|
||||
first = last = entry;
|
||||
while (first > nametable)
|
||||
{
|
||||
if (PRIV(strcmp)(stringname, (first - entrysize + IMM2_SIZE)) != 0) break;
|
||||
first -= entrysize;
|
||||
}
|
||||
while (last < lastentry)
|
||||
{
|
||||
if (PRIV(strcmp)(stringname, (last + entrysize + IMM2_SIZE)) != 0) break;
|
||||
last += entrysize;
|
||||
}
|
||||
if (firstptr == NULL) return (first == last)?
|
||||
(int)GET2(entry, 0) : PCRE2_ERROR_NOUNIQUESUBSTRING;
|
||||
*firstptr = first;
|
||||
*lastptr = last;
|
||||
return entrysize;
|
||||
}
|
||||
if (c > 0) bot = mid + 1; else top = mid;
|
||||
}
|
||||
|
||||
return PCRE2_ERROR_NOSUBSTRING;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find number for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This function is a convenience wrapper for pcre2_substring_nametable_scan()
|
||||
when it is known that names are unique. If there are duplicate names, it is not
|
||||
defined which number is returned.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name whose number is required
|
||||
|
||||
Returns: the number of the named parenthesis, or a negative number
|
||||
PCRE2_ERROR_NOSUBSTRING if not found
|
||||
PCRE2_ERROR_NOUNIQUESUBSTRING if not unique
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_number_from_name(const pcre2_code *code,
|
||||
PCRE2_SPTR stringname)
|
||||
{
|
||||
return pcre2_substring_nametable_scan(code, stringname, NULL, NULL);
|
||||
}
|
||||
|
||||
/* End of pcre2_substring.c */
|
||||
@@ -1,234 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This module contains some fixed tables that are used by more than one of the
|
||||
PCRE2 code modules. The tables are also #included by the pcre2test program,
|
||||
which uses macros to change their names from _pcre2_xxx to xxxx, thereby
|
||||
avoiding name clashes with the library. In this case, PCRE2_PCRE2TEST is
|
||||
defined. */
|
||||
|
||||
#ifndef PCRE2_PCRE2TEST /* We're compiling the library */
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
#include "pcre2_internal.h"
|
||||
#endif /* PCRE2_PCRE2TEST */
|
||||
|
||||
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
|
||||
the definition is next to the definition of the opcodes in pcre2_internal.h.
|
||||
This is mode-dependent, so it is skipped when this file is included by
|
||||
pcre2test. */
|
||||
|
||||
#ifndef PCRE2_PCRE2TEST
|
||||
const uint8_t PRIV(OP_lengths)[] = { OP_LENGTHS };
|
||||
#endif
|
||||
|
||||
/* Tables of horizontal and vertical whitespace characters, suitable for
|
||||
adding to classes. */
|
||||
|
||||
const uint32_t PRIV(hspace_list)[] = { HSPACE_LIST };
|
||||
const uint32_t PRIV(vspace_list)[] = { VSPACE_LIST };
|
||||
|
||||
/* These tables are the pairs of delimiters that are valid for callout string
|
||||
arguments. For each starting delimiter there must be a matching ending
|
||||
delimiter, which in fact is different only for bracket-like delimiters. */
|
||||
|
||||
const uint32_t PRIV(callout_start_delims)[] = {
|
||||
CHAR_GRAVE_ACCENT, CHAR_APOSTROPHE, CHAR_QUOTATION_MARK,
|
||||
CHAR_CIRCUMFLEX_ACCENT, CHAR_PERCENT_SIGN, CHAR_NUMBER_SIGN,
|
||||
CHAR_DOLLAR_SIGN, CHAR_LEFT_CURLY_BRACKET, 0 };
|
||||
|
||||
const uint32_t PRIV(callout_end_delims[]) = {
|
||||
CHAR_GRAVE_ACCENT, CHAR_APOSTROPHE, CHAR_QUOTATION_MARK,
|
||||
CHAR_CIRCUMFLEX_ACCENT, CHAR_PERCENT_SIGN, CHAR_NUMBER_SIGN,
|
||||
CHAR_DOLLAR_SIGN, CHAR_RIGHT_CURLY_BRACKET, 0 };
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Tables for UTF-8 support *
|
||||
*************************************************/
|
||||
|
||||
/* These tables are required by pcre2test in 16- or 32-bit mode, as well
|
||||
as for the library in 8-bit mode, because pcre2test uses UTF-8 internally for
|
||||
handling wide characters. */
|
||||
|
||||
#if defined PCRE2_PCRE2TEST || \
    (defined SUPPORT_UNICODE && \
     defined PCRE2_CODE_UNIT_WIDTH && \
     PCRE2_CODE_UNIT_WIDTH == 8)

/* Largest code point that fits in a UTF-8 character of 1, 2, ... 6 bytes. */

const int PRIV(utf8_table1)[] = {
  0x7f,
  0x7ff,
  0xffff,
  0x1fffff,
  0x3ffffff,
  0x7fffffff
};

/* Number of entries in utf8_table1. */

const int PRIV(utf8_table1_size) = sizeof(PRIV(utf8_table1)) / sizeof(int);

/* Both tables below are indexed by the number of additional bytes. table2
holds the indicator bits for the first byte of a character and table3 the mask
for the data bits in that first byte. */

const int PRIV(utf8_table2)[] = {
  0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
};

const int PRIV(utf8_table3)[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01
};

/* Number of extra bytes, indexed by the first byte masked with 0x3f. The
highest index that corresponds to a valid UTF-8 first byte is 0x3d. */

const uint8_t PRIV(utf8_table4)[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,   /* 0x00 - 0x0f */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,   /* 0x10 - 0x1f */
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,   /* 0x20 - 0x2f */
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5    /* 0x30 - 0x3f */
};

#endif  /* UTF-8 support needed */
|
||||
|
||||
/* Tables concerned with Unicode properties are relevant only when Unicode
|
||||
support is enabled. See also the pcre2_ucptables.c file, which is generated by
|
||||
a Python script from Unicode data files. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
|
||||
/* Table to translate from particular type value to the general value. */
|
||||
|
||||
const uint32_t PRIV(ucp_gentype)[] = {
|
||||
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
|
||||
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
|
||||
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
|
||||
ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
|
||||
ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
|
||||
ucp_P, ucp_P, /* Ps, Po */
|
||||
ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
|
||||
ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
|
||||
};
|
||||
|
||||
/* This table encodes the rules for finding the end of an extended grapheme
|
||||
cluster. Every code point has a grapheme break property which is one of the
|
||||
ucp_gbXX values defined in pcre2_ucp.h. These changed between Unicode versions
|
||||
10 and 11. The 2-dimensional table is indexed by the properties of two adjacent
|
||||
code points. The left property selects a word from the table, and the right
|
||||
property selects a bit from that word like this:
|
||||
|
||||
PRIV(ucp_gbtable)[left-property] & (1u << right-property)
|
||||
|
||||
The value is non-zero if a grapheme break is NOT permitted between the relevant
|
||||
two code points. The breaking rules are as follows:
|
||||
|
||||
1. Break at the start and end of text (pretty obviously).
|
||||
|
||||
2. Do not break between a CR and LF; otherwise, break before and after
|
||||
controls.
|
||||
|
||||
3. Do not break Hangul syllable sequences, the rules for which are:
|
||||
|
||||
L may be followed by L, V, LV or LVT
|
||||
LV or V may be followed by V or T
|
||||
LVT or T may be followed by T
|
||||
|
||||
4. Do not break before extending characters or zero-width-joiner (ZWJ).
|
||||
|
||||
The following rules are only for extended grapheme clusters (but that's what we
|
||||
are implementing).
|
||||
|
||||
5. Do not break before SpacingMarks.
|
||||
|
||||
6. Do not break after Prepend characters.
|
||||
|
||||
7. Do not break within emoji modifier sequences or emoji zwj sequences. That
|
||||
is, do not break between characters with the Extended_Pictographic property
|
||||
if a ZWJ intervenes. Extend characters are allowed between the characters;
|
||||
this cannot be represented in this table, the code has to deal with it.
|
||||
|
||||
8. Do not break within emoji flag sequences. That is, do not break between
|
||||
regional indicator (RI) symbols if there are an odd number of RI characters
|
||||
before the break point. This table encodes "join RI characters"; the code
|
||||
has to deal with checking for previous adjoining RIs.
|
||||
|
||||
9. Otherwise, break everywhere.
|
||||
*/
|
||||
|
||||
#define ESZ (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbZWJ)
|
||||
|
||||
const uint32_t PRIV(ucp_gbtable)[] = {
|
||||
(1u<<ucp_gbLF), /* 0 CR */
|
||||
0, /* 1 LF */
|
||||
0, /* 2 Control */
|
||||
ESZ, /* 3 Extend */
|
||||
ESZ|(1u<<ucp_gbPrepend)| /* 4 Prepend */
|
||||
(1u<<ucp_gbL)|(1u<<ucp_gbV)|(1u<<ucp_gbT)|
|
||||
(1u<<ucp_gbLV)|(1u<<ucp_gbLVT)|(1u<<ucp_gbOther)|
|
||||
(1u<<ucp_gbRegional_Indicator),
|
||||
ESZ, /* 5 SpacingMark */
|
||||
ESZ|(1u<<ucp_gbL)|(1u<<ucp_gbV)|(1u<<ucp_gbLV)| /* 6 L */
|
||||
(1u<<ucp_gbLVT),
|
||||
ESZ|(1u<<ucp_gbV)|(1u<<ucp_gbT), /* 7 V */
|
||||
ESZ|(1u<<ucp_gbT), /* 8 T */
|
||||
ESZ|(1u<<ucp_gbV)|(1u<<ucp_gbT), /* 9 LV */
|
||||
ESZ|(1u<<ucp_gbT), /* 10 LVT */
|
||||
(1u<<ucp_gbRegional_Indicator), /* 11 Regional Indicator */
|
||||
ESZ, /* 12 Other */
|
||||
ESZ|(1u<<ucp_gbExtended_Pictographic), /* 13 ZWJ */
|
||||
ESZ /* 14 Extended Pictographic */
|
||||
};
|
||||
|
||||
#undef ESZ
|
||||
|
||||
#ifdef SUPPORT_JIT
/* The reverse of PRIV(ucp_gentype): two entries per general category, giving
the first and last particular category that belong to it. Having the range
to hand saves a memory load. */

const int PRIV(ucp_typerange)[] = {
  ucp_Cc, ucp_Cs,   /* C */
  ucp_Ll, ucp_Lu,   /* L */
  ucp_Mc, ucp_Mn,   /* M */
  ucp_Nd, ucp_No,   /* N */
  ucp_Pc, ucp_Ps,   /* P */
  ucp_Sc, ucp_So,   /* S */
  ucp_Zl, ucp_Zs,   /* Z */
};
#endif  /* SUPPORT_JIT */
|
||||
|
||||
/* Finally, include the tables that are auto-generated from the Unicode data
|
||||
files. */
|
||||
|
||||
#include "pcre2_ucptables.c"
|
||||
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* End of pcre2_tables.c */
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,408 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2022 University of Cambridge
|
||||
|
||||
This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY!
|
||||
Instead, modify the maint/GenerateUcpHeader.py script and run it to generate
|
||||
a new version of this code.
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD
|
||||
#define PCRE2_UCP_H_IDEMPOTENT_GUARD
|
||||
|
||||
/* This file contains definitions of the Unicode property values that are
|
||||
returned by the UCD access macros and used throughout PCRE2.
|
||||
|
||||
IMPORTANT: The specific values of the first two enums (general and particular
|
||||
character categories) are assumed by the table called catposstab in the file
|
||||
pcre2_auto_possess.c. They are unlikely to change, but should be checked after
|
||||
an update. */
|
||||
|
||||
/* These are the general character categories. */
|
||||
|
||||
/* General categories. The values are implicit, starting at zero, so the
order of the enumerators is significant. */

enum {
  ucp_C,
  ucp_L,
  ucp_M,
  ucp_N,
  ucp_P,
  ucp_S,
  ucp_Z,
};
|
||||
|
||||
/* These are the particular character categories. */
|
||||
|
||||
/* Particular categories. The values are implicit, starting at zero, so the
order of the enumerators is significant. */

enum {
  ucp_Cc,    /* Control */
  ucp_Cf,    /* Format */
  ucp_Cn,    /* Unassigned */
  ucp_Co,    /* Private use */
  ucp_Cs,    /* Surrogate */
  ucp_Ll,    /* Lower case letter */
  ucp_Lm,    /* Modifier letter */
  ucp_Lo,    /* Other letter */
  ucp_Lt,    /* Title case letter */
  ucp_Lu,    /* Upper case letter */
  ucp_Mc,    /* Spacing mark */
  ucp_Me,    /* Enclosing mark */
  ucp_Mn,    /* Non-spacing mark */
  ucp_Nd,    /* Decimal number */
  ucp_Nl,    /* Letter number */
  ucp_No,    /* Other number */
  ucp_Pc,    /* Connector punctuation */
  ucp_Pd,    /* Dash punctuation */
  ucp_Pe,    /* Close punctuation */
  ucp_Pf,    /* Final punctuation */
  ucp_Pi,    /* Initial punctuation */
  ucp_Po,    /* Other punctuation */
  ucp_Ps,    /* Open punctuation */
  ucp_Sc,    /* Currency symbol */
  ucp_Sk,    /* Modifier symbol */
  ucp_Sm,    /* Mathematical symbol */
  ucp_So,    /* Other symbol */
  ucp_Zl,    /* Line separator */
  ucp_Zp,    /* Paragraph separator */
  ucp_Zs,    /* Space separator */
};
|
||||
|
||||
/* These are Boolean properties. No explicit values are given, so the enumerators are numbered consecutively from 0 (ucp_ASCII) in declaration order. */
|
||||
|
||||
enum {
|
||||
ucp_ASCII,
|
||||
ucp_ASCII_Hex_Digit,
|
||||
ucp_Alphabetic,
|
||||
ucp_Bidi_Control,
|
||||
ucp_Bidi_Mirrored,
|
||||
ucp_Case_Ignorable,
|
||||
ucp_Cased,
|
||||
ucp_Changes_When_Casefolded,
|
||||
ucp_Changes_When_Casemapped,
|
||||
ucp_Changes_When_Lowercased,
|
||||
ucp_Changes_When_Titlecased,
|
||||
ucp_Changes_When_Uppercased,
|
||||
ucp_Dash,
|
||||
ucp_Default_Ignorable_Code_Point,
|
||||
ucp_Deprecated,
|
||||
ucp_Diacritic,
|
||||
ucp_Emoji,
|
||||
ucp_Emoji_Component,
|
||||
ucp_Emoji_Modifier,
|
||||
ucp_Emoji_Modifier_Base,
|
||||
ucp_Emoji_Presentation,
|
||||
ucp_Extended_Pictographic,
|
||||
ucp_Extender,
|
||||
ucp_Grapheme_Base,
|
||||
ucp_Grapheme_Extend,
|
||||
ucp_Grapheme_Link,
|
||||
ucp_Hex_Digit,
|
||||
ucp_IDS_Binary_Operator,
|
||||
ucp_IDS_Trinary_Operator,
|
||||
ucp_IDS_Unary_Operator,
|
||||
ucp_ID_Compat_Math_Continue,
|
||||
ucp_ID_Compat_Math_Start,
|
||||
ucp_ID_Continue,
|
||||
ucp_ID_Start,
|
||||
ucp_Ideographic,
|
||||
ucp_InCB,
|
||||
ucp_Join_Control,
|
||||
ucp_Logical_Order_Exception,
|
||||
ucp_Lowercase,
|
||||
ucp_Math,
|
||||
ucp_Modifier_Combining_Mark,
|
||||
ucp_Noncharacter_Code_Point,
|
||||
ucp_Pattern_Syntax,
|
||||
ucp_Pattern_White_Space,
|
||||
ucp_Prepended_Concatenation_Mark,
|
||||
ucp_Quotation_Mark,
|
||||
ucp_Radical,
|
||||
ucp_Regional_Indicator,
|
||||
ucp_Sentence_Terminal,
|
||||
ucp_Soft_Dotted,
|
||||
ucp_Terminal_Punctuation,
|
||||
ucp_Unified_Ideograph,
|
||||
ucp_Uppercase,
|
||||
ucp_Variation_Selector,
|
||||
ucp_White_Space,
|
||||
ucp_XID_Continue,
|
||||
ucp_XID_Start,
|
||||
/* This must be last: with implicit numbering it then equals the number of Boolean properties declared above it. */
|
||||
ucp_Bprop_Count
|
||||
};
|
||||
|
||||
/* Size of entries in ucd_boolprop_sets[] */
|
||||
|
||||
#define ucd_boolprop_sets_item_size 2
|
||||
|
||||
/* These are the bidi class values. No explicit values are given, so the enumerators are numbered consecutively from 0 (ucp_bidiAL) in declaration order; each comment gives the class name the enumerator stands for. */
|
||||
|
||||
enum {
|
||||
ucp_bidiAL, /* Arabic_Letter */
|
||||
ucp_bidiAN, /* Arabic_Number */
|
||||
ucp_bidiB, /* Paragraph_Separator */
|
||||
ucp_bidiBN, /* Boundary_Neutral */
|
||||
ucp_bidiCS, /* Common_Separator */
|
||||
ucp_bidiEN, /* European_Number */
|
||||
ucp_bidiES, /* European_Separator */
|
||||
ucp_bidiET, /* European_Terminator */
|
||||
ucp_bidiFSI, /* First_Strong_Isolate */
|
||||
ucp_bidiL, /* Left_To_Right */
|
||||
ucp_bidiLRE, /* Left_To_Right_Embedding */
|
||||
ucp_bidiLRI, /* Left_To_Right_Isolate */
|
||||
ucp_bidiLRO, /* Left_To_Right_Override */
|
||||
ucp_bidiNSM, /* Nonspacing_Mark */
|
||||
ucp_bidiON, /* Other_Neutral */
|
||||
ucp_bidiPDF, /* Pop_Directional_Format */
|
||||
ucp_bidiPDI, /* Pop_Directional_Isolate */
|
||||
ucp_bidiR, /* Right_To_Left */
|
||||
ucp_bidiRLE, /* Right_To_Left_Embedding */
|
||||
ucp_bidiRLI, /* Right_To_Left_Isolate */
|
||||
ucp_bidiRLO, /* Right_To_Left_Override */
|
||||
ucp_bidiS, /* Segment_Separator */
|
||||
ucp_bidiWS, /* White_Space */
|
||||
};
|
||||
|
||||
/* These are grapheme break properties. The Extended Pictographic property
|
||||
comes from the emoji-data.txt file. The number in each comment below is the enumerator's implicit value (no explicit values are given, so numbering starts at 0 and follows declaration order). */
|
||||
|
||||
enum {
|
||||
ucp_gbCR, /* 0 */
|
||||
ucp_gbLF, /* 1 */
|
||||
ucp_gbControl, /* 2 */
|
||||
ucp_gbExtend, /* 3 */
|
||||
ucp_gbPrepend, /* 4 */
|
||||
ucp_gbSpacingMark, /* 5 */
|
||||
ucp_gbL, /* 6 Hangul syllable type L */
|
||||
ucp_gbV, /* 7 Hangul syllable type V */
|
||||
ucp_gbT, /* 8 Hangul syllable type T */
|
||||
ucp_gbLV, /* 9 Hangul syllable type LV */
|
||||
ucp_gbLVT, /* 10 Hangul syllable type LVT */
|
||||
ucp_gbRegional_Indicator, /* 11 */
|
||||
ucp_gbOther, /* 12 */
|
||||
ucp_gbZWJ, /* 13 */
|
||||
ucp_gbExtended_Pictographic, /* 14 */
|
||||
};
|
||||
|
||||
/* These are the script identifications. No explicit values are given, so the enumerators are numbered consecutively from 0 (ucp_Latin) in declaration order, in two groups: first the scripts that have characters in other scripts, then those that have none. */
|
||||
|
||||
enum {
|
||||
/* Scripts which have characters in other scripts. */
|
||||
ucp_Latin,
|
||||
ucp_Greek,
|
||||
ucp_Cyrillic,
|
||||
ucp_Armenian,
|
||||
ucp_Hebrew,
|
||||
ucp_Arabic,
|
||||
ucp_Syriac,
|
||||
ucp_Thaana,
|
||||
ucp_Devanagari,
|
||||
ucp_Bengali,
|
||||
ucp_Gurmukhi,
|
||||
ucp_Gujarati,
|
||||
ucp_Oriya,
|
||||
ucp_Tamil,
|
||||
ucp_Telugu,
|
||||
ucp_Kannada,
|
||||
ucp_Malayalam,
|
||||
ucp_Sinhala,
|
||||
ucp_Thai,
|
||||
ucp_Tibetan,
|
||||
ucp_Myanmar,
|
||||
ucp_Georgian,
|
||||
ucp_Hangul,
|
||||
ucp_Ethiopic,
|
||||
ucp_Cherokee,
|
||||
ucp_Runic,
|
||||
ucp_Mongolian,
|
||||
ucp_Hiragana,
|
||||
ucp_Katakana,
|
||||
ucp_Bopomofo,
|
||||
ucp_Han,
|
||||
ucp_Yi,
|
||||
ucp_Gothic,
|
||||
ucp_Tagalog,
|
||||
ucp_Hanunoo,
|
||||
ucp_Buhid,
|
||||
ucp_Tagbanwa,
|
||||
ucp_Limbu,
|
||||
ucp_Tai_Le,
|
||||
ucp_Linear_B,
|
||||
ucp_Shavian,
|
||||
ucp_Cypriot,
|
||||
ucp_Buginese,
|
||||
ucp_Coptic,
|
||||
ucp_Glagolitic,
|
||||
ucp_Tifinagh,
|
||||
ucp_Syloti_Nagri,
|
||||
ucp_Phags_Pa,
|
||||
ucp_Nko,
|
||||
ucp_Kayah_Li,
|
||||
ucp_Lycian,
|
||||
ucp_Carian,
|
||||
ucp_Lydian,
|
||||
ucp_Avestan,
|
||||
ucp_Samaritan,
|
||||
ucp_Lisu,
|
||||
ucp_Javanese,
|
||||
ucp_Old_Turkic,
|
||||
ucp_Kaithi,
|
||||
ucp_Mandaic,
|
||||
ucp_Chakma,
|
||||
ucp_Meroitic_Hieroglyphs,
|
||||
ucp_Sharada,
|
||||
ucp_Takri,
|
||||
ucp_Caucasian_Albanian,
|
||||
ucp_Duployan,
|
||||
ucp_Elbasan,
|
||||
ucp_Grantha,
|
||||
ucp_Khojki,
|
||||
ucp_Linear_A,
|
||||
ucp_Mahajani,
|
||||
ucp_Manichaean,
|
||||
ucp_Modi,
|
||||
ucp_Old_Permic,
|
||||
ucp_Psalter_Pahlavi,
|
||||
ucp_Khudawadi,
|
||||
ucp_Tirhuta,
|
||||
ucp_Multani,
|
||||
ucp_Old_Hungarian,
|
||||
ucp_Adlam,
|
||||
ucp_Osage,
|
||||
ucp_Tangut,
|
||||
ucp_Masaram_Gondi,
|
||||
ucp_Dogra,
|
||||
ucp_Gunjala_Gondi,
|
||||
ucp_Hanifi_Rohingya,
|
||||
ucp_Sogdian,
|
||||
ucp_Nandinagari,
|
||||
ucp_Yezidi,
|
||||
ucp_Cypro_Minoan,
|
||||
ucp_Old_Uyghur,
|
||||
ucp_Toto,
|
||||
ucp_Garay,
|
||||
ucp_Gurung_Khema,
|
||||
ucp_Ol_Onal,
|
||||
ucp_Sunuwar,
|
||||
ucp_Todhri,
|
||||
ucp_Tulu_Tigalari,
|
||||
|
||||
/* Scripts which have no characters in other scripts. */
|
||||
ucp_Unknown,
|
||||
ucp_Common,
|
||||
ucp_Lao,
|
||||
ucp_Canadian_Aboriginal,
|
||||
ucp_Ogham,
|
||||
ucp_Khmer,
|
||||
ucp_Old_Italic,
|
||||
ucp_Deseret,
|
||||
ucp_Inherited,
|
||||
ucp_Ugaritic,
|
||||
ucp_Osmanya,
|
||||
ucp_Braille,
|
||||
ucp_New_Tai_Lue,
|
||||
ucp_Old_Persian,
|
||||
ucp_Kharoshthi,
|
||||
ucp_Balinese,
|
||||
ucp_Cuneiform,
|
||||
ucp_Phoenician,
|
||||
ucp_Sundanese,
|
||||
ucp_Lepcha,
|
||||
ucp_Ol_Chiki,
|
||||
ucp_Vai,
|
||||
ucp_Saurashtra,
|
||||
ucp_Rejang,
|
||||
ucp_Cham,
|
||||
ucp_Tai_Tham,
|
||||
ucp_Tai_Viet,
|
||||
ucp_Egyptian_Hieroglyphs,
|
||||
ucp_Bamum,
|
||||
ucp_Meetei_Mayek,
|
||||
ucp_Imperial_Aramaic,
|
||||
ucp_Old_South_Arabian,
|
||||
ucp_Inscriptional_Parthian,
|
||||
ucp_Inscriptional_Pahlavi,
|
||||
ucp_Batak,
|
||||
ucp_Brahmi,
|
||||
ucp_Meroitic_Cursive,
|
||||
ucp_Miao,
|
||||
ucp_Sora_Sompeng,
|
||||
ucp_Bassa_Vah,
|
||||
ucp_Pahawh_Hmong,
|
||||
ucp_Mende_Kikakui,
|
||||
ucp_Mro,
|
||||
ucp_Old_North_Arabian,
|
||||
ucp_Nabataean,
|
||||
ucp_Palmyrene,
|
||||
ucp_Pau_Cin_Hau,
|
||||
ucp_Siddham,
|
||||
ucp_Warang_Citi,
|
||||
ucp_Ahom,
|
||||
ucp_Anatolian_Hieroglyphs,
|
||||
ucp_Hatran,
|
||||
ucp_SignWriting,
|
||||
ucp_Bhaiksuki,
|
||||
ucp_Marchen,
|
||||
ucp_Newa,
|
||||
ucp_Nushu,
|
||||
ucp_Soyombo,
|
||||
ucp_Zanabazar_Square,
|
||||
ucp_Makasar,
|
||||
ucp_Medefaidrin,
|
||||
ucp_Old_Sogdian,
|
||||
ucp_Elymaic,
|
||||
ucp_Nyiakeng_Puachue_Hmong,
|
||||
ucp_Wancho,
|
||||
ucp_Chorasmian,
|
||||
ucp_Dives_Akuru,
|
||||
ucp_Khitan_Small_Script,
|
||||
ucp_Tangsa,
|
||||
ucp_Vithkuqi,
|
||||
ucp_Kawi,
|
||||
ucp_Nag_Mundari,
|
||||
ucp_Kirat_Rai,
|
||||
|
||||
/* This must be last: with implicit numbering it then equals the number of scripts declared above it. */
|
||||
ucp_Script_Count
|
||||
};
|
||||
|
||||
/* Size of entries in ucd_script_sets[] */
|
||||
|
||||
#define ucd_script_sets_item_size 4
|
||||
|
||||
#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */
|
||||
|
||||
/* End of pcre2_ucp.h */
|
||||
@@ -1,1596 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2022 University of Cambridge
|
||||
|
||||
This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY!
|
||||
Instead, modify the maint/GenerateUcpTables.py script and run it to generate
|
||||
a new version of this code.
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
|
||||
/* The PRIV(utt)[] table below translates Unicode property names into type and
|
||||
code values. It is searched by binary chop, so must be in collating sequence of
|
||||
name. Originally, the table contained pointers to the name strings in the first
|
||||
field of each entry. However, that leads to a large number of relocations when
|
||||
a shared library is dynamically loaded. A significant reduction is made by
|
||||
putting all the names into a single, large string and using offsets instead.
|
||||
All letters are lower cased, and underscores are removed, in accordance with
|
||||
the "loose matching" rules that Unicode advises and Perl uses. */
|
||||
|
||||
#define STRING_adlam0 STR_a STR_d STR_l STR_a STR_m "\0"
|
||||
#define STRING_adlm0 STR_a STR_d STR_l STR_m "\0"
|
||||
#define STRING_aghb0 STR_a STR_g STR_h STR_b "\0"
|
||||
#define STRING_ahex0 STR_a STR_h STR_e STR_x "\0"
|
||||
#define STRING_ahom0 STR_a STR_h STR_o STR_m "\0"
|
||||
#define STRING_alpha0 STR_a STR_l STR_p STR_h STR_a "\0"
|
||||
#define STRING_alphabetic0 STR_a STR_l STR_p STR_h STR_a STR_b STR_e STR_t STR_i STR_c "\0"
|
||||
#define STRING_anatolianhieroglyphs0 STR_a STR_n STR_a STR_t STR_o STR_l STR_i STR_a STR_n STR_h STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
|
||||
#define STRING_any0 STR_a STR_n STR_y "\0"
|
||||
#define STRING_arab0 STR_a STR_r STR_a STR_b "\0"
|
||||
#define STRING_arabic0 STR_a STR_r STR_a STR_b STR_i STR_c "\0"
|
||||
#define STRING_armenian0 STR_a STR_r STR_m STR_e STR_n STR_i STR_a STR_n "\0"
|
||||
#define STRING_armi0 STR_a STR_r STR_m STR_i "\0"
|
||||
#define STRING_armn0 STR_a STR_r STR_m STR_n "\0"
|
||||
#define STRING_ascii0 STR_a STR_s STR_c STR_i STR_i "\0"
|
||||
#define STRING_asciihexdigit0 STR_a STR_s STR_c STR_i STR_i STR_h STR_e STR_x STR_d STR_i STR_g STR_i STR_t "\0"
|
||||
#define STRING_avestan0 STR_a STR_v STR_e STR_s STR_t STR_a STR_n "\0"
|
||||
#define STRING_avst0 STR_a STR_v STR_s STR_t "\0"
|
||||
#define STRING_bali0 STR_b STR_a STR_l STR_i "\0"
|
||||
#define STRING_balinese0 STR_b STR_a STR_l STR_i STR_n STR_e STR_s STR_e "\0"
|
||||
#define STRING_bamu0 STR_b STR_a STR_m STR_u "\0"
|
||||
#define STRING_bamum0 STR_b STR_a STR_m STR_u STR_m "\0"
|
||||
#define STRING_bass0 STR_b STR_a STR_s STR_s "\0"
|
||||
#define STRING_bassavah0 STR_b STR_a STR_s STR_s STR_a STR_v STR_a STR_h "\0"
|
||||
#define STRING_batak0 STR_b STR_a STR_t STR_a STR_k "\0"
|
||||
#define STRING_batk0 STR_b STR_a STR_t STR_k "\0"
|
||||
#define STRING_beng0 STR_b STR_e STR_n STR_g "\0"
|
||||
#define STRING_bengali0 STR_b STR_e STR_n STR_g STR_a STR_l STR_i "\0"
|
||||
#define STRING_bhaiksuki0 STR_b STR_h STR_a STR_i STR_k STR_s STR_u STR_k STR_i "\0"
|
||||
#define STRING_bhks0 STR_b STR_h STR_k STR_s "\0"
|
||||
#define STRING_bidial0 STR_b STR_i STR_d STR_i STR_a STR_l "\0"
|
||||
#define STRING_bidian0 STR_b STR_i STR_d STR_i STR_a STR_n "\0"
|
||||
#define STRING_bidib0 STR_b STR_i STR_d STR_i STR_b "\0"
|
||||
#define STRING_bidibn0 STR_b STR_i STR_d STR_i STR_b STR_n "\0"
|
||||
#define STRING_bidic0 STR_b STR_i STR_d STR_i STR_c "\0"
|
||||
#define STRING_bidicontrol0 STR_b STR_i STR_d STR_i STR_c STR_o STR_n STR_t STR_r STR_o STR_l "\0"
|
||||
#define STRING_bidics0 STR_b STR_i STR_d STR_i STR_c STR_s "\0"
|
||||
#define STRING_bidien0 STR_b STR_i STR_d STR_i STR_e STR_n "\0"
|
||||
#define STRING_bidies0 STR_b STR_i STR_d STR_i STR_e STR_s "\0"
|
||||
#define STRING_bidiet0 STR_b STR_i STR_d STR_i STR_e STR_t "\0"
|
||||
#define STRING_bidifsi0 STR_b STR_i STR_d STR_i STR_f STR_s STR_i "\0"
|
||||
#define STRING_bidil0 STR_b STR_i STR_d STR_i STR_l "\0"
|
||||
#define STRING_bidilre0 STR_b STR_i STR_d STR_i STR_l STR_r STR_e "\0"
|
||||
#define STRING_bidilri0 STR_b STR_i STR_d STR_i STR_l STR_r STR_i "\0"
|
||||
#define STRING_bidilro0 STR_b STR_i STR_d STR_i STR_l STR_r STR_o "\0"
|
||||
#define STRING_bidim0 STR_b STR_i STR_d STR_i STR_m "\0"
|
||||
#define STRING_bidimirrored0 STR_b STR_i STR_d STR_i STR_m STR_i STR_r STR_r STR_o STR_r STR_e STR_d "\0"
|
||||
#define STRING_bidinsm0 STR_b STR_i STR_d STR_i STR_n STR_s STR_m "\0"
|
||||
#define STRING_bidion0 STR_b STR_i STR_d STR_i STR_o STR_n "\0"
|
||||
#define STRING_bidipdf0 STR_b STR_i STR_d STR_i STR_p STR_d STR_f "\0"
|
||||
#define STRING_bidipdi0 STR_b STR_i STR_d STR_i STR_p STR_d STR_i "\0"
|
||||
#define STRING_bidir0 STR_b STR_i STR_d STR_i STR_r "\0"
|
||||
#define STRING_bidirle0 STR_b STR_i STR_d STR_i STR_r STR_l STR_e "\0"
|
||||
#define STRING_bidirli0 STR_b STR_i STR_d STR_i STR_r STR_l STR_i "\0"
|
||||
#define STRING_bidirlo0 STR_b STR_i STR_d STR_i STR_r STR_l STR_o "\0"
|
||||
#define STRING_bidis0 STR_b STR_i STR_d STR_i STR_s "\0"
|
||||
#define STRING_bidiws0 STR_b STR_i STR_d STR_i STR_w STR_s "\0"
|
||||
#define STRING_bopo0 STR_b STR_o STR_p STR_o "\0"
|
||||
#define STRING_bopomofo0 STR_b STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0"
|
||||
#define STRING_brah0 STR_b STR_r STR_a STR_h "\0"
|
||||
#define STRING_brahmi0 STR_b STR_r STR_a STR_h STR_m STR_i "\0"
|
||||
#define STRING_brai0 STR_b STR_r STR_a STR_i "\0"
|
||||
#define STRING_braille0 STR_b STR_r STR_a STR_i STR_l STR_l STR_e "\0"
|
||||
#define STRING_bugi0 STR_b STR_u STR_g STR_i "\0"
|
||||
#define STRING_buginese0 STR_b STR_u STR_g STR_i STR_n STR_e STR_s STR_e "\0"
|
||||
#define STRING_buhd0 STR_b STR_u STR_h STR_d "\0"
|
||||
#define STRING_buhid0 STR_b STR_u STR_h STR_i STR_d "\0"
|
||||
#define STRING_c0 STR_c "\0"
|
||||
#define STRING_cakm0 STR_c STR_a STR_k STR_m "\0"
|
||||
#define STRING_canadianaboriginal0 STR_c STR_a STR_n STR_a STR_d STR_i STR_a STR_n STR_a STR_b STR_o STR_r STR_i STR_g STR_i STR_n STR_a STR_l "\0"
|
||||
#define STRING_cans0 STR_c STR_a STR_n STR_s "\0"
|
||||
#define STRING_cari0 STR_c STR_a STR_r STR_i "\0"
|
||||
#define STRING_carian0 STR_c STR_a STR_r STR_i STR_a STR_n "\0"
|
||||
#define STRING_cased0 STR_c STR_a STR_s STR_e STR_d "\0"
|
||||
#define STRING_caseignorable0 STR_c STR_a STR_s STR_e STR_i STR_g STR_n STR_o STR_r STR_a STR_b STR_l STR_e "\0"
|
||||
#define STRING_caucasianalbanian0 STR_c STR_a STR_u STR_c STR_a STR_s STR_i STR_a STR_n STR_a STR_l STR_b STR_a STR_n STR_i STR_a STR_n "\0"
|
||||
#define STRING_cc0 STR_c STR_c "\0"
|
||||
#define STRING_cf0 STR_c STR_f "\0"
|
||||
#define STRING_chakma0 STR_c STR_h STR_a STR_k STR_m STR_a "\0"
|
||||
#define STRING_cham0 STR_c STR_h STR_a STR_m "\0"
|
||||
#define STRING_changeswhencasefolded0 STR_c STR_h STR_a STR_n STR_g STR_e STR_s STR_w STR_h STR_e STR_n STR_c STR_a STR_s STR_e STR_f STR_o STR_l STR_d STR_e STR_d "\0"
|
||||
#define STRING_changeswhencasemapped0 STR_c STR_h STR_a STR_n STR_g STR_e STR_s STR_w STR_h STR_e STR_n STR_c STR_a STR_s STR_e STR_m STR_a STR_p STR_p STR_e STR_d "\0"
|
||||
#define STRING_changeswhenlowercased0 STR_c STR_h STR_a STR_n STR_g STR_e STR_s STR_w STR_h STR_e STR_n STR_l STR_o STR_w STR_e STR_r STR_c STR_a STR_s STR_e STR_d "\0"
|
||||
#define STRING_changeswhentitlecased0 STR_c STR_h STR_a STR_n STR_g STR_e STR_s STR_w STR_h STR_e STR_n STR_t STR_i STR_t STR_l STR_e STR_c STR_a STR_s STR_e STR_d "\0"
|
||||
#define STRING_changeswhenuppercased0 STR_c STR_h STR_a STR_n STR_g STR_e STR_s STR_w STR_h STR_e STR_n STR_u STR_p STR_p STR_e STR_r STR_c STR_a STR_s STR_e STR_d "\0"
|
||||
#define STRING_cher0 STR_c STR_h STR_e STR_r "\0"
|
||||
#define STRING_cherokee0 STR_c STR_h STR_e STR_r STR_o STR_k STR_e STR_e "\0"
|
||||
#define STRING_chorasmian0 STR_c STR_h STR_o STR_r STR_a STR_s STR_m STR_i STR_a STR_n "\0"
|
||||
#define STRING_chrs0 STR_c STR_h STR_r STR_s "\0"
|
||||
#define STRING_ci0 STR_c STR_i "\0"
|
||||
#define STRING_cn0 STR_c STR_n "\0"
|
||||
#define STRING_co0 STR_c STR_o "\0"
|
||||
#define STRING_common0 STR_c STR_o STR_m STR_m STR_o STR_n "\0"
|
||||
#define STRING_copt0 STR_c STR_o STR_p STR_t "\0"
|
||||
#define STRING_coptic0 STR_c STR_o STR_p STR_t STR_i STR_c "\0"
|
||||
#define STRING_cpmn0 STR_c STR_p STR_m STR_n "\0"
|
||||
#define STRING_cprt0 STR_c STR_p STR_r STR_t "\0"
|
||||
#define STRING_cs0 STR_c STR_s "\0"
|
||||
#define STRING_cuneiform0 STR_c STR_u STR_n STR_e STR_i STR_f STR_o STR_r STR_m "\0"
|
||||
#define STRING_cwcf0 STR_c STR_w STR_c STR_f "\0"
|
||||
#define STRING_cwcm0 STR_c STR_w STR_c STR_m "\0"
|
||||
#define STRING_cwl0 STR_c STR_w STR_l "\0"
|
||||
#define STRING_cwt0 STR_c STR_w STR_t "\0"
|
||||
#define STRING_cwu0 STR_c STR_w STR_u "\0"
|
||||
#define STRING_cypriot0 STR_c STR_y STR_p STR_r STR_i STR_o STR_t "\0"
|
||||
#define STRING_cyprominoan0 STR_c STR_y STR_p STR_r STR_o STR_m STR_i STR_n STR_o STR_a STR_n "\0"
|
||||
#define STRING_cyrillic0 STR_c STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0"
|
||||
#define STRING_cyrl0 STR_c STR_y STR_r STR_l "\0"
|
||||
#define STRING_dash0 STR_d STR_a STR_s STR_h "\0"
|
||||
#define STRING_defaultignorablecodepoint0 STR_d STR_e STR_f STR_a STR_u STR_l STR_t STR_i STR_g STR_n STR_o STR_r STR_a STR_b STR_l STR_e STR_c STR_o STR_d STR_e STR_p STR_o STR_i STR_n STR_t "\0"
|
||||
#define STRING_dep0 STR_d STR_e STR_p "\0"
|
||||
#define STRING_deprecated0 STR_d STR_e STR_p STR_r STR_e STR_c STR_a STR_t STR_e STR_d "\0"
|
||||
#define STRING_deseret0 STR_d STR_e STR_s STR_e STR_r STR_e STR_t "\0"
|
||||
#define STRING_deva0 STR_d STR_e STR_v STR_a "\0"
|
||||
#define STRING_devanagari0 STR_d STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0"
|
||||
#define STRING_di0 STR_d STR_i "\0"
|
||||
#define STRING_dia0 STR_d STR_i STR_a "\0"
|
||||
#define STRING_diacritic0 STR_d STR_i STR_a STR_c STR_r STR_i STR_t STR_i STR_c "\0"
|
||||
#define STRING_diak0 STR_d STR_i STR_a STR_k "\0"
|
||||
#define STRING_divesakuru0 STR_d STR_i STR_v STR_e STR_s STR_a STR_k STR_u STR_r STR_u "\0"
|
||||
#define STRING_dogr0 STR_d STR_o STR_g STR_r "\0"
|
||||
#define STRING_dogra0 STR_d STR_o STR_g STR_r STR_a "\0"
|
||||
#define STRING_dsrt0 STR_d STR_s STR_r STR_t "\0"
|
||||
#define STRING_dupl0 STR_d STR_u STR_p STR_l "\0"
|
||||
#define STRING_duployan0 STR_d STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0"
|
||||
#define STRING_ebase0 STR_e STR_b STR_a STR_s STR_e "\0"
|
||||
#define STRING_ecomp0 STR_e STR_c STR_o STR_m STR_p "\0"
|
||||
#define STRING_egyp0 STR_e STR_g STR_y STR_p "\0"
|
||||
#define STRING_egyptianhieroglyphs0 STR_e STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_h STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
|
||||
#define STRING_elba0 STR_e STR_l STR_b STR_a "\0"
|
||||
#define STRING_elbasan0 STR_e STR_l STR_b STR_a STR_s STR_a STR_n "\0"
|
||||
#define STRING_elym0 STR_e STR_l STR_y STR_m "\0"
|
||||
#define STRING_elymaic0 STR_e STR_l STR_y STR_m STR_a STR_i STR_c "\0"
|
||||
#define STRING_emod0 STR_e STR_m STR_o STR_d "\0"
|
||||
#define STRING_emoji0 STR_e STR_m STR_o STR_j STR_i "\0"
|
||||
#define STRING_emojicomponent0 STR_e STR_m STR_o STR_j STR_i STR_c STR_o STR_m STR_p STR_o STR_n STR_e STR_n STR_t "\0"
|
||||
#define STRING_emojimodifier0 STR_e STR_m STR_o STR_j STR_i STR_m STR_o STR_d STR_i STR_f STR_i STR_e STR_r "\0"
|
||||
#define STRING_emojimodifierbase0 STR_e STR_m STR_o STR_j STR_i STR_m STR_o STR_d STR_i STR_f STR_i STR_e STR_r STR_b STR_a STR_s STR_e "\0"
|
||||
#define STRING_emojipresentation0 STR_e STR_m STR_o STR_j STR_i STR_p STR_r STR_e STR_s STR_e STR_n STR_t STR_a STR_t STR_i STR_o STR_n "\0"
|
||||
#define STRING_epres0 STR_e STR_p STR_r STR_e STR_s "\0"
|
||||
#define STRING_ethi0 STR_e STR_t STR_h STR_i "\0"
|
||||
#define STRING_ethiopic0 STR_e STR_t STR_h STR_i STR_o STR_p STR_i STR_c "\0"
|
||||
#define STRING_ext0 STR_e STR_x STR_t "\0"
|
||||
#define STRING_extendedpictographic0 STR_e STR_x STR_t STR_e STR_n STR_d STR_e STR_d STR_p STR_i STR_c STR_t STR_o STR_g STR_r STR_a STR_p STR_h STR_i STR_c "\0"
|
||||
#define STRING_extender0 STR_e STR_x STR_t STR_e STR_n STR_d STR_e STR_r "\0"
|
||||
#define STRING_extpict0 STR_e STR_x STR_t STR_p STR_i STR_c STR_t "\0"
|
||||
#define STRING_gara0 STR_g STR_a STR_r STR_a "\0"
|
||||
#define STRING_garay0 STR_g STR_a STR_r STR_a STR_y "\0"
|
||||
#define STRING_geor0 STR_g STR_e STR_o STR_r "\0"
|
||||
#define STRING_georgian0 STR_g STR_e STR_o STR_r STR_g STR_i STR_a STR_n "\0"
|
||||
#define STRING_glag0 STR_g STR_l STR_a STR_g "\0"
|
||||
#define STRING_glagolitic0 STR_g STR_l STR_a STR_g STR_o STR_l STR_i STR_t STR_i STR_c "\0"
|
||||
#define STRING_gong0 STR_g STR_o STR_n STR_g "\0"
|
||||
#define STRING_gonm0 STR_g STR_o STR_n STR_m "\0"
|
||||
#define STRING_goth0 STR_g STR_o STR_t STR_h "\0"
|
||||
#define STRING_gothic0 STR_g STR_o STR_t STR_h STR_i STR_c "\0"
|
||||
#define STRING_gran0 STR_g STR_r STR_a STR_n "\0"
|
||||
#define STRING_grantha0 STR_g STR_r STR_a STR_n STR_t STR_h STR_a "\0"
|
||||
#define STRING_graphemebase0 STR_g STR_r STR_a STR_p STR_h STR_e STR_m STR_e STR_b STR_a STR_s STR_e "\0"
|
||||
#define STRING_graphemeextend0 STR_g STR_r STR_a STR_p STR_h STR_e STR_m STR_e STR_e STR_x STR_t STR_e STR_n STR_d "\0"
|
||||
#define STRING_graphemelink0 STR_g STR_r STR_a STR_p STR_h STR_e STR_m STR_e STR_l STR_i STR_n STR_k "\0"
|
||||
#define STRING_grbase0 STR_g STR_r STR_b STR_a STR_s STR_e "\0"
|
||||
#define STRING_greek0 STR_g STR_r STR_e STR_e STR_k "\0"
|
||||
#define STRING_grek0 STR_g STR_r STR_e STR_k "\0"
|
||||
#define STRING_grext0 STR_g STR_r STR_e STR_x STR_t "\0"
|
||||
#define STRING_grlink0 STR_g STR_r STR_l STR_i STR_n STR_k "\0"
|
||||
#define STRING_gujarati0 STR_g STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0"
|
||||
#define STRING_gujr0 STR_g STR_u STR_j STR_r "\0"
|
||||
#define STRING_gukh0 STR_g STR_u STR_k STR_h "\0"
|
||||
#define STRING_gunjalagondi0 STR_g STR_u STR_n STR_j STR_a STR_l STR_a STR_g STR_o STR_n STR_d STR_i "\0"
|
||||
#define STRING_gurmukhi0 STR_g STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0"
|
||||
#define STRING_guru0 STR_g STR_u STR_r STR_u "\0"
|
||||
#define STRING_gurungkhema0 STR_g STR_u STR_r STR_u STR_n STR_g STR_k STR_h STR_e STR_m STR_a "\0"
|
||||
#define STRING_han0 STR_h STR_a STR_n "\0"
|
||||
#define STRING_hang0 STR_h STR_a STR_n STR_g "\0"
|
||||
#define STRING_hangul0 STR_h STR_a STR_n STR_g STR_u STR_l "\0"
|
||||
#define STRING_hani0 STR_h STR_a STR_n STR_i "\0"
|
||||
#define STRING_hanifirohingya0 STR_h STR_a STR_n STR_i STR_f STR_i STR_r STR_o STR_h STR_i STR_n STR_g STR_y STR_a "\0"
|
||||
#define STRING_hano0 STR_h STR_a STR_n STR_o "\0"
|
||||
#define STRING_hanunoo0 STR_h STR_a STR_n STR_u STR_n STR_o STR_o "\0"
|
||||
#define STRING_hatr0 STR_h STR_a STR_t STR_r "\0"
|
||||
#define STRING_hatran0 STR_h STR_a STR_t STR_r STR_a STR_n "\0"
|
||||
#define STRING_hebr0 STR_h STR_e STR_b STR_r "\0"
|
||||
#define STRING_hebrew0 STR_h STR_e STR_b STR_r STR_e STR_w "\0"
|
||||
#define STRING_hex0 STR_h STR_e STR_x "\0"
|
||||
#define STRING_hexdigit0 STR_h STR_e STR_x STR_d STR_i STR_g STR_i STR_t "\0"
|
||||
#define STRING_hira0 STR_h STR_i STR_r STR_a "\0"
|
||||
#define STRING_hiragana0 STR_h STR_i STR_r STR_a STR_g STR_a STR_n STR_a "\0"
|
||||
#define STRING_hluw0 STR_h STR_l STR_u STR_w "\0"
|
||||
#define STRING_hmng0 STR_h STR_m STR_n STR_g "\0"
|
||||
#define STRING_hmnp0 STR_h STR_m STR_n STR_p "\0"
|
||||
#define STRING_hung0 STR_h STR_u STR_n STR_g "\0"
|
||||
#define STRING_idc0 STR_i STR_d STR_c "\0"
|
||||
#define STRING_idcompatmathcontinue0 STR_i STR_d STR_c STR_o STR_m STR_p STR_a STR_t STR_m STR_a STR_t STR_h STR_c STR_o STR_n STR_t STR_i STR_n STR_u STR_e "\0"
|
||||
#define STRING_idcompatmathstart0 STR_i STR_d STR_c STR_o STR_m STR_p STR_a STR_t STR_m STR_a STR_t STR_h STR_s STR_t STR_a STR_r STR_t "\0"
|
||||
#define STRING_idcontinue0 STR_i STR_d STR_c STR_o STR_n STR_t STR_i STR_n STR_u STR_e "\0"
|
||||
#define STRING_ideo0 STR_i STR_d STR_e STR_o "\0"
|
||||
#define STRING_ideographic0 STR_i STR_d STR_e STR_o STR_g STR_r STR_a STR_p STR_h STR_i STR_c "\0"
|
||||
#define STRING_ids0 STR_i STR_d STR_s "\0"
|
||||
#define STRING_idsb0 STR_i STR_d STR_s STR_b "\0"
|
||||
#define STRING_idsbinaryoperator0 STR_i STR_d STR_s STR_b STR_i STR_n STR_a STR_r STR_y STR_o STR_p STR_e STR_r STR_a STR_t STR_o STR_r "\0"
|
||||
#define STRING_idst0 STR_i STR_d STR_s STR_t "\0"
|
||||
#define STRING_idstart0 STR_i STR_d STR_s STR_t STR_a STR_r STR_t "\0"
|
||||
#define STRING_idstrinaryoperator0 STR_i STR_d STR_s STR_t STR_r STR_i STR_n STR_a STR_r STR_y STR_o STR_p STR_e STR_r STR_a STR_t STR_o STR_r "\0"
|
||||
#define STRING_idsu0 STR_i STR_d STR_s STR_u "\0"
|
||||
#define STRING_idsunaryoperator0 STR_i STR_d STR_s STR_u STR_n STR_a STR_r STR_y STR_o STR_p STR_e STR_r STR_a STR_t STR_o STR_r "\0"
|
||||
#define STRING_imperialaramaic0 STR_i STR_m STR_p STR_e STR_r STR_i STR_a STR_l STR_a STR_r STR_a STR_m STR_a STR_i STR_c "\0"
|
||||
#define STRING_incb0 STR_i STR_n STR_c STR_b "\0"
|
||||
#define STRING_inherited0 STR_i STR_n STR_h STR_e STR_r STR_i STR_t STR_e STR_d "\0"
|
||||
#define STRING_inscriptionalpahlavi0 STR_i STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_p STR_a STR_h STR_l STR_a STR_v STR_i "\0"
|
||||
#define STRING_inscriptionalparthian0 STR_i STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_p STR_a STR_r STR_t STR_h STR_i STR_a STR_n "\0"
|
||||
#define STRING_ital0 STR_i STR_t STR_a STR_l "\0"
|
||||
#define STRING_java0 STR_j STR_a STR_v STR_a "\0"
|
||||
#define STRING_javanese0 STR_j STR_a STR_v STR_a STR_n STR_e STR_s STR_e "\0"
|
||||
#define STRING_joinc0 STR_j STR_o STR_i STR_n STR_c "\0"
|
||||
#define STRING_joincontrol0 STR_j STR_o STR_i STR_n STR_c STR_o STR_n STR_t STR_r STR_o STR_l "\0"
|
||||
#define STRING_kaithi0 STR_k STR_a STR_i STR_t STR_h STR_i "\0"
|
||||
#define STRING_kali0 STR_k STR_a STR_l STR_i "\0"
|
||||
#define STRING_kana0 STR_k STR_a STR_n STR_a "\0"
|
||||
#define STRING_kannada0 STR_k STR_a STR_n STR_n STR_a STR_d STR_a "\0"
|
||||
#define STRING_katakana0 STR_k STR_a STR_t STR_a STR_k STR_a STR_n STR_a "\0"
|
||||
#define STRING_kawi0 STR_k STR_a STR_w STR_i "\0"
|
||||
#define STRING_kayahli0 STR_k STR_a STR_y STR_a STR_h STR_l STR_i "\0"
|
||||
#define STRING_khar0 STR_k STR_h STR_a STR_r "\0"
|
||||
#define STRING_kharoshthi0 STR_k STR_h STR_a STR_r STR_o STR_s STR_h STR_t STR_h STR_i "\0"
|
||||
#define STRING_khitansmallscript0 STR_k STR_h STR_i STR_t STR_a STR_n STR_s STR_m STR_a STR_l STR_l STR_s STR_c STR_r STR_i STR_p STR_t "\0"
|
||||
#define STRING_khmer0 STR_k STR_h STR_m STR_e STR_r "\0"
|
||||
#define STRING_khmr0 STR_k STR_h STR_m STR_r "\0"
|
||||
#define STRING_khoj0 STR_k STR_h STR_o STR_j "\0"
|
||||
#define STRING_khojki0 STR_k STR_h STR_o STR_j STR_k STR_i "\0"
|
||||
#define STRING_khudawadi0 STR_k STR_h STR_u STR_d STR_a STR_w STR_a STR_d STR_i "\0"
|
||||
#define STRING_kiratrai0 STR_k STR_i STR_r STR_a STR_t STR_r STR_a STR_i "\0"
|
||||
#define STRING_kits0 STR_k STR_i STR_t STR_s "\0"
|
||||
#define STRING_knda0 STR_k STR_n STR_d STR_a "\0"
|
||||
#define STRING_krai0 STR_k STR_r STR_a STR_i "\0"
|
||||
#define STRING_kthi0 STR_k STR_t STR_h STR_i "\0"
|
||||
#define STRING_l0 STR_l "\0"
|
||||
#define STRING_l_AMPERSAND0 STR_l STR_AMPERSAND "\0"
|
||||
#define STRING_lana0 STR_l STR_a STR_n STR_a "\0"
|
||||
#define STRING_lao0 STR_l STR_a STR_o "\0"
|
||||
#define STRING_laoo0 STR_l STR_a STR_o STR_o "\0"
|
||||
#define STRING_latin0 STR_l STR_a STR_t STR_i STR_n "\0"
|
||||
#define STRING_latn0 STR_l STR_a STR_t STR_n "\0"
|
||||
#define STRING_lc0 STR_l STR_c "\0"
|
||||
#define STRING_lepc0 STR_l STR_e STR_p STR_c "\0"
|
||||
#define STRING_lepcha0 STR_l STR_e STR_p STR_c STR_h STR_a "\0"
|
||||
#define STRING_limb0 STR_l STR_i STR_m STR_b "\0"
|
||||
#define STRING_limbu0 STR_l STR_i STR_m STR_b STR_u "\0"
|
||||
#define STRING_lina0 STR_l STR_i STR_n STR_a "\0"
|
||||
#define STRING_linb0 STR_l STR_i STR_n STR_b "\0"
|
||||
#define STRING_lineara0 STR_l STR_i STR_n STR_e STR_a STR_r STR_a "\0"
|
||||
#define STRING_linearb0 STR_l STR_i STR_n STR_e STR_a STR_r STR_b "\0"
|
||||
#define STRING_lisu0 STR_l STR_i STR_s STR_u "\0"
|
||||
#define STRING_ll0 STR_l STR_l "\0"
|
||||
#define STRING_lm0 STR_l STR_m "\0"
|
||||
#define STRING_lo0 STR_l STR_o "\0"
|
||||
#define STRING_loe0 STR_l STR_o STR_e "\0"
|
||||
#define STRING_logicalorderexception0 STR_l STR_o STR_g STR_i STR_c STR_a STR_l STR_o STR_r STR_d STR_e STR_r STR_e STR_x STR_c STR_e STR_p STR_t STR_i STR_o STR_n "\0"
|
||||
#define STRING_lower0 STR_l STR_o STR_w STR_e STR_r "\0"
|
||||
#define STRING_lowercase0 STR_l STR_o STR_w STR_e STR_r STR_c STR_a STR_s STR_e "\0"
|
||||
#define STRING_lt0 STR_l STR_t "\0"
|
||||
#define STRING_lu0 STR_l STR_u "\0"
|
||||
#define STRING_lyci0 STR_l STR_y STR_c STR_i "\0"
|
||||
#define STRING_lycian0 STR_l STR_y STR_c STR_i STR_a STR_n "\0"
|
||||
#define STRING_lydi0 STR_l STR_y STR_d STR_i "\0"
|
||||
#define STRING_lydian0 STR_l STR_y STR_d STR_i STR_a STR_n "\0"
|
||||
#define STRING_m0 STR_m "\0"
|
||||
#define STRING_mahajani0 STR_m STR_a STR_h STR_a STR_j STR_a STR_n STR_i "\0"
|
||||
#define STRING_mahj0 STR_m STR_a STR_h STR_j "\0"
|
||||
#define STRING_maka0 STR_m STR_a STR_k STR_a "\0"
|
||||
#define STRING_makasar0 STR_m STR_a STR_k STR_a STR_s STR_a STR_r "\0"
|
||||
#define STRING_malayalam0 STR_m STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
|
||||
#define STRING_mand0 STR_m STR_a STR_n STR_d "\0"
|
||||
#define STRING_mandaic0 STR_m STR_a STR_n STR_d STR_a STR_i STR_c "\0"
|
||||
#define STRING_mani0 STR_m STR_a STR_n STR_i "\0"
|
||||
#define STRING_manichaean0 STR_m STR_a STR_n STR_i STR_c STR_h STR_a STR_e STR_a STR_n "\0"
|
||||
#define STRING_marc0 STR_m STR_a STR_r STR_c "\0"
|
||||
#define STRING_marchen0 STR_m STR_a STR_r STR_c STR_h STR_e STR_n "\0"
|
||||
#define STRING_masaramgondi0 STR_m STR_a STR_s STR_a STR_r STR_a STR_m STR_g STR_o STR_n STR_d STR_i "\0"
|
||||
#define STRING_math0 STR_m STR_a STR_t STR_h "\0"
|
||||
#define STRING_mc0 STR_m STR_c "\0"
|
||||
#define STRING_mcm0 STR_m STR_c STR_m "\0"
|
||||
#define STRING_me0 STR_m STR_e "\0"
|
||||
#define STRING_medefaidrin0 STR_m STR_e STR_d STR_e STR_f STR_a STR_i STR_d STR_r STR_i STR_n "\0"
|
||||
#define STRING_medf0 STR_m STR_e STR_d STR_f "\0"
|
||||
#define STRING_meeteimayek0 STR_m STR_e STR_e STR_t STR_e STR_i STR_m STR_a STR_y STR_e STR_k "\0"
|
||||
#define STRING_mend0 STR_m STR_e STR_n STR_d "\0"
|
||||
#define STRING_mendekikakui0 STR_m STR_e STR_n STR_d STR_e STR_k STR_i STR_k STR_a STR_k STR_u STR_i "\0"
|
||||
#define STRING_merc0 STR_m STR_e STR_r STR_c "\0"
|
||||
#define STRING_mero0 STR_m STR_e STR_r STR_o "\0"
|
||||
#define STRING_meroiticcursive0 STR_m STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_c STR_u STR_r STR_s STR_i STR_v STR_e "\0"
|
||||
#define STRING_meroitichieroglyphs0 STR_m STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_h STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
|
||||
#define STRING_miao0 STR_m STR_i STR_a STR_o "\0"
|
||||
#define STRING_mlym0 STR_m STR_l STR_y STR_m "\0"
|
||||
#define STRING_mn0 STR_m STR_n "\0"
|
||||
#define STRING_modi0 STR_m STR_o STR_d STR_i "\0"
|
||||
#define STRING_modifiercombiningmark0 STR_m STR_o STR_d STR_i STR_f STR_i STR_e STR_r STR_c STR_o STR_m STR_b STR_i STR_n STR_i STR_n STR_g STR_m STR_a STR_r STR_k "\0"
|
||||
#define STRING_mong0 STR_m STR_o STR_n STR_g "\0"
|
||||
#define STRING_mongolian0 STR_m STR_o STR_n STR_g STR_o STR_l STR_i STR_a STR_n "\0"
|
||||
#define STRING_mro0 STR_m STR_r STR_o "\0"
|
||||
#define STRING_mroo0 STR_m STR_r STR_o STR_o "\0"
|
||||
#define STRING_mtei0 STR_m STR_t STR_e STR_i "\0"
|
||||
#define STRING_mult0 STR_m STR_u STR_l STR_t "\0"
|
||||
#define STRING_multani0 STR_m STR_u STR_l STR_t STR_a STR_n STR_i "\0"
|
||||
#define STRING_myanmar0 STR_m STR_y STR_a STR_n STR_m STR_a STR_r "\0"
|
||||
#define STRING_mymr0 STR_m STR_y STR_m STR_r "\0"
|
||||
#define STRING_n0 STR_n "\0"
|
||||
#define STRING_nabataean0 STR_n STR_a STR_b STR_a STR_t STR_a STR_e STR_a STR_n "\0"
|
||||
#define STRING_nagm0 STR_n STR_a STR_g STR_m "\0"
|
||||
#define STRING_nagmundari0 STR_n STR_a STR_g STR_m STR_u STR_n STR_d STR_a STR_r STR_i "\0"
|
||||
#define STRING_nand0 STR_n STR_a STR_n STR_d "\0"
|
||||
#define STRING_nandinagari0 STR_n STR_a STR_n STR_d STR_i STR_n STR_a STR_g STR_a STR_r STR_i "\0"
|
||||
#define STRING_narb0 STR_n STR_a STR_r STR_b "\0"
|
||||
#define STRING_nbat0 STR_n STR_b STR_a STR_t "\0"
|
||||
#define STRING_nchar0 STR_n STR_c STR_h STR_a STR_r "\0"
|
||||
#define STRING_nd0 STR_n STR_d "\0"
|
||||
#define STRING_newa0 STR_n STR_e STR_w STR_a "\0"
|
||||
#define STRING_newtailue0 STR_n STR_e STR_w STR_t STR_a STR_i STR_l STR_u STR_e "\0"
|
||||
#define STRING_nko0 STR_n STR_k STR_o "\0"
|
||||
#define STRING_nkoo0 STR_n STR_k STR_o STR_o "\0"
|
||||
#define STRING_nl0 STR_n STR_l "\0"
|
||||
#define STRING_no0 STR_n STR_o "\0"
|
||||
#define STRING_noncharactercodepoint0 STR_n STR_o STR_n STR_c STR_h STR_a STR_r STR_a STR_c STR_t STR_e STR_r STR_c STR_o STR_d STR_e STR_p STR_o STR_i STR_n STR_t "\0"
|
||||
#define STRING_nshu0 STR_n STR_s STR_h STR_u "\0"
|
||||
#define STRING_nushu0 STR_n STR_u STR_s STR_h STR_u "\0"
|
||||
#define STRING_nyiakengpuachuehmong0 STR_n STR_y STR_i STR_a STR_k STR_e STR_n STR_g STR_p STR_u STR_a STR_c STR_h STR_u STR_e STR_h STR_m STR_o STR_n STR_g "\0"
|
||||
#define STRING_ogam0 STR_o STR_g STR_a STR_m "\0"
|
||||
#define STRING_ogham0 STR_o STR_g STR_h STR_a STR_m "\0"
|
||||
#define STRING_olchiki0 STR_o STR_l STR_c STR_h STR_i STR_k STR_i "\0"
|
||||
#define STRING_olck0 STR_o STR_l STR_c STR_k "\0"
|
||||
#define STRING_oldhungarian0 STR_o STR_l STR_d STR_h STR_u STR_n STR_g STR_a STR_r STR_i STR_a STR_n "\0"
|
||||
#define STRING_olditalic0 STR_o STR_l STR_d STR_i STR_t STR_a STR_l STR_i STR_c "\0"
|
||||
#define STRING_oldnortharabian0 STR_o STR_l STR_d STR_n STR_o STR_r STR_t STR_h STR_a STR_r STR_a STR_b STR_i STR_a STR_n "\0"
|
||||
#define STRING_oldpermic0 STR_o STR_l STR_d STR_p STR_e STR_r STR_m STR_i STR_c "\0"
|
||||
#define STRING_oldpersian0 STR_o STR_l STR_d STR_p STR_e STR_r STR_s STR_i STR_a STR_n "\0"
|
||||
#define STRING_oldsogdian0 STR_o STR_l STR_d STR_s STR_o STR_g STR_d STR_i STR_a STR_n "\0"
|
||||
#define STRING_oldsoutharabian0 STR_o STR_l STR_d STR_s STR_o STR_u STR_t STR_h STR_a STR_r STR_a STR_b STR_i STR_a STR_n "\0"
|
||||
#define STRING_oldturkic0 STR_o STR_l STR_d STR_t STR_u STR_r STR_k STR_i STR_c "\0"
|
||||
#define STRING_olduyghur0 STR_o STR_l STR_d STR_u STR_y STR_g STR_h STR_u STR_r "\0"
|
||||
#define STRING_olonal0 STR_o STR_l STR_o STR_n STR_a STR_l "\0"
|
||||
#define STRING_onao0 STR_o STR_n STR_a STR_o "\0"
|
||||
#define STRING_oriya0 STR_o STR_r STR_i STR_y STR_a "\0"
|
||||
#define STRING_orkh0 STR_o STR_r STR_k STR_h "\0"
|
||||
#define STRING_orya0 STR_o STR_r STR_y STR_a "\0"
|
||||
#define STRING_osage0 STR_o STR_s STR_a STR_g STR_e "\0"
|
||||
#define STRING_osge0 STR_o STR_s STR_g STR_e "\0"
|
||||
#define STRING_osma0 STR_o STR_s STR_m STR_a "\0"
|
||||
#define STRING_osmanya0 STR_o STR_s STR_m STR_a STR_n STR_y STR_a "\0"
|
||||
#define STRING_ougr0 STR_o STR_u STR_g STR_r "\0"
|
||||
#define STRING_p0 STR_p "\0"
|
||||
#define STRING_pahawhhmong0 STR_p STR_a STR_h STR_a STR_w STR_h STR_h STR_m STR_o STR_n STR_g "\0"
|
||||
#define STRING_palm0 STR_p STR_a STR_l STR_m "\0"
|
||||
#define STRING_palmyrene0 STR_p STR_a STR_l STR_m STR_y STR_r STR_e STR_n STR_e "\0"
|
||||
#define STRING_patsyn0 STR_p STR_a STR_t STR_s STR_y STR_n "\0"
|
||||
#define STRING_patternsyntax0 STR_p STR_a STR_t STR_t STR_e STR_r STR_n STR_s STR_y STR_n STR_t STR_a STR_x "\0"
|
||||
#define STRING_patternwhitespace0 STR_p STR_a STR_t STR_t STR_e STR_r STR_n STR_w STR_h STR_i STR_t STR_e STR_s STR_p STR_a STR_c STR_e "\0"
|
||||
#define STRING_patws0 STR_p STR_a STR_t STR_w STR_s "\0"
|
||||
#define STRING_pauc0 STR_p STR_a STR_u STR_c "\0"
|
||||
#define STRING_paucinhau0 STR_p STR_a STR_u STR_c STR_i STR_n STR_h STR_a STR_u "\0"
|
||||
#define STRING_pc0 STR_p STR_c "\0"
|
||||
#define STRING_pcm0 STR_p STR_c STR_m "\0"
|
||||
#define STRING_pd0 STR_p STR_d "\0"
|
||||
#define STRING_pe0 STR_p STR_e "\0"
|
||||
#define STRING_perm0 STR_p STR_e STR_r STR_m "\0"
|
||||
#define STRING_pf0 STR_p STR_f "\0"
|
||||
#define STRING_phag0 STR_p STR_h STR_a STR_g "\0"
|
||||
#define STRING_phagspa0 STR_p STR_h STR_a STR_g STR_s STR_p STR_a "\0"
|
||||
#define STRING_phli0 STR_p STR_h STR_l STR_i "\0"
|
||||
#define STRING_phlp0 STR_p STR_h STR_l STR_p "\0"
|
||||
#define STRING_phnx0 STR_p STR_h STR_n STR_x "\0"
|
||||
#define STRING_phoenician0 STR_p STR_h STR_o STR_e STR_n STR_i STR_c STR_i STR_a STR_n "\0"
|
||||
#define STRING_pi0 STR_p STR_i "\0"
|
||||
#define STRING_plrd0 STR_p STR_l STR_r STR_d "\0"
|
||||
#define STRING_po0 STR_p STR_o "\0"
|
||||
#define STRING_prependedconcatenationmark0 STR_p STR_r STR_e STR_p STR_e STR_n STR_d STR_e STR_d STR_c STR_o STR_n STR_c STR_a STR_t STR_e STR_n STR_a STR_t STR_i STR_o STR_n STR_m STR_a STR_r STR_k "\0"
|
||||
#define STRING_prti0 STR_p STR_r STR_t STR_i "\0"
|
||||
#define STRING_ps0 STR_p STR_s "\0"
|
||||
#define STRING_psalterpahlavi0 STR_p STR_s STR_a STR_l STR_t STR_e STR_r STR_p STR_a STR_h STR_l STR_a STR_v STR_i "\0"
|
||||
#define STRING_qaac0 STR_q STR_a STR_a STR_c "\0"
|
||||
#define STRING_qaai0 STR_q STR_a STR_a STR_i "\0"
|
||||
#define STRING_qmark0 STR_q STR_m STR_a STR_r STR_k "\0"
|
||||
#define STRING_quotationmark0 STR_q STR_u STR_o STR_t STR_a STR_t STR_i STR_o STR_n STR_m STR_a STR_r STR_k "\0"
|
||||
#define STRING_radical0 STR_r STR_a STR_d STR_i STR_c STR_a STR_l "\0"
|
||||
#define STRING_regionalindicator0 STR_r STR_e STR_g STR_i STR_o STR_n STR_a STR_l STR_i STR_n STR_d STR_i STR_c STR_a STR_t STR_o STR_r "\0"
|
||||
#define STRING_rejang0 STR_r STR_e STR_j STR_a STR_n STR_g "\0"
|
||||
#define STRING_ri0 STR_r STR_i "\0"
|
||||
#define STRING_rjng0 STR_r STR_j STR_n STR_g "\0"
|
||||
#define STRING_rohg0 STR_r STR_o STR_h STR_g "\0"
|
||||
#define STRING_runic0 STR_r STR_u STR_n STR_i STR_c "\0"
|
||||
#define STRING_runr0 STR_r STR_u STR_n STR_r "\0"
|
||||
#define STRING_s0 STR_s "\0"
|
||||
#define STRING_samaritan0 STR_s STR_a STR_m STR_a STR_r STR_i STR_t STR_a STR_n "\0"
|
||||
#define STRING_samr0 STR_s STR_a STR_m STR_r "\0"
|
||||
#define STRING_sarb0 STR_s STR_a STR_r STR_b "\0"
|
||||
#define STRING_saur0 STR_s STR_a STR_u STR_r "\0"
|
||||
#define STRING_saurashtra0 STR_s STR_a STR_u STR_r STR_a STR_s STR_h STR_t STR_r STR_a "\0"
|
||||
#define STRING_sc0 STR_s STR_c "\0"
|
||||
#define STRING_sd0 STR_s STR_d "\0"
|
||||
#define STRING_sentenceterminal0 STR_s STR_e STR_n STR_t STR_e STR_n STR_c STR_e STR_t STR_e STR_r STR_m STR_i STR_n STR_a STR_l "\0"
|
||||
#define STRING_sgnw0 STR_s STR_g STR_n STR_w "\0"
|
||||
#define STRING_sharada0 STR_s STR_h STR_a STR_r STR_a STR_d STR_a "\0"
|
||||
#define STRING_shavian0 STR_s STR_h STR_a STR_v STR_i STR_a STR_n "\0"
|
||||
#define STRING_shaw0 STR_s STR_h STR_a STR_w "\0"
|
||||
#define STRING_shrd0 STR_s STR_h STR_r STR_d "\0"
|
||||
#define STRING_sidd0 STR_s STR_i STR_d STR_d "\0"
|
||||
#define STRING_siddham0 STR_s STR_i STR_d STR_d STR_h STR_a STR_m "\0"
|
||||
#define STRING_signwriting0 STR_s STR_i STR_g STR_n STR_w STR_r STR_i STR_t STR_i STR_n STR_g "\0"
|
||||
#define STRING_sind0 STR_s STR_i STR_n STR_d "\0"
|
||||
#define STRING_sinh0 STR_s STR_i STR_n STR_h "\0"
|
||||
#define STRING_sinhala0 STR_s STR_i STR_n STR_h STR_a STR_l STR_a "\0"
|
||||
#define STRING_sk0 STR_s STR_k "\0"
|
||||
#define STRING_sm0 STR_s STR_m "\0"
|
||||
#define STRING_so0 STR_s STR_o "\0"
|
||||
#define STRING_softdotted0 STR_s STR_o STR_f STR_t STR_d STR_o STR_t STR_t STR_e STR_d "\0"
|
||||
#define STRING_sogd0 STR_s STR_o STR_g STR_d "\0"
|
||||
#define STRING_sogdian0 STR_s STR_o STR_g STR_d STR_i STR_a STR_n "\0"
|
||||
#define STRING_sogo0 STR_s STR_o STR_g STR_o "\0"
|
||||
#define STRING_sora0 STR_s STR_o STR_r STR_a "\0"
|
||||
#define STRING_sorasompeng0 STR_s STR_o STR_r STR_a STR_s STR_o STR_m STR_p STR_e STR_n STR_g "\0"
|
||||
#define STRING_soyo0 STR_s STR_o STR_y STR_o "\0"
|
||||
#define STRING_soyombo0 STR_s STR_o STR_y STR_o STR_m STR_b STR_o "\0"
|
||||
#define STRING_space0 STR_s STR_p STR_a STR_c STR_e "\0"
|
||||
#define STRING_sterm0 STR_s STR_t STR_e STR_r STR_m "\0"
|
||||
#define STRING_sund0 STR_s STR_u STR_n STR_d "\0"
|
||||
#define STRING_sundanese0 STR_s STR_u STR_n STR_d STR_a STR_n STR_e STR_s STR_e "\0"
|
||||
#define STRING_sunu0 STR_s STR_u STR_n STR_u "\0"
|
||||
#define STRING_sunuwar0 STR_s STR_u STR_n STR_u STR_w STR_a STR_r "\0"
|
||||
#define STRING_sylo0 STR_s STR_y STR_l STR_o "\0"
|
||||
#define STRING_sylotinagri0 STR_s STR_y STR_l STR_o STR_t STR_i STR_n STR_a STR_g STR_r STR_i "\0"
|
||||
#define STRING_syrc0 STR_s STR_y STR_r STR_c "\0"
|
||||
#define STRING_syriac0 STR_s STR_y STR_r STR_i STR_a STR_c "\0"
|
||||
#define STRING_tagalog0 STR_t STR_a STR_g STR_a STR_l STR_o STR_g "\0"
|
||||
#define STRING_tagb0 STR_t STR_a STR_g STR_b "\0"
|
||||
#define STRING_tagbanwa0 STR_t STR_a STR_g STR_b STR_a STR_n STR_w STR_a "\0"
|
||||
#define STRING_taile0 STR_t STR_a STR_i STR_l STR_e "\0"
|
||||
#define STRING_taitham0 STR_t STR_a STR_i STR_t STR_h STR_a STR_m "\0"
|
||||
#define STRING_taiviet0 STR_t STR_a STR_i STR_v STR_i STR_e STR_t "\0"
|
||||
#define STRING_takr0 STR_t STR_a STR_k STR_r "\0"
|
||||
#define STRING_takri0 STR_t STR_a STR_k STR_r STR_i "\0"
|
||||
#define STRING_tale0 STR_t STR_a STR_l STR_e "\0"
|
||||
#define STRING_talu0 STR_t STR_a STR_l STR_u "\0"
|
||||
#define STRING_tamil0 STR_t STR_a STR_m STR_i STR_l "\0"
|
||||
#define STRING_taml0 STR_t STR_a STR_m STR_l "\0"
|
||||
#define STRING_tang0 STR_t STR_a STR_n STR_g "\0"
|
||||
#define STRING_tangsa0 STR_t STR_a STR_n STR_g STR_s STR_a "\0"
|
||||
#define STRING_tangut0 STR_t STR_a STR_n STR_g STR_u STR_t "\0"
|
||||
#define STRING_tavt0 STR_t STR_a STR_v STR_t "\0"
|
||||
#define STRING_telu0 STR_t STR_e STR_l STR_u "\0"
|
||||
#define STRING_telugu0 STR_t STR_e STR_l STR_u STR_g STR_u "\0"
|
||||
#define STRING_term0 STR_t STR_e STR_r STR_m "\0"
|
||||
#define STRING_terminalpunctuation0 STR_t STR_e STR_r STR_m STR_i STR_n STR_a STR_l STR_p STR_u STR_n STR_c STR_t STR_u STR_a STR_t STR_i STR_o STR_n "\0"
|
||||
#define STRING_tfng0 STR_t STR_f STR_n STR_g "\0"
|
||||
#define STRING_tglg0 STR_t STR_g STR_l STR_g "\0"
|
||||
#define STRING_thaa0 STR_t STR_h STR_a STR_a "\0"
|
||||
#define STRING_thaana0 STR_t STR_h STR_a STR_a STR_n STR_a "\0"
|
||||
#define STRING_thai0 STR_t STR_h STR_a STR_i "\0"
|
||||
#define STRING_tibetan0 STR_t STR_i STR_b STR_e STR_t STR_a STR_n "\0"
|
||||
#define STRING_tibt0 STR_t STR_i STR_b STR_t "\0"
|
||||
#define STRING_tifinagh0 STR_t STR_i STR_f STR_i STR_n STR_a STR_g STR_h "\0"
|
||||
#define STRING_tirh0 STR_t STR_i STR_r STR_h "\0"
|
||||
#define STRING_tirhuta0 STR_t STR_i STR_r STR_h STR_u STR_t STR_a "\0"
|
||||
#define STRING_tnsa0 STR_t STR_n STR_s STR_a "\0"
|
||||
#define STRING_todhri0 STR_t STR_o STR_d STR_h STR_r STR_i "\0"
|
||||
#define STRING_todr0 STR_t STR_o STR_d STR_r "\0"
|
||||
#define STRING_toto0 STR_t STR_o STR_t STR_o "\0"
|
||||
#define STRING_tulutigalari0 STR_t STR_u STR_l STR_u STR_t STR_i STR_g STR_a STR_l STR_a STR_r STR_i "\0"
|
||||
#define STRING_tutg0 STR_t STR_u STR_t STR_g "\0"
|
||||
#define STRING_ugar0 STR_u STR_g STR_a STR_r "\0"
|
||||
#define STRING_ugaritic0 STR_u STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0"
|
||||
#define STRING_uideo0 STR_u STR_i STR_d STR_e STR_o "\0"
|
||||
#define STRING_unifiedideograph0 STR_u STR_n STR_i STR_f STR_i STR_e STR_d STR_i STR_d STR_e STR_o STR_g STR_r STR_a STR_p STR_h "\0"
|
||||
#define STRING_unknown0 STR_u STR_n STR_k STR_n STR_o STR_w STR_n "\0"
|
||||
#define STRING_upper0 STR_u STR_p STR_p STR_e STR_r "\0"
|
||||
#define STRING_uppercase0 STR_u STR_p STR_p STR_e STR_r STR_c STR_a STR_s STR_e "\0"
|
||||
#define STRING_vai0 STR_v STR_a STR_i "\0"
|
||||
#define STRING_vaii0 STR_v STR_a STR_i STR_i "\0"
|
||||
#define STRING_variationselector0 STR_v STR_a STR_r STR_i STR_a STR_t STR_i STR_o STR_n STR_s STR_e STR_l STR_e STR_c STR_t STR_o STR_r "\0"
|
||||
#define STRING_vith0 STR_v STR_i STR_t STR_h "\0"
|
||||
#define STRING_vithkuqi0 STR_v STR_i STR_t STR_h STR_k STR_u STR_q STR_i "\0"
|
||||
#define STRING_vs0 STR_v STR_s "\0"
|
||||
#define STRING_wancho0 STR_w STR_a STR_n STR_c STR_h STR_o "\0"
|
||||
#define STRING_wara0 STR_w STR_a STR_r STR_a "\0"
|
||||
#define STRING_warangciti0 STR_w STR_a STR_r STR_a STR_n STR_g STR_c STR_i STR_t STR_i "\0"
|
||||
#define STRING_wcho0 STR_w STR_c STR_h STR_o "\0"
|
||||
#define STRING_whitespace0 STR_w STR_h STR_i STR_t STR_e STR_s STR_p STR_a STR_c STR_e "\0"
|
||||
#define STRING_wspace0 STR_w STR_s STR_p STR_a STR_c STR_e "\0"
|
||||
#define STRING_xan0 STR_x STR_a STR_n "\0"
|
||||
#define STRING_xidc0 STR_x STR_i STR_d STR_c "\0"
|
||||
#define STRING_xidcontinue0 STR_x STR_i STR_d STR_c STR_o STR_n STR_t STR_i STR_n STR_u STR_e "\0"
|
||||
#define STRING_xids0 STR_x STR_i STR_d STR_s "\0"
|
||||
#define STRING_xidstart0 STR_x STR_i STR_d STR_s STR_t STR_a STR_r STR_t "\0"
|
||||
#define STRING_xpeo0 STR_x STR_p STR_e STR_o "\0"
|
||||
#define STRING_xps0 STR_x STR_p STR_s "\0"
|
||||
#define STRING_xsp0 STR_x STR_s STR_p "\0"
|
||||
#define STRING_xsux0 STR_x STR_s STR_u STR_x "\0"
|
||||
#define STRING_xuc0 STR_x STR_u STR_c "\0"
|
||||
#define STRING_xwd0 STR_x STR_w STR_d "\0"
|
||||
#define STRING_yezi0 STR_y STR_e STR_z STR_i "\0"
|
||||
#define STRING_yezidi0 STR_y STR_e STR_z STR_i STR_d STR_i "\0"
|
||||
#define STRING_yi0 STR_y STR_i "\0"
|
||||
#define STRING_yiii0 STR_y STR_i STR_i STR_i "\0"
|
||||
#define STRING_z0 STR_z "\0"
|
||||
#define STRING_zanabazarsquare0 STR_z STR_a STR_n STR_a STR_b STR_a STR_z STR_a STR_r STR_s STR_q STR_u STR_a STR_r STR_e "\0"
|
||||
#define STRING_zanb0 STR_z STR_a STR_n STR_b "\0"
|
||||
#define STRING_zinh0 STR_z STR_i STR_n STR_h "\0"
|
||||
#define STRING_zl0 STR_z STR_l "\0"
|
||||
#define STRING_zp0 STR_z STR_p "\0"
|
||||
#define STRING_zs0 STR_z STR_s "\0"
|
||||
#define STRING_zyyy0 STR_z STR_y STR_y STR_y "\0"
|
||||
#define STRING_zzzz0 STR_z STR_z STR_z STR_z "\0"
|
||||
|
||||
const char PRIV(utt_names)[] =
|
||||
STRING_adlam0
|
||||
STRING_adlm0
|
||||
STRING_aghb0
|
||||
STRING_ahex0
|
||||
STRING_ahom0
|
||||
STRING_alpha0
|
||||
STRING_alphabetic0
|
||||
STRING_anatolianhieroglyphs0
|
||||
STRING_any0
|
||||
STRING_arab0
|
||||
STRING_arabic0
|
||||
STRING_armenian0
|
||||
STRING_armi0
|
||||
STRING_armn0
|
||||
STRING_ascii0
|
||||
STRING_asciihexdigit0
|
||||
STRING_avestan0
|
||||
STRING_avst0
|
||||
STRING_bali0
|
||||
STRING_balinese0
|
||||
STRING_bamu0
|
||||
STRING_bamum0
|
||||
STRING_bass0
|
||||
STRING_bassavah0
|
||||
STRING_batak0
|
||||
STRING_batk0
|
||||
STRING_beng0
|
||||
STRING_bengali0
|
||||
STRING_bhaiksuki0
|
||||
STRING_bhks0
|
||||
STRING_bidial0
|
||||
STRING_bidian0
|
||||
STRING_bidib0
|
||||
STRING_bidibn0
|
||||
STRING_bidic0
|
||||
STRING_bidicontrol0
|
||||
STRING_bidics0
|
||||
STRING_bidien0
|
||||
STRING_bidies0
|
||||
STRING_bidiet0
|
||||
STRING_bidifsi0
|
||||
STRING_bidil0
|
||||
STRING_bidilre0
|
||||
STRING_bidilri0
|
||||
STRING_bidilro0
|
||||
STRING_bidim0
|
||||
STRING_bidimirrored0
|
||||
STRING_bidinsm0
|
||||
STRING_bidion0
|
||||
STRING_bidipdf0
|
||||
STRING_bidipdi0
|
||||
STRING_bidir0
|
||||
STRING_bidirle0
|
||||
STRING_bidirli0
|
||||
STRING_bidirlo0
|
||||
STRING_bidis0
|
||||
STRING_bidiws0
|
||||
STRING_bopo0
|
||||
STRING_bopomofo0
|
||||
STRING_brah0
|
||||
STRING_brahmi0
|
||||
STRING_brai0
|
||||
STRING_braille0
|
||||
STRING_bugi0
|
||||
STRING_buginese0
|
||||
STRING_buhd0
|
||||
STRING_buhid0
|
||||
STRING_c0
|
||||
STRING_cakm0
|
||||
STRING_canadianaboriginal0
|
||||
STRING_cans0
|
||||
STRING_cari0
|
||||
STRING_carian0
|
||||
STRING_cased0
|
||||
STRING_caseignorable0
|
||||
STRING_caucasianalbanian0
|
||||
STRING_cc0
|
||||
STRING_cf0
|
||||
STRING_chakma0
|
||||
STRING_cham0
|
||||
STRING_changeswhencasefolded0
|
||||
STRING_changeswhencasemapped0
|
||||
STRING_changeswhenlowercased0
|
||||
STRING_changeswhentitlecased0
|
||||
STRING_changeswhenuppercased0
|
||||
STRING_cher0
|
||||
STRING_cherokee0
|
||||
STRING_chorasmian0
|
||||
STRING_chrs0
|
||||
STRING_ci0
|
||||
STRING_cn0
|
||||
STRING_co0
|
||||
STRING_common0
|
||||
STRING_copt0
|
||||
STRING_coptic0
|
||||
STRING_cpmn0
|
||||
STRING_cprt0
|
||||
STRING_cs0
|
||||
STRING_cuneiform0
|
||||
STRING_cwcf0
|
||||
STRING_cwcm0
|
||||
STRING_cwl0
|
||||
STRING_cwt0
|
||||
STRING_cwu0
|
||||
STRING_cypriot0
|
||||
STRING_cyprominoan0
|
||||
STRING_cyrillic0
|
||||
STRING_cyrl0
|
||||
STRING_dash0
|
||||
STRING_defaultignorablecodepoint0
|
||||
STRING_dep0
|
||||
STRING_deprecated0
|
||||
STRING_deseret0
|
||||
STRING_deva0
|
||||
STRING_devanagari0
|
||||
STRING_di0
|
||||
STRING_dia0
|
||||
STRING_diacritic0
|
||||
STRING_diak0
|
||||
STRING_divesakuru0
|
||||
STRING_dogr0
|
||||
STRING_dogra0
|
||||
STRING_dsrt0
|
||||
STRING_dupl0
|
||||
STRING_duployan0
|
||||
STRING_ebase0
|
||||
STRING_ecomp0
|
||||
STRING_egyp0
|
||||
STRING_egyptianhieroglyphs0
|
||||
STRING_elba0
|
||||
STRING_elbasan0
|
||||
STRING_elym0
|
||||
STRING_elymaic0
|
||||
STRING_emod0
|
||||
STRING_emoji0
|
||||
STRING_emojicomponent0
|
||||
STRING_emojimodifier0
|
||||
STRING_emojimodifierbase0
|
||||
STRING_emojipresentation0
|
||||
STRING_epres0
|
||||
STRING_ethi0
|
||||
STRING_ethiopic0
|
||||
STRING_ext0
|
||||
STRING_extendedpictographic0
|
||||
STRING_extender0
|
||||
STRING_extpict0
|
||||
STRING_gara0
|
||||
STRING_garay0
|
||||
STRING_geor0
|
||||
STRING_georgian0
|
||||
STRING_glag0
|
||||
STRING_glagolitic0
|
||||
STRING_gong0
|
||||
STRING_gonm0
|
||||
STRING_goth0
|
||||
STRING_gothic0
|
||||
STRING_gran0
|
||||
STRING_grantha0
|
||||
STRING_graphemebase0
|
||||
STRING_graphemeextend0
|
||||
STRING_graphemelink0
|
||||
STRING_grbase0
|
||||
STRING_greek0
|
||||
STRING_grek0
|
||||
STRING_grext0
|
||||
STRING_grlink0
|
||||
STRING_gujarati0
|
||||
STRING_gujr0
|
||||
STRING_gukh0
|
||||
STRING_gunjalagondi0
|
||||
STRING_gurmukhi0
|
||||
STRING_guru0
|
||||
STRING_gurungkhema0
|
||||
STRING_han0
|
||||
STRING_hang0
|
||||
STRING_hangul0
|
||||
STRING_hani0
|
||||
STRING_hanifirohingya0
|
||||
STRING_hano0
|
||||
STRING_hanunoo0
|
||||
STRING_hatr0
|
||||
STRING_hatran0
|
||||
STRING_hebr0
|
||||
STRING_hebrew0
|
||||
STRING_hex0
|
||||
STRING_hexdigit0
|
||||
STRING_hira0
|
||||
STRING_hiragana0
|
||||
STRING_hluw0
|
||||
STRING_hmng0
|
||||
STRING_hmnp0
|
||||
STRING_hung0
|
||||
STRING_idc0
|
||||
STRING_idcompatmathcontinue0
|
||||
STRING_idcompatmathstart0
|
||||
STRING_idcontinue0
|
||||
STRING_ideo0
|
||||
STRING_ideographic0
|
||||
STRING_ids0
|
||||
STRING_idsb0
|
||||
STRING_idsbinaryoperator0
|
||||
STRING_idst0
|
||||
STRING_idstart0
|
||||
STRING_idstrinaryoperator0
|
||||
STRING_idsu0
|
||||
STRING_idsunaryoperator0
|
||||
STRING_imperialaramaic0
|
||||
STRING_incb0
|
||||
STRING_inherited0
|
||||
STRING_inscriptionalpahlavi0
|
||||
STRING_inscriptionalparthian0
|
||||
STRING_ital0
|
||||
STRING_java0
|
||||
STRING_javanese0
|
||||
STRING_joinc0
|
||||
STRING_joincontrol0
|
||||
STRING_kaithi0
|
||||
STRING_kali0
|
||||
STRING_kana0
|
||||
STRING_kannada0
|
||||
STRING_katakana0
|
||||
STRING_kawi0
|
||||
STRING_kayahli0
|
||||
STRING_khar0
|
||||
STRING_kharoshthi0
|
||||
STRING_khitansmallscript0
|
||||
STRING_khmer0
|
||||
STRING_khmr0
|
||||
STRING_khoj0
|
||||
STRING_khojki0
|
||||
STRING_khudawadi0
|
||||
STRING_kiratrai0
|
||||
STRING_kits0
|
||||
STRING_knda0
|
||||
STRING_krai0
|
||||
STRING_kthi0
|
||||
STRING_l0
|
||||
STRING_l_AMPERSAND0
|
||||
STRING_lana0
|
||||
STRING_lao0
|
||||
STRING_laoo0
|
||||
STRING_latin0
|
||||
STRING_latn0
|
||||
STRING_lc0
|
||||
STRING_lepc0
|
||||
STRING_lepcha0
|
||||
STRING_limb0
|
||||
STRING_limbu0
|
||||
STRING_lina0
|
||||
STRING_linb0
|
||||
STRING_lineara0
|
||||
STRING_linearb0
|
||||
STRING_lisu0
|
||||
STRING_ll0
|
||||
STRING_lm0
|
||||
STRING_lo0
|
||||
STRING_loe0
|
||||
STRING_logicalorderexception0
|
||||
STRING_lower0
|
||||
STRING_lowercase0
|
||||
STRING_lt0
|
||||
STRING_lu0
|
||||
STRING_lyci0
|
||||
STRING_lycian0
|
||||
STRING_lydi0
|
||||
STRING_lydian0
|
||||
STRING_m0
|
||||
STRING_mahajani0
|
||||
STRING_mahj0
|
||||
STRING_maka0
|
||||
STRING_makasar0
|
||||
STRING_malayalam0
|
||||
STRING_mand0
|
||||
STRING_mandaic0
|
||||
STRING_mani0
|
||||
STRING_manichaean0
|
||||
STRING_marc0
|
||||
STRING_marchen0
|
||||
STRING_masaramgondi0
|
||||
STRING_math0
|
||||
STRING_mc0
|
||||
STRING_mcm0
|
||||
STRING_me0
|
||||
STRING_medefaidrin0
|
||||
STRING_medf0
|
||||
STRING_meeteimayek0
|
||||
STRING_mend0
|
||||
STRING_mendekikakui0
|
||||
STRING_merc0
|
||||
STRING_mero0
|
||||
STRING_meroiticcursive0
|
||||
STRING_meroitichieroglyphs0
|
||||
STRING_miao0
|
||||
STRING_mlym0
|
||||
STRING_mn0
|
||||
STRING_modi0
|
||||
STRING_modifiercombiningmark0
|
||||
STRING_mong0
|
||||
STRING_mongolian0
|
||||
STRING_mro0
|
||||
STRING_mroo0
|
||||
STRING_mtei0
|
||||
STRING_mult0
|
||||
STRING_multani0
|
||||
STRING_myanmar0
|
||||
STRING_mymr0
|
||||
STRING_n0
|
||||
STRING_nabataean0
|
||||
STRING_nagm0
|
||||
STRING_nagmundari0
|
||||
STRING_nand0
|
||||
STRING_nandinagari0
|
||||
STRING_narb0
|
||||
STRING_nbat0
|
||||
STRING_nchar0
|
||||
STRING_nd0
|
||||
STRING_newa0
|
||||
STRING_newtailue0
|
||||
STRING_nko0
|
||||
STRING_nkoo0
|
||||
STRING_nl0
|
||||
STRING_no0
|
||||
STRING_noncharactercodepoint0
|
||||
STRING_nshu0
|
||||
STRING_nushu0
|
||||
STRING_nyiakengpuachuehmong0
|
||||
STRING_ogam0
|
||||
STRING_ogham0
|
||||
STRING_olchiki0
|
||||
STRING_olck0
|
||||
STRING_oldhungarian0
|
||||
STRING_olditalic0
|
||||
STRING_oldnortharabian0
|
||||
STRING_oldpermic0
|
||||
STRING_oldpersian0
|
||||
STRING_oldsogdian0
|
||||
STRING_oldsoutharabian0
|
||||
STRING_oldturkic0
|
||||
STRING_olduyghur0
|
||||
STRING_olonal0
|
||||
STRING_onao0
|
||||
STRING_oriya0
|
||||
STRING_orkh0
|
||||
STRING_orya0
|
||||
STRING_osage0
|
||||
STRING_osge0
|
||||
STRING_osma0
|
||||
STRING_osmanya0
|
||||
STRING_ougr0
|
||||
STRING_p0
|
||||
STRING_pahawhhmong0
|
||||
STRING_palm0
|
||||
STRING_palmyrene0
|
||||
STRING_patsyn0
|
||||
STRING_patternsyntax0
|
||||
STRING_patternwhitespace0
|
||||
STRING_patws0
|
||||
STRING_pauc0
|
||||
STRING_paucinhau0
|
||||
STRING_pc0
|
||||
STRING_pcm0
|
||||
STRING_pd0
|
||||
STRING_pe0
|
||||
STRING_perm0
|
||||
STRING_pf0
|
||||
STRING_phag0
|
||||
STRING_phagspa0
|
||||
STRING_phli0
|
||||
STRING_phlp0
|
||||
STRING_phnx0
|
||||
STRING_phoenician0
|
||||
STRING_pi0
|
||||
STRING_plrd0
|
||||
STRING_po0
|
||||
STRING_prependedconcatenationmark0
|
||||
STRING_prti0
|
||||
STRING_ps0
|
||||
STRING_psalterpahlavi0
|
||||
STRING_qaac0
|
||||
STRING_qaai0
|
||||
STRING_qmark0
|
||||
STRING_quotationmark0
|
||||
STRING_radical0
|
||||
STRING_regionalindicator0
|
||||
STRING_rejang0
|
||||
STRING_ri0
|
||||
STRING_rjng0
|
||||
STRING_rohg0
|
||||
STRING_runic0
|
||||
STRING_runr0
|
||||
STRING_s0
|
||||
STRING_samaritan0
|
||||
STRING_samr0
|
||||
STRING_sarb0
|
||||
STRING_saur0
|
||||
STRING_saurashtra0
|
||||
STRING_sc0
|
||||
STRING_sd0
|
||||
STRING_sentenceterminal0
|
||||
STRING_sgnw0
|
||||
STRING_sharada0
|
||||
STRING_shavian0
|
||||
STRING_shaw0
|
||||
STRING_shrd0
|
||||
STRING_sidd0
|
||||
STRING_siddham0
|
||||
STRING_signwriting0
|
||||
STRING_sind0
|
||||
STRING_sinh0
|
||||
STRING_sinhala0
|
||||
STRING_sk0
|
||||
STRING_sm0
|
||||
STRING_so0
|
||||
STRING_softdotted0
|
||||
STRING_sogd0
|
||||
STRING_sogdian0
|
||||
STRING_sogo0
|
||||
STRING_sora0
|
||||
STRING_sorasompeng0
|
||||
STRING_soyo0
|
||||
STRING_soyombo0
|
||||
STRING_space0
|
||||
STRING_sterm0
|
||||
STRING_sund0
|
||||
STRING_sundanese0
|
||||
STRING_sunu0
|
||||
STRING_sunuwar0
|
||||
STRING_sylo0
|
||||
STRING_sylotinagri0
|
||||
STRING_syrc0
|
||||
STRING_syriac0
|
||||
STRING_tagalog0
|
||||
STRING_tagb0
|
||||
STRING_tagbanwa0
|
||||
STRING_taile0
|
||||
STRING_taitham0
|
||||
STRING_taiviet0
|
||||
STRING_takr0
|
||||
STRING_takri0
|
||||
STRING_tale0
|
||||
STRING_talu0
|
||||
STRING_tamil0
|
||||
STRING_taml0
|
||||
STRING_tang0
|
||||
STRING_tangsa0
|
||||
STRING_tangut0
|
||||
STRING_tavt0
|
||||
STRING_telu0
|
||||
STRING_telugu0
|
||||
STRING_term0
|
||||
STRING_terminalpunctuation0
|
||||
STRING_tfng0
|
||||
STRING_tglg0
|
||||
STRING_thaa0
|
||||
STRING_thaana0
|
||||
STRING_thai0
|
||||
STRING_tibetan0
|
||||
STRING_tibt0
|
||||
STRING_tifinagh0
|
||||
STRING_tirh0
|
||||
STRING_tirhuta0
|
||||
STRING_tnsa0
|
||||
STRING_todhri0
|
||||
STRING_todr0
|
||||
STRING_toto0
|
||||
STRING_tulutigalari0
|
||||
STRING_tutg0
|
||||
STRING_ugar0
|
||||
STRING_ugaritic0
|
||||
STRING_uideo0
|
||||
STRING_unifiedideograph0
|
||||
STRING_unknown0
|
||||
STRING_upper0
|
||||
STRING_uppercase0
|
||||
STRING_vai0
|
||||
STRING_vaii0
|
||||
STRING_variationselector0
|
||||
STRING_vith0
|
||||
STRING_vithkuqi0
|
||||
STRING_vs0
|
||||
STRING_wancho0
|
||||
STRING_wara0
|
||||
STRING_warangciti0
|
||||
STRING_wcho0
|
||||
STRING_whitespace0
|
||||
STRING_wspace0
|
||||
STRING_xan0
|
||||
STRING_xidc0
|
||||
STRING_xidcontinue0
|
||||
STRING_xids0
|
||||
STRING_xidstart0
|
||||
STRING_xpeo0
|
||||
STRING_xps0
|
||||
STRING_xsp0
|
||||
STRING_xsux0
|
||||
STRING_xuc0
|
||||
STRING_xwd0
|
||||
STRING_yezi0
|
||||
STRING_yezidi0
|
||||
STRING_yi0
|
||||
STRING_yiii0
|
||||
STRING_z0
|
||||
STRING_zanabazarsquare0
|
||||
STRING_zanb0
|
||||
STRING_zinh0
|
||||
STRING_zl0
|
||||
STRING_zp0
|
||||
STRING_zs0
|
||||
STRING_zyyy0
|
||||
STRING_zzzz0;
|
||||
|
||||
const ucp_type_table PRIV(utt)[] = {
|
||||
{ 0, PT_SCX, ucp_Adlam },
|
||||
{ 6, PT_SCX, ucp_Adlam },
|
||||
{ 11, PT_SCX, ucp_Caucasian_Albanian },
|
||||
{ 16, PT_BOOL, ucp_ASCII_Hex_Digit },
|
||||
{ 21, PT_SC, ucp_Ahom },
|
||||
{ 26, PT_BOOL, ucp_Alphabetic },
|
||||
{ 32, PT_BOOL, ucp_Alphabetic },
|
||||
{ 43, PT_SC, ucp_Anatolian_Hieroglyphs },
|
||||
{ 64, PT_ANY, 0 },
|
||||
{ 68, PT_SCX, ucp_Arabic },
|
||||
{ 73, PT_SCX, ucp_Arabic },
|
||||
{ 80, PT_SCX, ucp_Armenian },
|
||||
{ 89, PT_SC, ucp_Imperial_Aramaic },
|
||||
{ 94, PT_SCX, ucp_Armenian },
|
||||
{ 99, PT_BOOL, ucp_ASCII },
|
||||
{ 105, PT_BOOL, ucp_ASCII_Hex_Digit },
|
||||
{ 119, PT_SCX, ucp_Avestan },
|
||||
{ 127, PT_SCX, ucp_Avestan },
|
||||
{ 132, PT_SC, ucp_Balinese },
|
||||
{ 137, PT_SC, ucp_Balinese },
|
||||
{ 146, PT_SC, ucp_Bamum },
|
||||
{ 151, PT_SC, ucp_Bamum },
|
||||
{ 157, PT_SC, ucp_Bassa_Vah },
|
||||
{ 162, PT_SC, ucp_Bassa_Vah },
|
||||
{ 171, PT_SC, ucp_Batak },
|
||||
{ 177, PT_SC, ucp_Batak },
|
||||
{ 182, PT_SCX, ucp_Bengali },
|
||||
{ 187, PT_SCX, ucp_Bengali },
|
||||
{ 195, PT_SC, ucp_Bhaiksuki },
|
||||
{ 205, PT_SC, ucp_Bhaiksuki },
|
||||
{ 210, PT_BIDICL, ucp_bidiAL },
|
||||
{ 217, PT_BIDICL, ucp_bidiAN },
|
||||
{ 224, PT_BIDICL, ucp_bidiB },
|
||||
{ 230, PT_BIDICL, ucp_bidiBN },
|
||||
{ 237, PT_BOOL, ucp_Bidi_Control },
|
||||
{ 243, PT_BOOL, ucp_Bidi_Control },
|
||||
{ 255, PT_BIDICL, ucp_bidiCS },
|
||||
{ 262, PT_BIDICL, ucp_bidiEN },
|
||||
{ 269, PT_BIDICL, ucp_bidiES },
|
||||
{ 276, PT_BIDICL, ucp_bidiET },
|
||||
{ 283, PT_BIDICL, ucp_bidiFSI },
|
||||
{ 291, PT_BIDICL, ucp_bidiL },
|
||||
{ 297, PT_BIDICL, ucp_bidiLRE },
|
||||
{ 305, PT_BIDICL, ucp_bidiLRI },
|
||||
{ 313, PT_BIDICL, ucp_bidiLRO },
|
||||
{ 321, PT_BOOL, ucp_Bidi_Mirrored },
|
||||
{ 327, PT_BOOL, ucp_Bidi_Mirrored },
|
||||
{ 340, PT_BIDICL, ucp_bidiNSM },
|
||||
{ 348, PT_BIDICL, ucp_bidiON },
|
||||
{ 355, PT_BIDICL, ucp_bidiPDF },
|
||||
{ 363, PT_BIDICL, ucp_bidiPDI },
|
||||
{ 371, PT_BIDICL, ucp_bidiR },
|
||||
{ 377, PT_BIDICL, ucp_bidiRLE },
|
||||
{ 385, PT_BIDICL, ucp_bidiRLI },
|
||||
{ 393, PT_BIDICL, ucp_bidiRLO },
|
||||
{ 401, PT_BIDICL, ucp_bidiS },
|
||||
{ 407, PT_BIDICL, ucp_bidiWS },
|
||||
{ 414, PT_SCX, ucp_Bopomofo },
|
||||
{ 419, PT_SCX, ucp_Bopomofo },
|
||||
{ 428, PT_SC, ucp_Brahmi },
|
||||
{ 433, PT_SC, ucp_Brahmi },
|
||||
{ 440, PT_SC, ucp_Braille },
|
||||
{ 445, PT_SC, ucp_Braille },
|
||||
{ 453, PT_SCX, ucp_Buginese },
|
||||
{ 458, PT_SCX, ucp_Buginese },
|
||||
{ 467, PT_SCX, ucp_Buhid },
|
||||
{ 472, PT_SCX, ucp_Buhid },
|
||||
{ 478, PT_GC, ucp_C },
|
||||
{ 480, PT_SCX, ucp_Chakma },
|
||||
{ 485, PT_SC, ucp_Canadian_Aboriginal },
|
||||
{ 504, PT_SC, ucp_Canadian_Aboriginal },
|
||||
{ 509, PT_SCX, ucp_Carian },
|
||||
{ 514, PT_SCX, ucp_Carian },
|
||||
{ 521, PT_BOOL, ucp_Cased },
|
||||
{ 527, PT_BOOL, ucp_Case_Ignorable },
|
||||
{ 541, PT_SCX, ucp_Caucasian_Albanian },
|
||||
{ 559, PT_PC, ucp_Cc },
|
||||
{ 562, PT_PC, ucp_Cf },
|
||||
{ 565, PT_SCX, ucp_Chakma },
|
||||
{ 572, PT_SC, ucp_Cham },
|
||||
{ 577, PT_BOOL, ucp_Changes_When_Casefolded },
|
||||
{ 599, PT_BOOL, ucp_Changes_When_Casemapped },
|
||||
{ 621, PT_BOOL, ucp_Changes_When_Lowercased },
|
||||
{ 643, PT_BOOL, ucp_Changes_When_Titlecased },
|
||||
{ 665, PT_BOOL, ucp_Changes_When_Uppercased },
|
||||
{ 687, PT_SCX, ucp_Cherokee },
|
||||
{ 692, PT_SCX, ucp_Cherokee },
|
||||
{ 701, PT_SC, ucp_Chorasmian },
|
||||
{ 712, PT_SC, ucp_Chorasmian },
|
||||
{ 717, PT_BOOL, ucp_Case_Ignorable },
|
||||
{ 720, PT_PC, ucp_Cn },
|
||||
{ 723, PT_PC, ucp_Co },
|
||||
{ 726, PT_SC, ucp_Common },
|
||||
{ 733, PT_SCX, ucp_Coptic },
|
||||
{ 738, PT_SCX, ucp_Coptic },
|
||||
{ 745, PT_SCX, ucp_Cypro_Minoan },
|
||||
{ 750, PT_SCX, ucp_Cypriot },
|
||||
{ 755, PT_PC, ucp_Cs },
|
||||
{ 758, PT_SC, ucp_Cuneiform },
|
||||
{ 768, PT_BOOL, ucp_Changes_When_Casefolded },
|
||||
{ 773, PT_BOOL, ucp_Changes_When_Casemapped },
|
||||
{ 778, PT_BOOL, ucp_Changes_When_Lowercased },
|
||||
{ 782, PT_BOOL, ucp_Changes_When_Titlecased },
|
||||
{ 786, PT_BOOL, ucp_Changes_When_Uppercased },
|
||||
{ 790, PT_SCX, ucp_Cypriot },
|
||||
{ 798, PT_SCX, ucp_Cypro_Minoan },
|
||||
{ 810, PT_SCX, ucp_Cyrillic },
|
||||
{ 819, PT_SCX, ucp_Cyrillic },
|
||||
{ 824, PT_BOOL, ucp_Dash },
|
||||
{ 829, PT_BOOL, ucp_Default_Ignorable_Code_Point },
|
||||
{ 855, PT_BOOL, ucp_Deprecated },
|
||||
{ 859, PT_BOOL, ucp_Deprecated },
|
||||
{ 870, PT_SC, ucp_Deseret },
|
||||
{ 878, PT_SCX, ucp_Devanagari },
|
||||
{ 883, PT_SCX, ucp_Devanagari },
|
||||
{ 894, PT_BOOL, ucp_Default_Ignorable_Code_Point },
|
||||
{ 897, PT_BOOL, ucp_Diacritic },
|
||||
{ 901, PT_BOOL, ucp_Diacritic },
|
||||
{ 911, PT_SC, ucp_Dives_Akuru },
|
||||
{ 916, PT_SC, ucp_Dives_Akuru },
|
||||
{ 927, PT_SCX, ucp_Dogra },
|
||||
{ 932, PT_SCX, ucp_Dogra },
|
||||
{ 938, PT_SC, ucp_Deseret },
|
||||
{ 943, PT_SCX, ucp_Duployan },
|
||||
{ 948, PT_SCX, ucp_Duployan },
|
||||
{ 957, PT_BOOL, ucp_Emoji_Modifier_Base },
|
||||
{ 963, PT_BOOL, ucp_Emoji_Component },
|
||||
{ 969, PT_SC, ucp_Egyptian_Hieroglyphs },
|
||||
{ 974, PT_SC, ucp_Egyptian_Hieroglyphs },
|
||||
{ 994, PT_SCX, ucp_Elbasan },
|
||||
{ 999, PT_SCX, ucp_Elbasan },
|
||||
{ 1007, PT_SC, ucp_Elymaic },
|
||||
{ 1012, PT_SC, ucp_Elymaic },
|
||||
{ 1020, PT_BOOL, ucp_Emoji_Modifier },
|
||||
{ 1025, PT_BOOL, ucp_Emoji },
|
||||
{ 1031, PT_BOOL, ucp_Emoji_Component },
|
||||
{ 1046, PT_BOOL, ucp_Emoji_Modifier },
|
||||
{ 1060, PT_BOOL, ucp_Emoji_Modifier_Base },
|
||||
{ 1078, PT_BOOL, ucp_Emoji_Presentation },
|
||||
{ 1096, PT_BOOL, ucp_Emoji_Presentation },
|
||||
{ 1102, PT_SCX, ucp_Ethiopic },
|
||||
{ 1107, PT_SCX, ucp_Ethiopic },
|
||||
{ 1116, PT_BOOL, ucp_Extender },
|
||||
{ 1120, PT_BOOL, ucp_Extended_Pictographic },
|
||||
{ 1141, PT_BOOL, ucp_Extender },
|
||||
{ 1150, PT_BOOL, ucp_Extended_Pictographic },
|
||||
{ 1158, PT_SCX, ucp_Garay },
|
||||
{ 1163, PT_SCX, ucp_Garay },
|
||||
{ 1169, PT_SCX, ucp_Georgian },
|
||||
{ 1174, PT_SCX, ucp_Georgian },
|
||||
{ 1183, PT_SCX, ucp_Glagolitic },
|
||||
{ 1188, PT_SCX, ucp_Glagolitic },
|
||||
{ 1199, PT_SCX, ucp_Gunjala_Gondi },
|
||||
{ 1204, PT_SCX, ucp_Masaram_Gondi },
|
||||
{ 1209, PT_SCX, ucp_Gothic },
|
||||
{ 1214, PT_SCX, ucp_Gothic },
|
||||
{ 1221, PT_SCX, ucp_Grantha },
|
||||
{ 1226, PT_SCX, ucp_Grantha },
|
||||
{ 1234, PT_BOOL, ucp_Grapheme_Base },
|
||||
{ 1247, PT_BOOL, ucp_Grapheme_Extend },
|
||||
{ 1262, PT_BOOL, ucp_Grapheme_Link },
|
||||
{ 1275, PT_BOOL, ucp_Grapheme_Base },
|
||||
{ 1282, PT_SCX, ucp_Greek },
|
||||
{ 1288, PT_SCX, ucp_Greek },
|
||||
{ 1293, PT_BOOL, ucp_Grapheme_Extend },
|
||||
{ 1299, PT_BOOL, ucp_Grapheme_Link },
|
||||
{ 1306, PT_SCX, ucp_Gujarati },
|
||||
{ 1315, PT_SCX, ucp_Gujarati },
|
||||
{ 1320, PT_SCX, ucp_Gurung_Khema },
|
||||
{ 1325, PT_SCX, ucp_Gunjala_Gondi },
|
||||
{ 1338, PT_SCX, ucp_Gurmukhi },
|
||||
{ 1347, PT_SCX, ucp_Gurmukhi },
|
||||
{ 1352, PT_SCX, ucp_Gurung_Khema },
|
||||
{ 1364, PT_SCX, ucp_Han },
|
||||
{ 1368, PT_SCX, ucp_Hangul },
|
||||
{ 1373, PT_SCX, ucp_Hangul },
|
||||
{ 1380, PT_SCX, ucp_Han },
|
||||
{ 1385, PT_SCX, ucp_Hanifi_Rohingya },
|
||||
{ 1400, PT_SCX, ucp_Hanunoo },
|
||||
{ 1405, PT_SCX, ucp_Hanunoo },
|
||||
{ 1413, PT_SC, ucp_Hatran },
|
||||
{ 1418, PT_SC, ucp_Hatran },
|
||||
{ 1425, PT_SCX, ucp_Hebrew },
|
||||
{ 1430, PT_SCX, ucp_Hebrew },
|
||||
{ 1437, PT_BOOL, ucp_Hex_Digit },
|
||||
{ 1441, PT_BOOL, ucp_Hex_Digit },
|
||||
{ 1450, PT_SCX, ucp_Hiragana },
|
||||
{ 1455, PT_SCX, ucp_Hiragana },
|
||||
{ 1464, PT_SC, ucp_Anatolian_Hieroglyphs },
|
||||
{ 1469, PT_SC, ucp_Pahawh_Hmong },
|
||||
{ 1474, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
|
||||
{ 1479, PT_SCX, ucp_Old_Hungarian },
|
||||
{ 1484, PT_BOOL, ucp_ID_Continue },
|
||||
{ 1488, PT_BOOL, ucp_ID_Compat_Math_Continue },
|
||||
{ 1509, PT_BOOL, ucp_ID_Compat_Math_Start },
|
||||
{ 1527, PT_BOOL, ucp_ID_Continue },
|
||||
{ 1538, PT_BOOL, ucp_Ideographic },
|
||||
{ 1543, PT_BOOL, ucp_Ideographic },
|
||||
{ 1555, PT_BOOL, ucp_ID_Start },
|
||||
{ 1559, PT_BOOL, ucp_IDS_Binary_Operator },
|
||||
{ 1564, PT_BOOL, ucp_IDS_Binary_Operator },
|
||||
{ 1582, PT_BOOL, ucp_IDS_Trinary_Operator },
|
||||
{ 1587, PT_BOOL, ucp_ID_Start },
|
||||
{ 1595, PT_BOOL, ucp_IDS_Trinary_Operator },
|
||||
{ 1614, PT_BOOL, ucp_IDS_Unary_Operator },
|
||||
{ 1619, PT_BOOL, ucp_IDS_Unary_Operator },
|
||||
{ 1636, PT_SC, ucp_Imperial_Aramaic },
|
||||
{ 1652, PT_BOOL, ucp_InCB },
|
||||
{ 1657, PT_SC, ucp_Inherited },
|
||||
{ 1667, PT_SC, ucp_Inscriptional_Pahlavi },
|
||||
{ 1688, PT_SC, ucp_Inscriptional_Parthian },
|
||||
{ 1710, PT_SC, ucp_Old_Italic },
|
||||
{ 1715, PT_SCX, ucp_Javanese },
|
||||
{ 1720, PT_SCX, ucp_Javanese },
|
||||
{ 1729, PT_BOOL, ucp_Join_Control },
|
||||
{ 1735, PT_BOOL, ucp_Join_Control },
|
||||
{ 1747, PT_SCX, ucp_Kaithi },
|
||||
{ 1754, PT_SCX, ucp_Kayah_Li },
|
||||
{ 1759, PT_SCX, ucp_Katakana },
|
||||
{ 1764, PT_SCX, ucp_Kannada },
|
||||
{ 1772, PT_SCX, ucp_Katakana },
|
||||
{ 1781, PT_SC, ucp_Kawi },
|
||||
{ 1786, PT_SCX, ucp_Kayah_Li },
|
||||
{ 1794, PT_SC, ucp_Kharoshthi },
|
||||
{ 1799, PT_SC, ucp_Kharoshthi },
|
||||
{ 1810, PT_SC, ucp_Khitan_Small_Script },
|
||||
{ 1828, PT_SC, ucp_Khmer },
|
||||
{ 1834, PT_SC, ucp_Khmer },
|
||||
{ 1839, PT_SCX, ucp_Khojki },
|
||||
{ 1844, PT_SCX, ucp_Khojki },
|
||||
{ 1851, PT_SCX, ucp_Khudawadi },
|
||||
{ 1861, PT_SC, ucp_Kirat_Rai },
|
||||
{ 1870, PT_SC, ucp_Khitan_Small_Script },
|
||||
{ 1875, PT_SCX, ucp_Kannada },
|
||||
{ 1880, PT_SC, ucp_Kirat_Rai },
|
||||
{ 1885, PT_SCX, ucp_Kaithi },
|
||||
{ 1890, PT_GC, ucp_L },
|
||||
{ 1892, PT_LAMP, 0 },
|
||||
{ 1895, PT_SC, ucp_Tai_Tham },
|
||||
{ 1900, PT_SC, ucp_Lao },
|
||||
{ 1904, PT_SC, ucp_Lao },
|
||||
{ 1909, PT_SCX, ucp_Latin },
|
||||
{ 1915, PT_SCX, ucp_Latin },
|
||||
{ 1920, PT_LAMP, 0 },
|
||||
{ 1923, PT_SC, ucp_Lepcha },
|
||||
{ 1928, PT_SC, ucp_Lepcha },
|
||||
{ 1935, PT_SCX, ucp_Limbu },
|
||||
{ 1940, PT_SCX, ucp_Limbu },
|
||||
{ 1946, PT_SCX, ucp_Linear_A },
|
||||
{ 1951, PT_SCX, ucp_Linear_B },
|
||||
{ 1956, PT_SCX, ucp_Linear_A },
|
||||
{ 1964, PT_SCX, ucp_Linear_B },
|
||||
{ 1972, PT_SCX, ucp_Lisu },
|
||||
{ 1977, PT_PC, ucp_Ll },
|
||||
{ 1980, PT_PC, ucp_Lm },
|
||||
{ 1983, PT_PC, ucp_Lo },
|
||||
{ 1986, PT_BOOL, ucp_Logical_Order_Exception },
|
||||
{ 1990, PT_BOOL, ucp_Logical_Order_Exception },
|
||||
{ 2012, PT_BOOL, ucp_Lowercase },
|
||||
{ 2018, PT_BOOL, ucp_Lowercase },
|
||||
{ 2028, PT_PC, ucp_Lt },
|
||||
{ 2031, PT_PC, ucp_Lu },
|
||||
{ 2034, PT_SCX, ucp_Lycian },
|
||||
{ 2039, PT_SCX, ucp_Lycian },
|
||||
{ 2046, PT_SCX, ucp_Lydian },
|
||||
{ 2051, PT_SCX, ucp_Lydian },
|
||||
{ 2058, PT_GC, ucp_M },
|
||||
{ 2060, PT_SCX, ucp_Mahajani },
|
||||
{ 2069, PT_SCX, ucp_Mahajani },
|
||||
{ 2074, PT_SC, ucp_Makasar },
|
||||
{ 2079, PT_SC, ucp_Makasar },
|
||||
{ 2087, PT_SCX, ucp_Malayalam },
|
||||
{ 2097, PT_SCX, ucp_Mandaic },
|
||||
{ 2102, PT_SCX, ucp_Mandaic },
|
||||
{ 2110, PT_SCX, ucp_Manichaean },
|
||||
{ 2115, PT_SCX, ucp_Manichaean },
|
||||
{ 2126, PT_SC, ucp_Marchen },
|
||||
{ 2131, PT_SC, ucp_Marchen },
|
||||
{ 2139, PT_SCX, ucp_Masaram_Gondi },
|
||||
{ 2152, PT_BOOL, ucp_Math },
|
||||
{ 2157, PT_PC, ucp_Mc },
|
||||
{ 2160, PT_BOOL, ucp_Modifier_Combining_Mark },
|
||||
{ 2164, PT_PC, ucp_Me },
|
||||
{ 2167, PT_SC, ucp_Medefaidrin },
|
||||
{ 2179, PT_SC, ucp_Medefaidrin },
|
||||
{ 2184, PT_SC, ucp_Meetei_Mayek },
|
||||
{ 2196, PT_SC, ucp_Mende_Kikakui },
|
||||
{ 2201, PT_SC, ucp_Mende_Kikakui },
|
||||
{ 2214, PT_SC, ucp_Meroitic_Cursive },
|
||||
{ 2219, PT_SCX, ucp_Meroitic_Hieroglyphs },
|
||||
{ 2224, PT_SC, ucp_Meroitic_Cursive },
|
||||
{ 2240, PT_SCX, ucp_Meroitic_Hieroglyphs },
|
||||
{ 2260, PT_SC, ucp_Miao },
|
||||
{ 2265, PT_SCX, ucp_Malayalam },
|
||||
{ 2270, PT_PC, ucp_Mn },
|
||||
{ 2273, PT_SCX, ucp_Modi },
|
||||
{ 2278, PT_BOOL, ucp_Modifier_Combining_Mark },
|
||||
{ 2300, PT_SCX, ucp_Mongolian },
|
||||
{ 2305, PT_SCX, ucp_Mongolian },
|
||||
{ 2315, PT_SC, ucp_Mro },
|
||||
{ 2319, PT_SC, ucp_Mro },
|
||||
{ 2324, PT_SC, ucp_Meetei_Mayek },
|
||||
{ 2329, PT_SCX, ucp_Multani },
|
||||
{ 2334, PT_SCX, ucp_Multani },
|
||||
{ 2342, PT_SCX, ucp_Myanmar },
|
||||
{ 2350, PT_SCX, ucp_Myanmar },
|
||||
{ 2355, PT_GC, ucp_N },
|
||||
{ 2357, PT_SC, ucp_Nabataean },
|
||||
{ 2367, PT_SC, ucp_Nag_Mundari },
|
||||
{ 2372, PT_SC, ucp_Nag_Mundari },
|
||||
{ 2383, PT_SCX, ucp_Nandinagari },
|
||||
{ 2388, PT_SCX, ucp_Nandinagari },
|
||||
{ 2400, PT_SC, ucp_Old_North_Arabian },
|
||||
{ 2405, PT_SC, ucp_Nabataean },
|
||||
{ 2410, PT_BOOL, ucp_Noncharacter_Code_Point },
|
||||
{ 2416, PT_PC, ucp_Nd },
|
||||
{ 2419, PT_SC, ucp_Newa },
|
||||
{ 2424, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 2434, PT_SCX, ucp_Nko },
|
||||
{ 2438, PT_SCX, ucp_Nko },
|
||||
{ 2443, PT_PC, ucp_Nl },
|
||||
{ 2446, PT_PC, ucp_No },
|
||||
{ 2449, PT_BOOL, ucp_Noncharacter_Code_Point },
|
||||
{ 2471, PT_SC, ucp_Nushu },
|
||||
{ 2476, PT_SC, ucp_Nushu },
|
||||
{ 2482, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
|
||||
{ 2503, PT_SC, ucp_Ogham },
|
||||
{ 2508, PT_SC, ucp_Ogham },
|
||||
{ 2514, PT_SC, ucp_Ol_Chiki },
|
||||
{ 2522, PT_SC, ucp_Ol_Chiki },
|
||||
{ 2527, PT_SCX, ucp_Old_Hungarian },
|
||||
{ 2540, PT_SC, ucp_Old_Italic },
|
||||
{ 2550, PT_SC, ucp_Old_North_Arabian },
|
||||
{ 2566, PT_SCX, ucp_Old_Permic },
|
||||
{ 2576, PT_SC, ucp_Old_Persian },
|
||||
{ 2587, PT_SC, ucp_Old_Sogdian },
|
||||
{ 2598, PT_SC, ucp_Old_South_Arabian },
|
||||
{ 2614, PT_SCX, ucp_Old_Turkic },
|
||||
{ 2624, PT_SCX, ucp_Old_Uyghur },
|
||||
{ 2634, PT_SCX, ucp_Ol_Onal },
|
||||
{ 2641, PT_SCX, ucp_Ol_Onal },
|
||||
{ 2646, PT_SCX, ucp_Oriya },
|
||||
{ 2652, PT_SCX, ucp_Old_Turkic },
|
||||
{ 2657, PT_SCX, ucp_Oriya },
|
||||
{ 2662, PT_SCX, ucp_Osage },
|
||||
{ 2668, PT_SCX, ucp_Osage },
|
||||
{ 2673, PT_SC, ucp_Osmanya },
|
||||
{ 2678, PT_SC, ucp_Osmanya },
|
||||
{ 2686, PT_SCX, ucp_Old_Uyghur },
|
||||
{ 2691, PT_GC, ucp_P },
|
||||
{ 2693, PT_SC, ucp_Pahawh_Hmong },
|
||||
{ 2705, PT_SC, ucp_Palmyrene },
|
||||
{ 2710, PT_SC, ucp_Palmyrene },
|
||||
{ 2720, PT_BOOL, ucp_Pattern_Syntax },
|
||||
{ 2727, PT_BOOL, ucp_Pattern_Syntax },
|
||||
{ 2741, PT_BOOL, ucp_Pattern_White_Space },
|
||||
{ 2759, PT_BOOL, ucp_Pattern_White_Space },
|
||||
{ 2765, PT_SC, ucp_Pau_Cin_Hau },
|
||||
{ 2770, PT_SC, ucp_Pau_Cin_Hau },
|
||||
{ 2780, PT_PC, ucp_Pc },
|
||||
{ 2783, PT_BOOL, ucp_Prepended_Concatenation_Mark },
|
||||
{ 2787, PT_PC, ucp_Pd },
|
||||
{ 2790, PT_PC, ucp_Pe },
|
||||
{ 2793, PT_SCX, ucp_Old_Permic },
|
||||
{ 2798, PT_PC, ucp_Pf },
|
||||
{ 2801, PT_SCX, ucp_Phags_Pa },
|
||||
{ 2806, PT_SCX, ucp_Phags_Pa },
|
||||
{ 2814, PT_SC, ucp_Inscriptional_Pahlavi },
|
||||
{ 2819, PT_SCX, ucp_Psalter_Pahlavi },
|
||||
{ 2824, PT_SC, ucp_Phoenician },
|
||||
{ 2829, PT_SC, ucp_Phoenician },
|
||||
{ 2840, PT_PC, ucp_Pi },
|
||||
{ 2843, PT_SC, ucp_Miao },
|
||||
{ 2848, PT_PC, ucp_Po },
|
||||
{ 2851, PT_BOOL, ucp_Prepended_Concatenation_Mark },
|
||||
{ 2878, PT_SC, ucp_Inscriptional_Parthian },
|
||||
{ 2883, PT_PC, ucp_Ps },
|
||||
{ 2886, PT_SCX, ucp_Psalter_Pahlavi },
|
||||
{ 2901, PT_SCX, ucp_Coptic },
|
||||
{ 2906, PT_SC, ucp_Inherited },
|
||||
{ 2911, PT_BOOL, ucp_Quotation_Mark },
|
||||
{ 2917, PT_BOOL, ucp_Quotation_Mark },
|
||||
{ 2931, PT_BOOL, ucp_Radical },
|
||||
{ 2939, PT_BOOL, ucp_Regional_Indicator },
|
||||
{ 2957, PT_SC, ucp_Rejang },
|
||||
{ 2964, PT_BOOL, ucp_Regional_Indicator },
|
||||
{ 2967, PT_SC, ucp_Rejang },
|
||||
{ 2972, PT_SCX, ucp_Hanifi_Rohingya },
|
||||
{ 2977, PT_SCX, ucp_Runic },
|
||||
{ 2983, PT_SCX, ucp_Runic },
|
||||
{ 2988, PT_GC, ucp_S },
|
||||
{ 2990, PT_SCX, ucp_Samaritan },
|
||||
{ 3000, PT_SCX, ucp_Samaritan },
|
||||
{ 3005, PT_SC, ucp_Old_South_Arabian },
|
||||
{ 3010, PT_SC, ucp_Saurashtra },
|
||||
{ 3015, PT_SC, ucp_Saurashtra },
|
||||
{ 3026, PT_PC, ucp_Sc },
|
||||
{ 3029, PT_BOOL, ucp_Soft_Dotted },
|
||||
{ 3032, PT_BOOL, ucp_Sentence_Terminal },
|
||||
{ 3049, PT_SC, ucp_SignWriting },
|
||||
{ 3054, PT_SCX, ucp_Sharada },
|
||||
{ 3062, PT_SCX, ucp_Shavian },
|
||||
{ 3070, PT_SCX, ucp_Shavian },
|
||||
{ 3075, PT_SCX, ucp_Sharada },
|
||||
{ 3080, PT_SC, ucp_Siddham },
|
||||
{ 3085, PT_SC, ucp_Siddham },
|
||||
{ 3093, PT_SC, ucp_SignWriting },
|
||||
{ 3105, PT_SCX, ucp_Khudawadi },
|
||||
{ 3110, PT_SCX, ucp_Sinhala },
|
||||
{ 3115, PT_SCX, ucp_Sinhala },
|
||||
{ 3123, PT_PC, ucp_Sk },
|
||||
{ 3126, PT_PC, ucp_Sm },
|
||||
{ 3129, PT_PC, ucp_So },
|
||||
{ 3132, PT_BOOL, ucp_Soft_Dotted },
|
||||
{ 3143, PT_SCX, ucp_Sogdian },
|
||||
{ 3148, PT_SCX, ucp_Sogdian },
|
||||
{ 3156, PT_SC, ucp_Old_Sogdian },
|
||||
{ 3161, PT_SC, ucp_Sora_Sompeng },
|
||||
{ 3166, PT_SC, ucp_Sora_Sompeng },
|
||||
{ 3178, PT_SC, ucp_Soyombo },
|
||||
{ 3183, PT_SC, ucp_Soyombo },
|
||||
{ 3191, PT_BOOL, ucp_White_Space },
|
||||
{ 3197, PT_BOOL, ucp_Sentence_Terminal },
|
||||
{ 3203, PT_SC, ucp_Sundanese },
|
||||
{ 3208, PT_SC, ucp_Sundanese },
|
||||
{ 3218, PT_SCX, ucp_Sunuwar },
|
||||
{ 3223, PT_SCX, ucp_Sunuwar },
|
||||
{ 3231, PT_SCX, ucp_Syloti_Nagri },
|
||||
{ 3236, PT_SCX, ucp_Syloti_Nagri },
|
||||
{ 3248, PT_SCX, ucp_Syriac },
|
||||
{ 3253, PT_SCX, ucp_Syriac },
|
||||
{ 3260, PT_SCX, ucp_Tagalog },
|
||||
{ 3268, PT_SCX, ucp_Tagbanwa },
|
||||
{ 3273, PT_SCX, ucp_Tagbanwa },
|
||||
{ 3282, PT_SCX, ucp_Tai_Le },
|
||||
{ 3288, PT_SC, ucp_Tai_Tham },
|
||||
{ 3296, PT_SC, ucp_Tai_Viet },
|
||||
{ 3304, PT_SCX, ucp_Takri },
|
||||
{ 3309, PT_SCX, ucp_Takri },
|
||||
{ 3315, PT_SCX, ucp_Tai_Le },
|
||||
{ 3320, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 3325, PT_SCX, ucp_Tamil },
|
||||
{ 3331, PT_SCX, ucp_Tamil },
|
||||
{ 3336, PT_SCX, ucp_Tangut },
|
||||
{ 3341, PT_SC, ucp_Tangsa },
|
||||
{ 3348, PT_SCX, ucp_Tangut },
|
||||
{ 3355, PT_SC, ucp_Tai_Viet },
|
||||
{ 3360, PT_SCX, ucp_Telugu },
|
||||
{ 3365, PT_SCX, ucp_Telugu },
|
||||
{ 3372, PT_BOOL, ucp_Terminal_Punctuation },
|
||||
{ 3377, PT_BOOL, ucp_Terminal_Punctuation },
|
||||
{ 3397, PT_SCX, ucp_Tifinagh },
|
||||
{ 3402, PT_SCX, ucp_Tagalog },
|
||||
{ 3407, PT_SCX, ucp_Thaana },
|
||||
{ 3412, PT_SCX, ucp_Thaana },
|
||||
{ 3419, PT_SCX, ucp_Thai },
|
||||
{ 3424, PT_SCX, ucp_Tibetan },
|
||||
{ 3432, PT_SCX, ucp_Tibetan },
|
||||
{ 3437, PT_SCX, ucp_Tifinagh },
|
||||
{ 3446, PT_SCX, ucp_Tirhuta },
|
||||
{ 3451, PT_SCX, ucp_Tirhuta },
|
||||
{ 3459, PT_SC, ucp_Tangsa },
|
||||
{ 3464, PT_SCX, ucp_Todhri },
|
||||
{ 3471, PT_SCX, ucp_Todhri },
|
||||
{ 3476, PT_SCX, ucp_Toto },
|
||||
{ 3481, PT_SCX, ucp_Tulu_Tigalari },
|
||||
{ 3494, PT_SCX, ucp_Tulu_Tigalari },
|
||||
{ 3499, PT_SC, ucp_Ugaritic },
|
||||
{ 3504, PT_SC, ucp_Ugaritic },
|
||||
{ 3513, PT_BOOL, ucp_Unified_Ideograph },
|
||||
{ 3519, PT_BOOL, ucp_Unified_Ideograph },
|
||||
{ 3536, PT_SC, ucp_Unknown },
|
||||
{ 3544, PT_BOOL, ucp_Uppercase },
|
||||
{ 3550, PT_BOOL, ucp_Uppercase },
|
||||
{ 3560, PT_SC, ucp_Vai },
|
||||
{ 3564, PT_SC, ucp_Vai },
|
||||
{ 3569, PT_BOOL, ucp_Variation_Selector },
|
||||
{ 3587, PT_SC, ucp_Vithkuqi },
|
||||
{ 3592, PT_SC, ucp_Vithkuqi },
|
||||
{ 3601, PT_BOOL, ucp_Variation_Selector },
|
||||
{ 3604, PT_SC, ucp_Wancho },
|
||||
{ 3611, PT_SC, ucp_Warang_Citi },
|
||||
{ 3616, PT_SC, ucp_Warang_Citi },
|
||||
{ 3627, PT_SC, ucp_Wancho },
|
||||
{ 3632, PT_BOOL, ucp_White_Space },
|
||||
{ 3643, PT_BOOL, ucp_White_Space },
|
||||
{ 3650, PT_ALNUM, 0 },
|
||||
{ 3654, PT_BOOL, ucp_XID_Continue },
|
||||
{ 3659, PT_BOOL, ucp_XID_Continue },
|
||||
{ 3671, PT_BOOL, ucp_XID_Start },
|
||||
{ 3676, PT_BOOL, ucp_XID_Start },
|
||||
{ 3685, PT_SC, ucp_Old_Persian },
|
||||
{ 3690, PT_PXSPACE, 0 },
|
||||
{ 3694, PT_SPACE, 0 },
|
||||
{ 3698, PT_SC, ucp_Cuneiform },
|
||||
{ 3703, PT_UCNC, 0 },
|
||||
{ 3707, PT_WORD, 0 },
|
||||
{ 3711, PT_SCX, ucp_Yezidi },
|
||||
{ 3716, PT_SCX, ucp_Yezidi },
|
||||
{ 3723, PT_SCX, ucp_Yi },
|
||||
{ 3726, PT_SCX, ucp_Yi },
|
||||
{ 3731, PT_GC, ucp_Z },
|
||||
{ 3733, PT_SC, ucp_Zanabazar_Square },
|
||||
{ 3749, PT_SC, ucp_Zanabazar_Square },
|
||||
{ 3754, PT_SC, ucp_Inherited },
|
||||
{ 3759, PT_PC, ucp_Zl },
|
||||
{ 3762, PT_PC, ucp_Zp },
|
||||
{ 3765, PT_PC, ucp_Zs },
|
||||
{ 3768, PT_SC, ucp_Common },
|
||||
{ 3773, PT_SC, ucp_Unknown }
|
||||
};
|
||||
|
||||
const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table); /* Number of entries in PRIV(utt): total array size in bytes divided by the size of one ucp_type_table element. */
|
||||
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* End of pcre2_ucptables.c */
|
||||
@@ -1,132 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE2 is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef PCRE2_UTIL_H_IDEMPOTENT_GUARD
|
||||
#define PCRE2_UTIL_H_IDEMPOTENT_GUARD
|
||||
|
||||
/* Assertion macros */
|
||||
|
||||
#ifdef PCRE2_DEBUG
|
||||
|
||||
#if defined(HAVE_ASSERT_H) && !defined(NDEBUG)
|
||||
#include <assert.h>
|
||||
#endif
|
||||
|
||||
/* PCRE2_ASSERT(x) can be used to inject an assert() for conditions
|
||||
that the code below doesn't support. It is a NOP for non debug builds
|
||||
but in debug builds will print information about the location of the
|
||||
code where it triggered and crash.
|
||||
|
||||
It is meant to work like assert(), and therefore the expression used
|
||||
should indicate what the expected state is, and shouldn't have any
|
||||
side-effects. */
|
||||
|
||||
#if defined(HAVE_ASSERT_H) && !defined(NDEBUG) /* assert.h is available and NDEBUG is not set */
|
||||
#define PCRE2_ASSERT(x) assert(x) /* delegate directly to the standard assert() */
|
||||
#else /* no usable assert(): report file and line on stderr, then abort(). NOTE(review): fprintf/abort need stdio.h/stdlib.h, which this header does not include itself - presumably the including file does; confirm. */
|
||||
#define PCRE2_ASSERT(x) do \
|
||||
{ \
|
||||
if (!(x)) \
|
||||
{ \
|
||||
fprintf(stderr, "Assertion failed at " __FILE__ ":%d\n", __LINE__); \
|
||||
abort(); \
|
||||
} \
|
||||
} while(0) /* do/while(0) wrapper: the expansion is one statement that takes the caller's trailing ';' */
|
||||
#endif /* HAVE_ASSERT_H && !NDEBUG */
|
||||
|
||||
/* PCRE2_UNREACHABLE() can be used to mark locations in the code that
|
||||
shouldn't be reached. In non-debug builds it is defined as a hint for
|
||||
the compiler to eliminate any code after it, so it is useful also for
|
||||
performance reasons, but should be used with care because if it is
|
||||
ever reached it will trigger Undefined Behaviour and, if you are lucky, a
|
||||
crash. In debug builds it will report the location where it was triggered
|
||||
and crash. One important point to consider when using this macro, is
|
||||
that it is only implemented for a few compilers, and therefore can't
|
||||
be relied on to always be active either, so if it is followed by some
|
||||
code it is important to make sure that the whole thing is safe to
|
||||
use even if the macro is not there (ex: make sure there is a `break`
|
||||
after it if used at the end of a `case`) and to test your code also
|
||||
with a configuration where the macro will be a NOP. */
|
||||
|
||||
#if defined(HAVE_ASSERT_H) && !defined(NDEBUG) /* assert.h is available and NDEBUG is not set */
|
||||
#define PCRE2_UNREACHABLE() \
|
||||
assert(((void)"Execution reached unexpected point", 0)) /* comma expression whose value is always 0, so the assertion always fails; the (void) string literal does not affect the value */
|
||||
#else /* no usable assert(): print the message with file and line on stderr, then abort() */
|
||||
#define PCRE2_UNREACHABLE() do \
|
||||
{ \
|
||||
fprintf(stderr, "Execution reached unexpected point at " __FILE__ \
|
||||
":%d\n", __LINE__); \
|
||||
abort(); \
|
||||
} while(0) /* do/while(0) wrapper: the expansion is one statement that takes the caller's trailing ';' */
|
||||
#endif /* HAVE_ASSERT_H && !NDEBUG */
|
||||
|
||||
/* PCRE2_DEBUG_UNREACHABLE() is a debug only version of the previous
|
||||
macro. It is meant to be used in places where the code is handling
|
||||
an error situation in code that shouldn't be reached, but that has
|
||||
some sort of fallback code to normally handle the error. When in
|
||||
doubt you should use this instead of the previous macro. Like in
|
||||
the previous case, it is a good idea to document as much as possible
|
||||
the reason and the actions that should be taken if it ever triggers. */
|
||||
|
||||
#define PCRE2_DEBUG_UNREACHABLE() PCRE2_UNREACHABLE()
|
||||
|
||||
#endif /* PCRE2_DEBUG */
|
||||
|
||||
#ifndef PCRE2_DEBUG_UNREACHABLE /* no definition was supplied by the PCRE2_DEBUG section of this header */
|
||||
#define PCRE2_DEBUG_UNREACHABLE() do {} while(0) /* empty statement: does nothing */
|
||||
#endif /* !PCRE2_DEBUG_UNREACHABLE */
|
||||
|
||||
#ifndef PCRE2_UNREACHABLE /* no definition was supplied by the PCRE2_DEBUG section of this header */
|
||||
#ifdef HAVE_BUILTIN_UNREACHABLE /* first choice: the __builtin_unreachable() compiler builtin */
|
||||
#define PCRE2_UNREACHABLE() __builtin_unreachable()
|
||||
#elif defined(HAVE_BUILTIN_ASSUME) /* second choice: __assume(0); NOTE(review): presumably the MSVC-style intrinsic - confirm */
|
||||
#define PCRE2_UNREACHABLE() __assume(0)
|
||||
#else /* neither hint is available: the macro is an empty statement */
|
||||
#define PCRE2_UNREACHABLE() do {} while(0)
|
||||
#endif /* HAVE_BUILTIN_UNREACHABLE / HAVE_BUILTIN_ASSUME */
|
||||
#endif /* !PCRE2_UNREACHABLE */
|
||||
|
||||
#ifndef PCRE2_ASSERT /* no definition was supplied by the PCRE2_DEBUG section of this header */
|
||||
#define PCRE2_ASSERT(x) do {} while(0) /* empty statement: x does not appear in the expansion, so it is never evaluated */
|
||||
#endif /* !PCRE2_ASSERT */
|
||||
|
||||
#endif /* PCRE2_UTIL_H_IDEMPOTENT_GUARD */
|
||||
|
||||
/* End of pcre2_util.h */
|
||||
@@ -1,398 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE2 is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains an internal function for validating UTF character
|
||||
strings. This file is also #included by the pcre2test program, which uses
|
||||
macros to change names from _pcre2_xxx to xxxx, thereby avoiding name clashes
|
||||
with the library. In this case, PCRE2_PCRE2TEST is defined. */
|
||||
|
||||
#ifndef PCRE2_PCRE2TEST /* We're compiling the library */
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
#include "pcre2_internal.h"
|
||||
#endif /* PCRE2_PCRE2TEST */
|
||||
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
/*************************************************
|
||||
* Dummy function when Unicode is not supported *
|
||||
*************************************************/
|
||||
|
||||
/* This function should never be called when Unicode is not supported. */
|
||||
|
||||
int
|
||||
PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
|
||||
{ /* Stub compiled when SUPPORT_UNICODE is not defined: performs no validation at all. */
|
||||
(void)string; /* The three (void) casts only mark the parameters as intentionally unused. */
|
||||
(void)length;
|
||||
(void)erroroffset; /* *erroroffset is never written by this stub. */
|
||||
return 0; /* Unconditionally 0, the value the real validator documents as "valid UTF string". */
|
||||
}
|
||||
#else /* UTF is supported */
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Validate a UTF string *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called (optionally) at the start of compile or match, to
|
||||
check that a supposed UTF string is actually valid. The early check means
|
||||
that subsequent code can assume it is dealing with a valid string. The check
|
||||
can be turned off for maximum performance, but the consequences of supplying an
|
||||
invalid string are then undefined.
|
||||
|
||||
Arguments:
|
||||
string points to the string
|
||||
length length of string
|
||||
erroroffset pointer to an error position offset variable
|
||||
|
||||
Returns: == 0 if the string is a valid UTF string
|
||||
!= 0 otherwise, setting the offset of the bad character
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
|
||||
{
|
||||
PCRE2_SPTR p;
|
||||
uint32_t c;
|
||||
|
||||
/* ----------------- Check a UTF-8 string ----------------- */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
|
||||
/* Originally, this function checked according to RFC 2279, allowing for values
|
||||
in the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were
|
||||
in the canonical format. Once somebody had pointed out RFC 3629 to me (it
|
||||
obsoletes 2279), additional restrictions were applied. The values are now
|
||||
limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the
|
||||
subrange 0xd000 to 0xdfff is excluded. However, the format of 5-byte and 6-byte
|
||||
characters is still checked. Error returns are as follows:
|
||||
|
||||
PCRE2_ERROR_UTF8_ERR1 Missing 1 byte at the end of the string
|
||||
PCRE2_ERROR_UTF8_ERR2 Missing 2 bytes at the end of the string
|
||||
PCRE2_ERROR_UTF8_ERR3 Missing 3 bytes at the end of the string
|
||||
PCRE2_ERROR_UTF8_ERR4 Missing 4 bytes at the end of the string
|
||||
PCRE2_ERROR_UTF8_ERR5 Missing 5 bytes at the end of the string
|
||||
PCRE2_ERROR_UTF8_ERR6 2nd-byte's two top bits are not 0x80
|
||||
PCRE2_ERROR_UTF8_ERR7 3rd-byte's two top bits are not 0x80
|
||||
PCRE2_ERROR_UTF8_ERR8 4th-byte's two top bits are not 0x80
|
||||
PCRE2_ERROR_UTF8_ERR9 5th-byte's two top bits are not 0x80
|
||||
PCRE2_ERROR_UTF8_ERR10 6th-byte's two top bits are not 0x80
|
||||
PCRE2_ERROR_UTF8_ERR11 5-byte character is not permitted by RFC 3629
|
||||
PCRE2_ERROR_UTF8_ERR12 6-byte character is not permitted by RFC 3629
|
||||
PCRE2_ERROR_UTF8_ERR13 4-byte character with value > 0x10ffff is not permitted
|
||||
PCRE2_ERROR_UTF8_ERR14 3-byte character with value 0xd800-0xdfff is not permitted
|
||||
PCRE2_ERROR_UTF8_ERR15 Overlong 2-byte sequence
|
||||
PCRE2_ERROR_UTF8_ERR16 Overlong 3-byte sequence
|
||||
PCRE2_ERROR_UTF8_ERR17 Overlong 4-byte sequence
|
||||
PCRE2_ERROR_UTF8_ERR18 Overlong 5-byte sequence (won't ever occur)
|
||||
PCRE2_ERROR_UTF8_ERR19 Overlong 6-byte sequence (won't ever occur)
|
||||
PCRE2_ERROR_UTF8_ERR20 Isolated 0x80 byte (not within UTF-8 character)
|
||||
PCRE2_ERROR_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff
|
||||
*/
|
||||
|
||||
for (p = string; length > 0; p++)
|
||||
{
|
||||
uint32_t ab, d;
|
||||
|
||||
c = *p;
|
||||
length--;
|
||||
|
||||
if (c < 128) continue; /* ASCII character */
|
||||
|
||||
if (c < 0xc0) /* Isolated 10xx xxxx byte */
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string);
|
||||
return PCRE2_ERROR_UTF8_ERR20;
|
||||
}
|
||||
|
||||
if (c >= 0xfe) /* Invalid 0xfe or 0xff bytes */
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string);
|
||||
return PCRE2_ERROR_UTF8_ERR21;
|
||||
}
|
||||
|
||||
ab = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes (1-5) */
|
||||
if (length < ab) /* Missing bytes */
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string);
|
||||
switch(ab - length)
|
||||
{
|
||||
case 1: return PCRE2_ERROR_UTF8_ERR1;
|
||||
case 2: return PCRE2_ERROR_UTF8_ERR2;
|
||||
case 3: return PCRE2_ERROR_UTF8_ERR3;
|
||||
case 4: return PCRE2_ERROR_UTF8_ERR4;
|
||||
case 5: return PCRE2_ERROR_UTF8_ERR5;
|
||||
}
|
||||
}
|
||||
length -= ab; /* Length remaining */
|
||||
|
||||
/* Check top bits in the second byte */
|
||||
|
||||
if (((d = *(++p)) & 0xc0) != 0x80)
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 1;
|
||||
return PCRE2_ERROR_UTF8_ERR6;
|
||||
}
|
||||
|
||||
/* For each length, check that the remaining bytes start with the 0x80 bit
|
||||
set and not the 0x40 bit. Then check for an overlong sequence, and for the
|
||||
excluded range 0xd800 to 0xdfff. */
|
||||
|
||||
switch (ab)
|
||||
{
|
||||
/* 2-byte character. No further bytes to check for 0x80. Check first byte
|
||||
for for xx00 000x (overlong sequence). */
|
||||
|
||||
case 1: if ((c & 0x3e) == 0)
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 1;
|
||||
return PCRE2_ERROR_UTF8_ERR15;
|
||||
}
|
||||
break;
|
||||
|
||||
/* 3-byte character. Check third byte for 0x80. Then check first 2 bytes
|
||||
for 1110 0000, xx0x xxxx (overlong sequence) or
|
||||
1110 1101, 1010 xxxx (0xd800 - 0xdfff) */
|
||||
|
||||
case 2:
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 2;
|
||||
return PCRE2_ERROR_UTF8_ERR7;
|
||||
}
|
||||
if (c == 0xe0 && (d & 0x20) == 0)
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 2;
|
||||
return PCRE2_ERROR_UTF8_ERR16;
|
||||
}
|
||||
if (c == 0xed && d >= 0xa0)
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 2;
|
||||
return PCRE2_ERROR_UTF8_ERR14;
|
||||
}
|
||||
break;
|
||||
|
||||
/* 4-byte character. Check 3rd and 4th bytes for 0x80. Then check first 2
|
||||
bytes for for 1111 0000, xx00 xxxx (overlong sequence), then check for a
|
||||
character greater than 0x0010ffff (f4 8f bf bf) */
|
||||
|
||||
case 3:
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 2;
|
||||
return PCRE2_ERROR_UTF8_ERR7;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 3;
|
||||
return PCRE2_ERROR_UTF8_ERR8;
|
||||
}
|
||||
if (c == 0xf0 && (d & 0x30) == 0)
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 3;
|
||||
return PCRE2_ERROR_UTF8_ERR17;
|
||||
}
|
||||
if (c > 0xf4 || (c == 0xf4 && d > 0x8f))
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 3;
|
||||
return PCRE2_ERROR_UTF8_ERR13;
|
||||
}
|
||||
break;
|
||||
|
||||
/* 5-byte and 6-byte characters are not allowed by RFC 3629, and will be
|
||||
rejected by the length test below. However, we do the appropriate tests
|
||||
here so that overlong sequences get diagnosed, and also in case there is
|
||||
ever an option for handling these larger code points. */
|
||||
|
||||
/* 5-byte character. Check 3rd, 4th, and 5th bytes for 0x80. Then check for
|
||||
1111 1000, xx00 0xxx */
|
||||
|
||||
case 4:
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 2;
|
||||
return PCRE2_ERROR_UTF8_ERR7;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 3;
|
||||
return PCRE2_ERROR_UTF8_ERR8;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 4;
|
||||
return PCRE2_ERROR_UTF8_ERR9;
|
||||
}
|
||||
if (c == 0xf8 && (d & 0x38) == 0)
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 4;
|
||||
return PCRE2_ERROR_UTF8_ERR18;
|
||||
}
|
||||
break;
|
||||
|
||||
/* 6-byte character. Check 3rd-6th bytes for 0x80. Then check for
|
||||
1111 1100, xx00 00xx. */
|
||||
|
||||
case 5:
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 2;
|
||||
return PCRE2_ERROR_UTF8_ERR7;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 3;
|
||||
return PCRE2_ERROR_UTF8_ERR8;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 4;
|
||||
return PCRE2_ERROR_UTF8_ERR9;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Sixth byte */
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 5;
|
||||
return PCRE2_ERROR_UTF8_ERR10;
|
||||
}
|
||||
if (c == 0xfc && (d & 0x3c) == 0)
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 5;
|
||||
return PCRE2_ERROR_UTF8_ERR19;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* Character is valid under RFC 2279, but 4-byte and 5-byte characters are
|
||||
excluded by RFC 3629. The pointer p is currently at the last byte of the
|
||||
character. */
|
||||
|
||||
if (ab > 3)
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - ab;
|
||||
return (ab == 4)? PCRE2_ERROR_UTF8_ERR11 : PCRE2_ERROR_UTF8_ERR12;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
||||
|
||||
/* ----------------- Check a UTF-16 string ----------------- */
|
||||
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
|
||||
/* There's not so much work, nor so many errors, for UTF-16.
|
||||
PCRE2_ERROR_UTF16_ERR1 Missing low surrogate at the end of the string
|
||||
PCRE2_ERROR_UTF16_ERR2 Invalid low surrogate
|
||||
PCRE2_ERROR_UTF16_ERR3 Isolated low surrogate
|
||||
*/
|
||||
|
||||
for (p = string; length > 0; p++)
|
||||
{
|
||||
c = *p;
|
||||
length--;
|
||||
|
||||
if ((c & 0xf800) != 0xd800)
|
||||
{
|
||||
/* Normal UTF-16 code point. Neither high nor low surrogate. */
|
||||
}
|
||||
else if ((c & 0x0400) == 0)
|
||||
{
|
||||
/* High surrogate. Must be a followed by a low surrogate. */
|
||||
if (length == 0)
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string);
|
||||
return PCRE2_ERROR_UTF16_ERR1;
|
||||
}
|
||||
p++;
|
||||
length--;
|
||||
if ((*p & 0xfc00) != 0xdc00)
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string) - 1;
|
||||
return PCRE2_ERROR_UTF16_ERR2;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Isolated low surrogate. Always an error. */
|
||||
*erroroffset = (PCRE2_SIZE)(p - string);
|
||||
return PCRE2_ERROR_UTF16_ERR3;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
||||
|
||||
|
||||
/* ----------------- Check a UTF-32 string ----------------- */
|
||||
|
||||
#else
|
||||
|
||||
/* There is very little to do for a UTF-32 string.
|
||||
PCRE2_ERROR_UTF32_ERR1 Surrogate character
|
||||
PCRE2_ERROR_UTF32_ERR2 Character > 0x10ffff
|
||||
*/
|
||||
|
||||
for (p = string; length > 0; length--, p++)
|
||||
{
|
||||
c = *p;
|
||||
if ((c & 0xfffff800u) != 0xd800u)
|
||||
{
|
||||
/* Normal UTF-32 code point. Neither high nor low surrogate. */
|
||||
if (c > 0x10ffffu)
|
||||
{
|
||||
*erroroffset = (PCRE2_SIZE)(p - string);
|
||||
return PCRE2_ERROR_UTF32_ERR2;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* A surrogate */
|
||||
*erroroffset = (PCRE2_SIZE)(p - string);
|
||||
return PCRE2_ERROR_UTF32_ERR1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
#endif /* CODE_UNIT_WIDTH */
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* End of pcre2_valid_utf.c */
|
||||
@@ -1,545 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This module contains two internal functions that are used to match
|
||||
OP_XCLASS and OP_ECLASS. It is used by pcre2_auto_possessify() and by both
|
||||
pcre2_match() and pcre2_dfa_match(). */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
/*************************************************
|
||||
* Match character against an XCLASS *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called to match a character against an extended class that
|
||||
might contain codepoints above 255 and/or Unicode properties.
|
||||
|
||||
Arguments:
|
||||
c the character
|
||||
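data points to the flag code unit of the XCLASS data
char_lists_end points to the end of the character lists; a list is found at an offset back from here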
|
||||
utf TRUE if in UTF mode
|
||||
|
||||
Returns: TRUE if character matches, else FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(xclass)(uint32_t c, PCRE2_SPTR data, const uint8_t *char_lists_end, BOOL utf)
|
||||
{
|
||||
/* Update PRIV(update_classbits) when this function is changed. */
|
||||
PCRE2_UCHAR t;
|
||||
BOOL not_negated = (*data & XCL_NOT) == 0;
|
||||
uint32_t type, max_index, min_index, value;
|
||||
const uint8_t *next_char;
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
/* In 8 bit mode, this must always be TRUE. Help the compiler to know that. */
|
||||
utf = TRUE;
|
||||
#endif
|
||||
|
||||
/* Code points < 256 are matched against a bitmap, if one is present. */
|
||||
|
||||
if ((*data++ & XCL_MAP) != 0)
|
||||
{
|
||||
if (c < 256)
|
||||
return (((const uint8_t *)data)[c/8] & (1u << (c&7))) != 0;
|
||||
/* Skip bitmap. */
|
||||
data += 32 / sizeof(PCRE2_UCHAR);
|
||||
}
|
||||
|
||||
/* Match against the list of Unicode properties. We won't ever
|
||||
encounter XCL_PROP or XCL_NOTPROP when UTF support is not compiled. */
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (*data == XCL_PROP || *data == XCL_NOTPROP)
|
||||
{
|
||||
/* The UCD record is the same for all properties. */
|
||||
const ucd_record *prop = GET_UCD(c);
|
||||
|
||||
do
|
||||
{
|
||||
int chartype;
|
||||
BOOL isprop = (*data++) == XCL_PROP;
|
||||
BOOL ok;
|
||||
|
||||
switch(*data)
|
||||
{
|
||||
case PT_LAMP:
|
||||
chartype = prop->chartype;
|
||||
if ((chartype == ucp_Lu || chartype == ucp_Ll ||
|
||||
chartype == ucp_Lt) == isprop) return not_negated;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == isprop)
|
||||
return not_negated;
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
if ((data[1] == prop->chartype) == isprop) return not_negated;
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
if ((data[1] == prop->script) == isprop) return not_negated;
|
||||
break;
|
||||
|
||||
case PT_SCX:
|
||||
ok = (data[1] == prop->script ||
|
||||
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), data[1]) != 0);
|
||||
if (ok == isprop) return not_negated;
|
||||
break;
|
||||
|
||||
case PT_ALNUM:
|
||||
chartype = prop->chartype;
|
||||
if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[chartype] == ucp_N) == isprop)
|
||||
return not_negated;
|
||||
break;
|
||||
|
||||
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||
which means that Perl space and POSIX space are now identical. PCRE
|
||||
was changed at release 8.34. */
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
switch(c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
VSPACE_CASES:
|
||||
if (isprop) return not_negated;
|
||||
break;
|
||||
|
||||
default:
|
||||
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == isprop)
|
||||
return not_negated;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
chartype = prop->chartype;
|
||||
if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[chartype] == ucp_N ||
|
||||
chartype == ucp_Mn || chartype == ucp_Pc) == isprop)
|
||||
return not_negated;
|
||||
break;
|
||||
|
||||
case PT_UCNC:
|
||||
if (c < 0xa0)
|
||||
{
|
||||
if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
|
||||
c == CHAR_GRAVE_ACCENT) == isprop)
|
||||
return not_negated;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((c < 0xd800 || c > 0xdfff) == isprop)
|
||||
return not_negated;
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_BIDICL:
|
||||
if ((UCD_BIDICLASS_PROP(prop) == data[1]) == isprop)
|
||||
return not_negated;
|
||||
break;
|
||||
|
||||
case PT_BOOL:
|
||||
ok = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||
UCD_BPROPS_PROP(prop), data[1]) != 0;
|
||||
if (ok == isprop) return not_negated;
|
||||
break;
|
||||
|
||||
/* The following three properties can occur only in an XCLASS, as there
|
||||
is no \p or \P coding for them. */
|
||||
|
||||
/* Graphic character. Implement this as not Z (space or separator) and
|
||||
not C (other), except for Cf (format) with a few exceptions. This seems
|
||||
to be what Perl does. The exceptional characters are:
|
||||
|
||||
U+061C Arabic Letter Mark
|
||||
U+180E Mongolian Vowel Separator
|
||||
U+2066 - U+2069 Various "isolate"s
|
||||
*/
|
||||
|
||||
case PT_PXGRAPH:
|
||||
chartype = prop->chartype;
|
||||
if ((PRIV(ucp_gentype)[chartype] != ucp_Z &&
|
||||
(PRIV(ucp_gentype)[chartype] != ucp_C ||
|
||||
(chartype == ucp_Cf &&
|
||||
c != 0x061c && c != 0x180e && (c < 0x2066 || c > 0x2069))
|
||||
)) == isprop)
|
||||
return not_negated;
|
||||
break;
|
||||
|
||||
/* Printable character: same as graphic, with the addition of Zs, i.e.
|
||||
not Zl and not Zp, and U+180E. */
|
||||
|
||||
case PT_PXPRINT:
|
||||
chartype = prop->chartype;
|
||||
if ((chartype != ucp_Zl &&
|
||||
chartype != ucp_Zp &&
|
||||
(PRIV(ucp_gentype)[chartype] != ucp_C ||
|
||||
(chartype == ucp_Cf &&
|
||||
c != 0x061c && (c < 0x2066 || c > 0x2069))
|
||||
)) == isprop)
|
||||
return not_negated;
|
||||
break;
|
||||
|
||||
/* Punctuation: all Unicode punctuation, plus ASCII characters that
|
||||
Unicode treats as symbols rather than punctuation, for Perl
|
||||
compatibility (these are $+<=>^`|~). */
|
||||
|
||||
case PT_PXPUNCT:
|
||||
chartype = prop->chartype;
|
||||
if ((PRIV(ucp_gentype)[chartype] == ucp_P ||
|
||||
(c < 128 && PRIV(ucp_gentype)[chartype] == ucp_S)) == isprop)
|
||||
return not_negated;
|
||||
break;
|
||||
|
||||
/* Perl has two sets of hex digits */
|
||||
|
||||
case PT_PXXDIGIT:
|
||||
if (((c >= CHAR_0 && c <= CHAR_9) ||
|
||||
(c >= CHAR_A && c <= CHAR_F) ||
|
||||
(c >= CHAR_a && c <= CHAR_f) ||
|
||||
(c >= 0xff10 && c <= 0xff19) || /* Fullwidth digits */
|
||||
(c >= 0xff21 && c <= 0xff26) || /* Fullwidth letters */
|
||||
(c >= 0xff41 && c <= 0xff46)) == isprop)
|
||||
return not_negated;
|
||||
break;
|
||||
|
||||
/* This should never occur, but compilers may mutter if there is no
|
||||
default. */
|
||||
|
||||
default:
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
data += 2;
|
||||
}
|
||||
while (*data == XCL_PROP || *data == XCL_NOTPROP);
|
||||
}
|
||||
#else
|
||||
(void)utf; /* Avoid compiler warning */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Match against large chars or ranges that end with a large char. */
|
||||
if (*data < XCL_LIST)
|
||||
{
|
||||
while ((t = *data++) != XCL_END)
|
||||
{
|
||||
uint32_t x, y;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
GETCHARINC(x, data); /* macro generates multiple statements */
|
||||
}
|
||||
else
|
||||
#endif
|
||||
x = *data++;
|
||||
|
||||
if (t == XCL_SINGLE)
|
||||
{
|
||||
/* Since character ranges follow the properties, and they are
|
||||
sorted, early return is possible for all characters <= x. */
|
||||
if (c <= x) return (c == x) ? not_negated : !not_negated;
|
||||
continue;
|
||||
}
|
||||
|
||||
PCRE2_ASSERT(t == XCL_RANGE);
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
GETCHARINC(y, data); /* macro generates multiple statements */
|
||||
}
|
||||
else
|
||||
#endif
|
||||
y = *data++;
|
||||
|
||||
/* Since character ranges follow the properties, and they are
|
||||
sorted, early return is possible for all characters <= y. */
|
||||
if (c <= y) return (c >= x) ? not_negated : !not_negated;
|
||||
}
|
||||
|
||||
return !not_negated; /* char did not match */
|
||||
}
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
type = (uint32_t)(data[0] << 8) | data[1];
|
||||
data += 2;
|
||||
#else
|
||||
type = data[0];
|
||||
data++;
|
||||
#endif /* CODE_UNIT_WIDTH */
|
||||
|
||||
/* Align characters. */
|
||||
next_char = char_lists_end - (GET(data, 0) << 1);
|
||||
type &= XCL_TYPE_MASK;
|
||||
|
||||
/* Alignment check. */
|
||||
PCRE2_ASSERT(((uintptr_t)next_char & 0x1) == 0);
|
||||
|
||||
if (c >= XCL_CHAR_LIST_HIGH_16_START)
|
||||
{
|
||||
max_index = type & XCL_ITEM_COUNT_MASK;
|
||||
if (max_index == XCL_ITEM_COUNT_MASK)
|
||||
{
|
||||
max_index = *(const uint16_t*)next_char;
|
||||
PCRE2_ASSERT(max_index >= XCL_ITEM_COUNT_MASK);
|
||||
next_char += 2;
|
||||
}
|
||||
|
||||
next_char += max_index << 1;
|
||||
type >>= XCL_TYPE_BIT_LEN;
|
||||
}
|
||||
|
||||
if (c < XCL_CHAR_LIST_LOW_32_START)
|
||||
{
|
||||
max_index = type & XCL_ITEM_COUNT_MASK;
|
||||
|
||||
c = (uint16_t)((c << XCL_CHAR_SHIFT) | XCL_CHAR_END);
|
||||
|
||||
if (max_index == XCL_ITEM_COUNT_MASK)
|
||||
{
|
||||
max_index = *(const uint16_t*)next_char;
|
||||
PCRE2_ASSERT(max_index >= XCL_ITEM_COUNT_MASK);
|
||||
next_char += 2;
|
||||
}
|
||||
|
||||
if (max_index == 0 || c < *(const uint16_t*)next_char)
|
||||
return ((type & XCL_BEGIN_WITH_RANGE) != 0) == not_negated;
|
||||
|
||||
min_index = 0;
|
||||
value = ((const uint16_t*)next_char)[--max_index];
|
||||
if (c >= value)
|
||||
return (value == c || (value & XCL_CHAR_END) == 0) == not_negated;
|
||||
|
||||
max_index--;
|
||||
|
||||
/* Binary search of a range. */
|
||||
while (TRUE)
|
||||
{
|
||||
uint32_t mid_index = (min_index + max_index) >> 1;
|
||||
value = ((const uint16_t*)next_char)[mid_index];
|
||||
|
||||
if (c < value)
|
||||
max_index = mid_index - 1;
|
||||
else if (((const uint16_t*)next_char)[mid_index + 1] <= c)
|
||||
min_index = mid_index + 1;
|
||||
else
|
||||
return (value == c || (value & XCL_CHAR_END) == 0) == not_negated;
|
||||
}
|
||||
}
|
||||
|
||||
/* Skip the 16 bit ranges. */
|
||||
max_index = type & XCL_ITEM_COUNT_MASK;
|
||||
if (max_index == XCL_ITEM_COUNT_MASK)
|
||||
{
|
||||
max_index = *(const uint16_t*)next_char;
|
||||
PCRE2_ASSERT(max_index >= XCL_ITEM_COUNT_MASK);
|
||||
next_char += 2;
|
||||
}
|
||||
|
||||
next_char += (max_index << 1);
|
||||
type >>= XCL_TYPE_BIT_LEN;
|
||||
|
||||
/* Alignment check. */
|
||||
PCRE2_ASSERT(((uintptr_t)next_char & 0x3) == 0);
|
||||
|
||||
max_index = type & XCL_ITEM_COUNT_MASK;
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
if (c >= XCL_CHAR_LIST_HIGH_32_START)
|
||||
{
|
||||
if (max_index == XCL_ITEM_COUNT_MASK)
|
||||
{
|
||||
max_index = *(const uint32_t*)next_char;
|
||||
PCRE2_ASSERT(max_index >= XCL_ITEM_COUNT_MASK);
|
||||
next_char += 4;
|
||||
}
|
||||
|
||||
next_char += max_index << 2;
|
||||
type >>= XCL_TYPE_BIT_LEN;
|
||||
max_index = type & XCL_ITEM_COUNT_MASK;
|
||||
}
|
||||
#endif
|
||||
|
||||
c = (uint32_t)((c << XCL_CHAR_SHIFT) | XCL_CHAR_END);
|
||||
|
||||
if (max_index == XCL_ITEM_COUNT_MASK)
|
||||
{
|
||||
max_index = *(const uint32_t*)next_char;
|
||||
next_char += 4;
|
||||
}
|
||||
|
||||
if (max_index == 0 || c < *(const uint32_t*)next_char)
|
||||
return ((type & XCL_BEGIN_WITH_RANGE) != 0) == not_negated;
|
||||
|
||||
min_index = 0;
|
||||
value = ((const uint32_t*)next_char)[--max_index];
|
||||
if (c >= value)
|
||||
return (value == c || (value & XCL_CHAR_END) == 0) == not_negated;
|
||||
|
||||
max_index--;
|
||||
|
||||
/* Binary search of a range. */
|
||||
while (TRUE)
|
||||
{
|
||||
uint32_t mid_index = (min_index + max_index) >> 1;
|
||||
value = ((const uint32_t*)next_char)[mid_index];
|
||||
|
||||
if (c < value)
|
||||
max_index = mid_index - 1;
|
||||
else if (((const uint32_t*)next_char)[mid_index + 1] <= c)
|
||||
min_index = mid_index + 1;
|
||||
else
|
||||
return (value == c || (value & XCL_CHAR_END) == 0) == not_negated;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Match character against an ECLASS *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called to match a character against an extended class
|
||||
used for describing characters using boolean operations on sets.
|
||||
|
||||
Arguments:
|
||||
c the character
|
||||
data_start points to the start of the ECLASS data
|
||||
data_end points one-past-the-last of the ECLASS data
char_lists_end passed on to PRIV(xclass) for each nested XCLASS
|
||||
utf TRUE if in UTF mode
|
||||
|
||||
Returns: TRUE if character matches, else FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(eclass)(uint32_t c, PCRE2_SPTR data_start, PCRE2_SPTR data_end,
|
||||
const uint8_t *char_lists_end, BOOL utf)
|
||||
{
|
||||
PCRE2_SPTR ptr = data_start;
|
||||
PCRE2_UCHAR flags;
|
||||
uint32_t stack = 0;
|
||||
int stack_depth = 0;
|
||||
|
||||
PCRE2_ASSERT(data_start < data_end);
|
||||
flags = *ptr++;
|
||||
PCRE2_ASSERT((flags & ECL_MAP) == 0 ||
|
||||
(data_end - ptr) >= 32 / (int)sizeof(PCRE2_UCHAR));
|
||||
|
||||
/* Code points < 256 are matched against a bitmap, if one is present.
|
||||
Otherwise all codepoints are checked later. */
|
||||
|
||||
if ((flags & ECL_MAP) != 0)
|
||||
{
|
||||
if (c < 256)
|
||||
return (((const uint8_t *)ptr)[c/8] & (1u << (c&7))) != 0;
|
||||
|
||||
/* Skip the bitmap. */
|
||||
ptr += 32 / sizeof(PCRE2_UCHAR);
|
||||
}
|
||||
|
||||
/* Do a little loop, until we reach the end of the ECLASS. */
|
||||
while (ptr < data_end)
|
||||
{
|
||||
switch (*ptr)
|
||||
{
|
||||
case ECL_AND:
|
||||
++ptr;
|
||||
stack = (stack >> 1) & (stack | ~(uint32_t)1u);
|
||||
PCRE2_ASSERT(stack_depth >= 2);
|
||||
--stack_depth;
|
||||
break;
|
||||
|
||||
case ECL_OR:
|
||||
++ptr;
|
||||
stack = (stack >> 1) | (stack & (uint32_t)1u);
|
||||
PCRE2_ASSERT(stack_depth >= 2);
|
||||
--stack_depth;
|
||||
break;
|
||||
|
||||
case ECL_XOR:
|
||||
++ptr;
|
||||
stack = (stack >> 1) ^ (stack & (uint32_t)1u);
|
||||
PCRE2_ASSERT(stack_depth >= 2);
|
||||
--stack_depth;
|
||||
break;
|
||||
|
||||
case ECL_NOT:
|
||||
++ptr;
|
||||
stack ^= (uint32_t)1u;
|
||||
PCRE2_ASSERT(stack_depth >= 1);
|
||||
break;
|
||||
|
||||
case ECL_XCLASS:
|
||||
{
|
||||
uint32_t matched = PRIV(xclass)(c, ptr + 1 + LINK_SIZE, char_lists_end, utf);
|
||||
|
||||
ptr += GET(ptr, 1);
|
||||
stack = (stack << 1) | matched;
|
||||
++stack_depth;
|
||||
break;
|
||||
}
|
||||
|
||||
/* This should never occur, but compilers may mutter if there is no
|
||||
default. */
|
||||
|
||||
default:
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
PCRE2_ASSERT(stack_depth == 1);
|
||||
(void)stack_depth; /* Ignore unused variable, if assertions are disabled. */
|
||||
|
||||
/* The final bit left on the stack now holds the match result. */
|
||||
return (stack & 1u) != 0;
|
||||
}
|
||||
|
||||
/* End of pcre2_xclass.c */
|
||||
@@ -1,497 +0,0 @@
|
||||
/*************************************************
|
||||
* PCRE2 DEMONSTRATION PROGRAM *
|
||||
*************************************************/
|
||||
|
||||
/* This is a demonstration program to illustrate a straightforward way of
|
||||
using the PCRE2 regular expression library from a C program. See the
|
||||
pcre2sample documentation for a short discussion ("man pcre2sample" if you have
|
||||
the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
|
||||
incompatible with the original PCRE API.
|
||||
|
||||
There are actually three libraries, each supporting a different code unit
|
||||
width. This demonstration program uses the 8-bit library. The default is to
|
||||
process each code unit as a separate character, but if the pattern begins with
|
||||
"(*UTF)", both it and the subject are treated as UTF-8 strings, where
|
||||
characters may occupy multiple code units.
|
||||
|
||||
In Unix-like environments, if PCRE2 is installed in your standard system
|
||||
libraries, you should be able to compile this program using this command:
|
||||
|
||||
cc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo
|
||||
|
||||
If PCRE2 is not installed in a standard place, it is likely to be installed
|
||||
with support for the pkg-config mechanism. If you have pkg-config, you can
|
||||
compile this program using this command:
|
||||
|
||||
cc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo
|
||||
|
||||
If you do not have pkg-config, you may have to use something like this:
|
||||
|
||||
cc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \
|
||||
-R/usr/local/lib -lpcre2-8 -o pcre2demo
|
||||
|
||||
Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
|
||||
library files for PCRE2 are installed on your system. Only some operating
|
||||
systems (Solaris is one) use the -R option.
|
||||
|
||||
Building under Windows:
|
||||
|
||||
If you want to statically link this program against a non-dll .a file, you must
|
||||
define PCRE2_STATIC before including pcre2.h, so in this environment, uncomment
|
||||
the following line. */
|
||||
|
||||
/* #define PCRE2_STATIC */
|
||||
|
||||
/* The PCRE2_CODE_UNIT_WIDTH macro must be defined before including pcre2.h.
|
||||
For a program that uses only one code unit width, setting it to 8, 16, or 32
|
||||
makes it possible to use generic function names such as pcre2_compile(). Note
|
||||
that just changing 8 to 16 (for example) is not sufficient to convert this
|
||||
program to process 16-bit characters. Even in a fully 16-bit environment, where
|
||||
string-handling functions such as strcmp() and printf() work with 16-bit
|
||||
characters, the code for handling the table of named substrings will still need
|
||||
to be modified. */
|
||||
|
||||
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <pcre2.h>
|
||||
|
||||
|
||||
/**************************************************************************
|
||||
* Here is the program. The API includes the concept of "contexts" for *
|
||||
* setting up unusual interface requirements for compiling and matching, *
|
||||
* such as custom memory managers and non-standard newline definitions. *
|
||||
* This program does not do any of this, so it makes no use of contexts, *
|
||||
* always passing NULL where a context could be given. *
|
||||
**************************************************************************/
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
pcre2_code *re;
|
||||
PCRE2_SPTR pattern; /* PCRE2_SPTR is a pointer to unsigned code units of */
|
||||
PCRE2_SPTR subject; /* the appropriate width (in this case, 8 bits). */
|
||||
PCRE2_SPTR name_table;
|
||||
|
||||
int crlf_is_newline;
|
||||
int errornumber;
|
||||
int find_all;
|
||||
int i;
|
||||
int rc;
|
||||
int utf8;
|
||||
|
||||
uint32_t option_bits;
|
||||
uint32_t namecount;
|
||||
uint32_t name_entry_size;
|
||||
uint32_t newline;
|
||||
|
||||
PCRE2_SIZE erroroffset;
|
||||
PCRE2_SIZE *ovector;
|
||||
PCRE2_SIZE subject_length;
|
||||
|
||||
pcre2_match_data *match_data;
|
||||
|
||||
|
||||
/**************************************************************************
|
||||
* First, sort out the command line. There is only one possible option at *
|
||||
* the moment, "-g" to request repeated matching to find all occurrences, *
|
||||
* like Perl's /g option. We set the variable find_all to a non-zero value *
|
||||
* if the -g option is present. *
|
||||
**************************************************************************/
|
||||
|
||||
find_all = 0;
|
||||
for (i = 1; i < argc; i++)
|
||||
{
|
||||
if (strcmp(argv[i], "-g") == 0) find_all = 1;
|
||||
else if (argv[i][0] == '-')
|
||||
{
|
||||
printf("Unrecognised option %s\n", argv[i]);
|
||||
return 1;
|
||||
}
|
||||
else break;
|
||||
}
|
||||
|
||||
/* After the options, we require exactly two arguments, which are the pattern,
|
||||
and the subject string. */
|
||||
|
||||
if (argc - i != 2)
|
||||
{
|
||||
printf("Exactly two arguments required: a regex and a subject string\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Pattern and subject are char arguments, so they can be straightforwardly
|
||||
cast to PCRE2_SPTR because we are working in 8-bit code units. The subject
|
||||
length is cast to PCRE2_SIZE for completeness, though PCRE2_SIZE is in fact
|
||||
defined to be size_t. */
|
||||
|
||||
pattern = (PCRE2_SPTR)argv[i];
|
||||
subject = (PCRE2_SPTR)argv[i+1];
|
||||
subject_length = (PCRE2_SIZE)strlen((char *)subject);
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
* Now we are going to compile the regular expression pattern, and handle *
|
||||
* any errors that are detected. *
|
||||
*************************************************************************/
|
||||
|
||||
re = pcre2_compile(
|
||||
pattern, /* the pattern */
|
||||
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
|
||||
0, /* default options */
|
||||
&errornumber, /* for error number */
|
||||
&erroroffset, /* for error offset */
|
||||
NULL); /* use default compile context */
|
||||
|
||||
/* Compilation failed: print the error message and exit. */
|
||||
|
||||
if (re == NULL)
|
||||
{
|
||||
PCRE2_UCHAR buffer[256];
|
||||
pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
|
||||
printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset,
|
||||
buffer);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
* If the compilation succeeded, we call PCRE2 again, in order to do a *
|
||||
* pattern match against the subject string. This does just ONE match. If *
|
||||
* further matching is needed, it will be done below. Before running the *
|
||||
* match we must set up a match_data block for holding the result. Using *
|
||||
* pcre2_match_data_create_from_pattern() ensures that the block is *
|
||||
* exactly the right size for the number of capturing parentheses in the *
|
||||
* pattern. If you need to know the actual size of a match_data block as *
|
||||
* a number of bytes, you can find it like this: *
|
||||
* *
|
||||
* PCRE2_SIZE match_data_size = pcre2_get_match_data_size(match_data); *
|
||||
*************************************************************************/
|
||||
|
||||
match_data = pcre2_match_data_create_from_pattern(re, NULL);
|
||||
|
||||
/* Now run the match. */
|
||||
|
||||
rc = pcre2_match(
|
||||
re, /* the compiled pattern */
|
||||
subject, /* the subject string */
|
||||
subject_length, /* the length of the subject */
|
||||
0, /* start at offset 0 in the subject */
|
||||
0, /* default options */
|
||||
match_data, /* block for storing the result */
|
||||
NULL); /* use default match context */
|
||||
|
||||
/* Matching failed: handle error cases */
|
||||
|
||||
if (rc < 0)
|
||||
{
|
||||
switch(rc)
|
||||
{
|
||||
case PCRE2_ERROR_NOMATCH: printf("No match\n"); break;
|
||||
/*
|
||||
Handle other special cases if you like
|
||||
*/
|
||||
default: printf("Matching error %d\n", rc); break;
|
||||
}
|
||||
pcre2_match_data_free(match_data); /* Release memory used for the match */
|
||||
pcre2_code_free(re); /* data and the compiled pattern. */
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Match succeeded. Get a pointer to the output vector, where string offsets
|
||||
are stored. */
|
||||
|
||||
ovector = pcre2_get_ovector_pointer(match_data);
|
||||
printf("Match succeeded at offset %d\n", (int)ovector[0]);
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
* We have found the first match within the subject string. If the output *
|
||||
* vector wasn't big enough, say so. Then output any substrings that were *
|
||||
* captured. *
|
||||
*************************************************************************/
|
||||
|
||||
/* The output vector wasn't big enough. This should not happen, because we used
|
||||
pcre2_match_data_create_from_pattern() above. */
|
||||
|
||||
if (rc == 0)
|
||||
printf("ovector was not big enough for all the captured substrings\n");
|
||||
|
||||
/* Since release 10.38 PCRE2 has locked out the use of \K in lookaround
|
||||
assertions. However, there is an option to re-enable the old behaviour. If that
|
||||
is set, it is possible to run patterns such as /(?=.\K)/ that use \K in an
|
||||
assertion to set the start of a match later than its end. In this demonstration
|
||||
program, we show how to detect this case, but it shouldn't arise because the
|
||||
option is never set. */
|
||||
|
||||
if (ovector[0] > ovector[1])
|
||||
{
|
||||
printf("\\K was used in an assertion to set the match start after its end.\n"
|
||||
"From end to start the match was: %.*s\n", (int)(ovector[0] - ovector[1]),
|
||||
(char *)(subject + ovector[1]));
|
||||
printf("Run abandoned\n");
|
||||
pcre2_match_data_free(match_data);
|
||||
pcre2_code_free(re);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Show substrings stored in the output vector by number. Obviously, in a real
|
||||
application you might want to do things other than print them. */
|
||||
|
||||
for (i = 0; i < rc; i++)
|
||||
{
|
||||
PCRE2_SPTR substring_start = subject + ovector[2*i];
|
||||
PCRE2_SIZE substring_length = ovector[2*i+1] - ovector[2*i];
|
||||
printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start);
|
||||
}
|
||||
|
||||
|
||||
/**************************************************************************
|
||||
* That concludes the basic part of this demonstration program. We have *
|
||||
* compiled a pattern, and performed a single match. The code that follows *
|
||||
* shows first how to access named substrings, and then how to code for *
|
||||
* repeated matches on the same subject. *
|
||||
**************************************************************************/
|
||||
|
||||
/* See if there are any named substrings, and if so, show them by name. First
|
||||
we have to extract the count of named parentheses from the pattern. */
|
||||
|
||||
(void)pcre2_pattern_info(
|
||||
re, /* the compiled pattern */
|
||||
PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */
|
||||
&namecount); /* where to put the answer */
|
||||
|
||||
if (namecount == 0) printf("No named substrings\n"); else
|
||||
{
|
||||
PCRE2_SPTR tabptr;
|
||||
printf("Named substrings\n");
|
||||
|
||||
/* Before we can access the substrings, we must extract the table for
|
||||
translating names to numbers, and the size of each entry in the table. */
|
||||
|
||||
(void)pcre2_pattern_info(
|
||||
re, /* the compiled pattern */
|
||||
PCRE2_INFO_NAMETABLE, /* address of the table */
|
||||
&name_table); /* where to put the answer */
|
||||
|
||||
(void)pcre2_pattern_info(
|
||||
re, /* the compiled pattern */
|
||||
PCRE2_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
|
||||
&name_entry_size); /* where to put the answer */
|
||||
|
||||
/* Now we can scan the table and, for each entry, print the number, the name,
|
||||
and the substring itself. In the 8-bit library the number is held in two
|
||||
bytes, most significant first. */
|
||||
|
||||
tabptr = name_table;
|
||||
for (i = 0; i < namecount; i++)
|
||||
{
|
||||
int n = (tabptr[0] << 8) | tabptr[1];
|
||||
printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
|
||||
(int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]);
|
||||
tabptr += name_entry_size;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
* If the "-g" option was given on the command line, we want to continue *
|
||||
* to search for additional matches in the subject string, in a similar *
|
||||
* way to the /g option in Perl. This turns out to be trickier than you *
|
||||
* might think because of the possibility of matching an empty string. *
|
||||
* What happens is as follows: *
|
||||
* *
|
||||
* If the previous match was NOT for an empty string, we can just start *
|
||||
* the next match at the end of the previous one. *
|
||||
* *
|
||||
* If the previous match WAS for an empty string, we can't do that, as it *
|
||||
* would lead to an infinite loop. Instead, a call of pcre2_match() is *
|
||||
* made with the PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set. The *
|
||||
* first of these tells PCRE2 that an empty string at the start of the *
|
||||
* subject is not a valid match; other possibilities must be tried. The *
|
||||
* second flag restricts PCRE2 to one match attempt at the initial string *
|
||||
* position. If this match succeeds, an alternative to the empty string *
|
||||
* match has been found, and we can print it and proceed round the loop, *
|
||||
* advancing by the length of whatever was found. If this match does not *
|
||||
* succeed, we still stay in the loop, advancing by just one character. *
|
||||
* In UTF-8 mode, which can be set by (*UTF) in the pattern, this may be *
|
||||
* more than one byte. *
|
||||
* *
|
||||
* However, there is a complication concerned with newlines. When the *
|
||||
* newline convention is such that CRLF is a valid newline, we must *
|
||||
* advance by two characters rather than one. The newline convention can *
|
||||
* be set in the regex by (*CR), etc.; if not, we must find the default. *
|
||||
*************************************************************************/
|
||||
|
||||
if (!find_all) /* Check for -g */
|
||||
{
|
||||
pcre2_match_data_free(match_data); /* Release the memory that was used */
|
||||
pcre2_code_free(re); /* for the match data and the pattern. */
|
||||
return 0; /* Exit the program. */
|
||||
}
|
||||
|
||||
/* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
|
||||
sequence. First, find the options with which the regex was compiled and extract
|
||||
the UTF state. */
|
||||
|
||||
(void)pcre2_pattern_info(re, PCRE2_INFO_ALLOPTIONS, &option_bits);
|
||||
utf8 = (option_bits & PCRE2_UTF) != 0;
|
||||
|
||||
/* Now find the newline convention and see whether CRLF is a valid newline
|
||||
sequence. */
|
||||
|
||||
(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline);
|
||||
crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
|
||||
newline == PCRE2_NEWLINE_CRLF ||
|
||||
newline == PCRE2_NEWLINE_ANYCRLF;
|
||||
|
||||
/* Loop for second and subsequent matches */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
uint32_t options = 0; /* Normally no options */
|
||||
PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */
|
||||
|
||||
/* If the previous match was for an empty string, we are finished if we are
|
||||
at the end of the subject. Otherwise, arrange to run another match at the
|
||||
same point to see if a non-empty match can be found. */
|
||||
|
||||
if (ovector[0] == ovector[1])
|
||||
{
|
||||
if (ovector[0] == subject_length) break;
|
||||
options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
|
||||
}
|
||||
|
||||
/* If the previous match was not an empty string, there is one tricky case to
|
||||
consider. If a pattern contains \K within a lookbehind assertion at the
|
||||
start, the end of the matched string can be at the offset where the match
|
||||
started. Without special action, this leads to a loop that keeps on matching
|
||||
the same substring. We must detect this case and arrange to move the start on
|
||||
by one character. The pcre2_get_startchar() function returns the starting
|
||||
offset that was passed to pcre2_match(). */
|
||||
|
||||
else
|
||||
{
|
||||
PCRE2_SIZE startchar = pcre2_get_startchar(match_data);
|
||||
if (start_offset <= startchar)
|
||||
{
|
||||
if (startchar >= subject_length) break; /* Reached end of subject. */
|
||||
start_offset = startchar + 1; /* Advance by one character. */
|
||||
if (utf8) /* If UTF-8, it may be more */
|
||||
{ /* than one code unit. */
|
||||
for (; start_offset < subject_length; start_offset++)
|
||||
if ((subject[start_offset] & 0xc0) != 0x80) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Run the next matching operation */
|
||||
|
||||
rc = pcre2_match(
|
||||
re, /* the compiled pattern */
|
||||
subject, /* the subject string */
|
||||
subject_length, /* the length of the subject */
|
||||
start_offset, /* starting offset in the subject */
|
||||
options, /* options */
|
||||
match_data, /* block for storing the result */
|
||||
NULL); /* use default match context */
|
||||
|
||||
/* This time, a result of NOMATCH isn't an error. If the value in "options"
|
||||
is zero, it just means we have found all possible matches, so the loop ends.
|
||||
Otherwise, it means we have failed to find a non-empty-string match at a
|
||||
point where there was a previous empty-string match. In this case, we do what
|
||||
Perl does: advance the matching position by one character, and continue. We
|
||||
do this by setting the "end of previous match" offset, because that is picked
|
||||
up at the top of the loop as the point at which to start again.
|
||||
|
||||
There are two complications: (a) When CRLF is a valid newline sequence, and
|
||||
the current position is just before it, advance by an extra byte. (b)
|
||||
Otherwise we must ensure that we skip an entire UTF character if we are in
|
||||
UTF mode. */
|
||||
|
||||
if (rc == PCRE2_ERROR_NOMATCH)
|
||||
{
|
||||
if (options == 0) break; /* All matches found */
|
||||
ovector[1] = start_offset + 1; /* Advance one code unit */
|
||||
if (crlf_is_newline && /* If CRLF is a newline & */
|
||||
start_offset < subject_length - 1 && /* we are at CRLF, */
|
||||
subject[start_offset] == '\r' &&
|
||||
subject[start_offset + 1] == '\n')
|
||||
ovector[1] += 1; /* Advance by one more. */
|
||||
else if (utf8) /* Otherwise, ensure we */
|
||||
{ /* advance a whole UTF-8 */
|
||||
while (ovector[1] < subject_length) /* character. */
|
||||
{
|
||||
if ((subject[ovector[1]] & 0xc0) != 0x80) break;
|
||||
ovector[1] += 1;
|
||||
}
|
||||
}
|
||||
continue; /* Go round the loop again */
|
||||
}
|
||||
|
||||
/* Other matching errors are not recoverable. */
|
||||
|
||||
if (rc < 0)
|
||||
{
|
||||
printf("Matching error %d\n", rc);
|
||||
pcre2_match_data_free(match_data);
|
||||
pcre2_code_free(re);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Match succeeded */
|
||||
|
||||
printf("\nMatch succeeded again at offset %d\n", (int)ovector[0]);
|
||||
|
||||
/* The match succeeded, but the output vector wasn't big enough. This
|
||||
should not happen. */
|
||||
|
||||
if (rc == 0)
|
||||
printf("ovector was not big enough for all the captured substrings\n");
|
||||
|
||||
/* We must guard against patterns such as /(?=.\K)/ that use \K in an
|
||||
assertion to set the start of a match later than its end. In this
|
||||
demonstration program, we just detect this case and give up. */
|
||||
|
||||
if (ovector[0] > ovector[1])
|
||||
{
|
||||
printf("\\K was used in an assertion to set the match start after its end.\n"
|
||||
"From end to start the match was: %.*s\n", (int)(ovector[0] - ovector[1]),
|
||||
(char *)(subject + ovector[1]));
|
||||
printf("Run abandoned\n");
|
||||
pcre2_match_data_free(match_data);
|
||||
pcre2_code_free(re);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* As before, show substrings stored in the output vector by number, and then
|
||||
also any named substrings. */
|
||||
|
||||
for (i = 0; i < rc; i++)
|
||||
{
|
||||
PCRE2_SPTR substring_start = subject + ovector[2*i];
|
||||
size_t substring_length = ovector[2*i+1] - ovector[2*i];
|
||||
printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start);
|
||||
}
|
||||
|
||||
if (namecount == 0) printf("No named substrings\n"); else
|
||||
{
|
||||
PCRE2_SPTR tabptr = name_table;
|
||||
printf("Named substrings\n");
|
||||
for (i = 0; i < namecount; i++)
|
||||
{
|
||||
int n = (tabptr[0] << 8) | tabptr[1];
|
||||
printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
|
||||
(int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]);
|
||||
tabptr += name_entry_size;
|
||||
}
|
||||
}
|
||||
} /* End of loop to find second and subsequent matches */
|
||||
|
||||
printf("\n");
|
||||
pcre2_match_data_free(match_data);
|
||||
pcre2_code_free(re);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre2demo.c */
|
||||
@@ -1,4691 +0,0 @@
|
||||
/*************************************************
|
||||
* pcre2grep program *
|
||||
*************************************************/
|
||||
|
||||
/* This is a grep program that uses the 8-bit PCRE regular expression library
|
||||
via the PCRE2 updated API to do its pattern matching. On Unix-like, Windows,
|
||||
and native z/OS systems it can recurse into directories, and in z/OS it can
|
||||
handle PDS files.
|
||||
|
||||
Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
|
||||
additional header is required. That header is not included in the main PCRE2
|
||||
distribution because other apparatus is needed to compile pcre2grep for z/OS.
|
||||
The header can be found in the special z/OS distribution, which is available
|
||||
from www.zaconsultants.net or from www.cbttape.org.
|
||||
|
||||
Copyright (c) 1997-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <ctype.h>
|
||||
#include <locale.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#if (defined _WIN32 || (defined HAVE_WINDOWS_H && HAVE_WINDOWS_H)) \
|
||||
&& !defined WIN32 && !defined(__CYGWIN__)
|
||||
#define WIN32
|
||||
#endif
|
||||
|
||||
/* Some versions of CMake still define it. */
|
||||
#if defined(__CYGWIN__) && defined(WIN32)
|
||||
#undef WIN32
|
||||
#endif
|
||||
|
||||
#ifdef __VMS
|
||||
#include clidef
|
||||
#include descrip
|
||||
#include lib$routines
|
||||
#endif
|
||||
|
||||
#ifdef WIN32
|
||||
#include <io.h> /* For _setmode() */
|
||||
#include <fcntl.h> /* For _O_BINARY */
|
||||
#endif
|
||||
|
||||
#if defined(SUPPORT_PCRE2GREP_CALLOUT) && defined(SUPPORT_PCRE2GREP_CALLOUT_FORK)
|
||||
#ifdef WIN32
|
||||
#include <process.h>
|
||||
#else
|
||||
#include <sys/wait.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_VALGRIND
|
||||
#include <valgrind/memcheck.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_LIBZ
|
||||
#include <zlib.h>
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_LIBBZ2
|
||||
#include <bzlib.h>
|
||||
#endif
|
||||
|
||||
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||
#include "pcre2.h"
|
||||
|
||||
/* Older versions of MSVC lack snprintf(). This define allows for
|
||||
warning/error-free compilation and testing with MSVC compilers back to at least
|
||||
MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
|
||||
|
||||
#if defined(_MSC_VER) && (_MSC_VER < 1900)
|
||||
#define snprintf _snprintf
|
||||
#endif
|
||||
|
||||
/* Old VC and other older compilers don't support %td or %zu, and even some
that claim to be C99 don't support them (hence DISABLE_PERCENT_ZT). */
|
||||
|
||||
#if defined(DISABLE_PERCENT_ZT) || (defined(_MSC_VER) && (_MSC_VER < 1800)) || \
|
||||
(!defined(_MSC_VER) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L))
|
||||
#ifdef _WIN64
|
||||
#define SIZ_FORM "llu"
|
||||
#else
|
||||
#define SIZ_FORM "lu"
|
||||
#endif
|
||||
#else
|
||||
#define SIZ_FORM "zu"
|
||||
#endif
|
||||
|
||||
#define FALSE 0
|
||||
#define TRUE 1
|
||||
|
||||
typedef int BOOL;
|
||||
|
||||
#define DEFAULT_CAPTURE_MAX 50
|
||||
|
||||
#if BUFSIZ > 8192
|
||||
#define MAXPATLEN BUFSIZ
|
||||
#else
|
||||
#define MAXPATLEN 8192
|
||||
#endif
|
||||
|
||||
#define FNBUFSIZ 2048
|
||||
#define ERRBUFSIZ 256
|
||||
|
||||
/* Values for the "filenames" variable, which specifies options for file name
|
||||
output. The order is important; it is assumed that a file name is wanted for
|
||||
all values greater than FN_DEFAULT. */
|
||||
|
||||
enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
|
||||
|
||||
/* File reading styles */
|
||||
|
||||
enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
|
||||
|
||||
/* Actions for the -d and -D options */
|
||||
|
||||
enum { dee_READ, dee_SKIP, dee_RECURSE };
|
||||
enum { DEE_READ, DEE_SKIP };
|
||||
|
||||
/* Actions for special processing options (flag bits) */
|
||||
|
||||
#define PO_WORD_MATCH 0x0001
|
||||
#define PO_LINE_MATCH 0x0002
|
||||
#define PO_FIXED_STRINGS 0x0004
|
||||
|
||||
/* Binary file options */
|
||||
|
||||
enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
|
||||
|
||||
/* Return values from decode_dollar_escape() */
|
||||
|
||||
enum { DDE_ERROR, DDE_CAPTURE, DDE_CHAR };
|
||||
|
||||
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
|
||||
environments), a warning is issued if the value of fwrite() is ignored.
|
||||
Unfortunately, casting to (void) does not suppress the warning. To get round
|
||||
this, we use a macro that compiles a fudge. Oddly, this does not also seem to
|
||||
apply to fprintf(). */
|
||||
|
||||
/* Call fwrite() and deliberately discard its result. The result is consumed by
an empty "if" so that the unused-result warning is not triggered, and the whole
thing is wrapped in do { } while (0) so that the macro expands to exactly one
statement: it can then be used as the body of an unbraced if/else without
capturing or orphaning the "else". Invoke it with a trailing semicolon. */

#define FWRITE_IGNORE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
|
||||
|
||||
/* Under Windows, we have to set stdout to be binary, so that it does not
|
||||
convert \r\n at the ends of output lines to \r\r\n. However, that means that
|
||||
any messages written to stdout must have \r\n as their line terminator. This is
|
||||
handled by using STDOUT_NL as the newline string. We also use a normal double
|
||||
quote for the example, as single quotes aren't usually available. */
|
||||
|
||||
#ifdef WIN32
|
||||
#define STDOUT_NL "\r\n"
|
||||
#define STDOUT_NL_LEN 2
|
||||
#define QUOT "\""
|
||||
#else
|
||||
#define STDOUT_NL "\n"
|
||||
#define STDOUT_NL_LEN 1
|
||||
#define QUOT "'"
|
||||
#endif
|
||||
|
||||
/* This code is returned from decode_dollar_escape() when $n is encountered,
|
||||
and used to mean "output STDOUT_NL". It is, of course, not a valid Unicode code
|
||||
point. */
|
||||
|
||||
#define STDOUT_NL_CODE 0x7fffffffu
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Global variables *
|
||||
*************************************************/
|
||||
|
||||
static const char *colour_string = "1;31";
|
||||
static const char *colour_option = NULL;
|
||||
static const char *dee_option = NULL;
|
||||
static const char *DEE_option = NULL;
|
||||
static const char *locale = NULL;
|
||||
static const char *newline_arg = NULL;
|
||||
static const char *group_separator = "--";
|
||||
static const char *om_separator = NULL;
|
||||
static const char *stdin_name = "(standard input)";
|
||||
static const char *output_text = NULL;
|
||||
|
||||
static char *main_buffer = NULL;
|
||||
|
||||
static const char *printname_nl = STDOUT_NL; /* Changed to NULL for -Z */
|
||||
static int printname_colon = ':'; /* Changed to 0 for -Z */
|
||||
static int printname_hyphen = '-'; /* Changed to 0 for -Z */
|
||||
|
||||
static int after_context = 0;
|
||||
static int before_context = 0;
|
||||
static int binary_files = BIN_BINARY;
|
||||
static int both_context = 0;
|
||||
static int endlinetype;
|
||||
|
||||
static int count_limit = -1; /* Not long, so that it works with OP_NUMBER */
|
||||
static unsigned long int counts_printed = 0;
|
||||
static unsigned long int total_count = 0;
|
||||
|
||||
static PCRE2_SIZE bufthird = PCRE2GREP_BUFSIZE;
|
||||
static PCRE2_SIZE max_bufthird = PCRE2GREP_MAX_BUFSIZE;
|
||||
static PCRE2_SIZE bufsize = 3*PCRE2GREP_BUFSIZE;
|
||||
|
||||
#ifdef WIN32
|
||||
static int dee_action = dee_SKIP;
|
||||
#else
|
||||
static int dee_action = dee_READ;
|
||||
#endif
|
||||
|
||||
static int DEE_action = DEE_READ;
|
||||
static int error_count = 0;
|
||||
static int filenames = FN_DEFAULT;
|
||||
|
||||
#ifdef SUPPORT_PCRE2GREP_JIT
|
||||
static BOOL use_jit = TRUE;
|
||||
#else
|
||||
static BOOL use_jit = FALSE;
|
||||
#endif
|
||||
|
||||
static const uint8_t *character_tables = NULL;
|
||||
|
||||
static uint32_t pcre2_options = 0;
|
||||
static uint32_t extra_options = 0;
|
||||
static PCRE2_SIZE heap_limit = PCRE2_UNSET;
|
||||
static uint32_t match_limit = 0;
|
||||
static uint32_t depth_limit = 0;
|
||||
|
||||
static pcre2_compile_context *compile_context;
|
||||
static pcre2_match_context *match_context;
|
||||
static pcre2_match_data *match_data, *match_data_pair[2];
|
||||
static PCRE2_SIZE *offsets, *offsets_pair[2];
|
||||
static int match_data_toggle;
|
||||
static uint32_t offset_size;
|
||||
static uint32_t capture_max = DEFAULT_CAPTURE_MAX;
|
||||
|
||||
static BOOL all_matches = FALSE;
|
||||
static BOOL case_restrict = FALSE;
|
||||
static BOOL count_only = FALSE;
|
||||
static BOOL do_colour = FALSE;
|
||||
#ifdef WIN32
|
||||
static BOOL do_ansi = FALSE;
|
||||
#endif
|
||||
static BOOL file_offsets = FALSE;
|
||||
static BOOL hyphenpending = FALSE;
|
||||
static BOOL invert = FALSE;
|
||||
static BOOL line_buffered = FALSE;
|
||||
static BOOL line_offsets = FALSE;
|
||||
static BOOL multiline = FALSE;
|
||||
static BOOL no_ucp = FALSE;
|
||||
static BOOL number = FALSE;
|
||||
static BOOL omit_zero_count = FALSE;
|
||||
static BOOL resource_error = FALSE;
|
||||
static BOOL quiet = FALSE;
|
||||
static BOOL show_total_count = FALSE;
|
||||
static BOOL silent = FALSE;
|
||||
static BOOL utf = FALSE;
|
||||
static BOOL posix_digit = FALSE;
|
||||
static BOOL posix_pattern_file = FALSE;
|
||||
|
||||
static uint8_t utf8_buffer[8];
|
||||
|
||||
|
||||
/* Structure for list of --only-matching capturing numbers. */
|
||||
|
||||
/* One element of the chain of --only-matching capturing numbers: the capture
group number plus a pointer to the following element. */

struct omstr {
  struct omstr *next;   /* following element of the chain */
  int groupnum;         /* capturing group number */
};

typedef struct omstr omstr;
|
||||
|
||||
static omstr *only_matching = NULL;
|
||||
static omstr *only_matching_last = NULL;
|
||||
static int only_matching_count;
|
||||
|
||||
/* Structure for holding the two variables that describe a number chain. */
|
||||
|
||||
typedef struct omdatastr {
|
||||
omstr **anchor;
|
||||
omstr **lastptr;
|
||||
} omdatastr;
|
||||
|
||||
static omdatastr only_matching_data = { &only_matching, &only_matching_last };
|
||||
|
||||
/* Structure for list of file names (for -f and --{in,ex}clude-from) */
|
||||
|
||||
/* One element of a chain of file names, as used for -f and for
--include-from / --exclude-from. */

struct fnstr {
  struct fnstr *next;   /* following element of the chain */
  char *name;           /* the file name */
};

typedef struct fnstr fnstr;
|
||||
|
||||
static fnstr *exclude_from = NULL;
|
||||
static fnstr *exclude_from_last = NULL;
|
||||
static fnstr *include_from = NULL;
|
||||
static fnstr *include_from_last = NULL;
|
||||
|
||||
static fnstr *file_lists = NULL;
|
||||
static fnstr *file_lists_last = NULL;
|
||||
static fnstr *pattern_files = NULL;
|
||||
static fnstr *pattern_files_last = NULL;
|
||||
|
||||
/* Structure for holding the two variables that describe a file name chain. */
|
||||
|
||||
typedef struct fndatastr {
|
||||
fnstr **anchor;
|
||||
fnstr **lastptr;
|
||||
} fndatastr;
|
||||
|
||||
static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
|
||||
static fndatastr include_from_data = { &include_from, &include_from_last };
|
||||
static fndatastr file_lists_data = { &file_lists, &file_lists_last };
|
||||
static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
|
||||
|
||||
/* Structure for pattern and its compiled form; used for matching patterns and
|
||||
also for include/exclude patterns. */
|
||||
|
||||
typedef struct patstr {
|
||||
struct patstr *next;
|
||||
char *string;
|
||||
PCRE2_SIZE length;
|
||||
pcre2_code *compiled;
|
||||
} patstr;
|
||||
|
||||
static patstr *patterns = NULL;
|
||||
static patstr *patterns_last = NULL;
|
||||
static patstr *include_patterns = NULL;
|
||||
static patstr *include_patterns_last = NULL;
|
||||
static patstr *exclude_patterns = NULL;
|
||||
static patstr *exclude_patterns_last = NULL;
|
||||
static patstr *include_dir_patterns = NULL;
|
||||
static patstr *include_dir_patterns_last = NULL;
|
||||
static patstr *exclude_dir_patterns = NULL;
|
||||
static patstr *exclude_dir_patterns_last = NULL;
|
||||
|
||||
/* Structure holding the two variables that describe a pattern chain. A pointer
|
||||
to such structures is used for each appropriate option. */
|
||||
|
||||
typedef struct patdatastr {
|
||||
patstr **anchor;
|
||||
patstr **lastptr;
|
||||
} patdatastr;
|
||||
|
||||
static patdatastr match_patdata = { &patterns, &patterns_last };
|
||||
static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
|
||||
static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
|
||||
static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
|
||||
static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
|
||||
|
||||
static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
|
||||
&include_dir_patterns, &exclude_dir_patterns };
|
||||
|
||||
static const char *incexname[4] = { "--include", "--exclude",
|
||||
"--include-dir", "--exclude-dir" };
|
||||
|
||||
/* Structure for options and list of them */
|
||||
|
||||
enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER, OP_SIZE,
|
||||
OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
|
||||
|
||||
typedef struct option_item {
|
||||
int type;
|
||||
int one_char;
|
||||
void *dataptr;
|
||||
const char *long_name;
|
||||
const char *help_text;
|
||||
} option_item;
|
||||
|
||||
/* Options without a single-letter equivalent get a negative value. This can be
|
||||
used to identify them. */
|
||||
|
||||
#define N_COLOUR (-1)
|
||||
#define N_EXCLUDE (-2)
|
||||
#define N_EXCLUDE_DIR (-3)
|
||||
#define N_HELP (-4)
|
||||
#define N_INCLUDE (-5)
|
||||
#define N_INCLUDE_DIR (-6)
|
||||
#define N_LABEL (-7)
|
||||
#define N_LOCALE (-8)
|
||||
#define N_NULL (-9)
|
||||
#define N_LOFFSETS (-10)
|
||||
#define N_FOFFSETS (-11)
|
||||
#define N_LBUFFER (-12)
|
||||
#define N_H_LIMIT (-13)
|
||||
#define N_M_LIMIT (-14)
|
||||
#define N_M_LIMIT_DEP (-15)
|
||||
#define N_BUFSIZE (-16)
|
||||
#define N_NOJIT (-17)
|
||||
#define N_FILE_LIST (-18)
|
||||
#define N_BINARY_FILES (-19)
|
||||
#define N_EXCLUDE_FROM (-20)
|
||||
#define N_INCLUDE_FROM (-21)
|
||||
#define N_OM_SEPARATOR (-22)
|
||||
#define N_MAX_BUFSIZE (-23)
|
||||
#define N_OM_CAPTURE (-24)
|
||||
#define N_ALLABSK (-25)
|
||||
#define N_POSIX_DIGIT (-26)
|
||||
#define N_GROUP_SEPARATOR (-27)
|
||||
#define N_NO_GROUP_SEPARATOR (-28)
|
||||
#define N_POSIX_PATFILE (-29)
|
||||
|
||||
static option_item optionlist[] = {
|
||||
{ OP_NODATA, N_NULL, NULL, "", "terminate options" },
|
||||
{ OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
|
||||
{ OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
|
||||
{ OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
|
||||
{ OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
|
||||
{ OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
|
||||
{ OP_SIZE, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer starting size" },
|
||||
{ OP_SIZE, N_MAX_BUFSIZE,&max_bufthird, "max-buffer-size=number", "set processing buffer maximum size" },
|
||||
{ OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
|
||||
{ OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
|
||||
{ OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
|
||||
{ OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
|
||||
{ OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
|
||||
{ OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
|
||||
{ OP_NODATA, N_POSIX_DIGIT, NULL, "posix-digit", "\\d always matches [0-9], even in UTF/UCP mode" },
|
||||
{ OP_NODATA, 'E', NULL, "case-restrict", "restrict case matching (no mix ASCII/non-ASCII)" },
|
||||
{ OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" },
|
||||
{ OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
|
||||
{ OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" },
|
||||
{ OP_NODATA, N_POSIX_PATFILE, NULL, "posix-pattern-file", "use POSIX semantics for pattern files" },
|
||||
{ OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
|
||||
{ OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
|
||||
{ OP_STRING, N_GROUP_SEPARATOR, &group_separator, "group-separator=text", "set separator between groups of lines" },
|
||||
{ OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
|
||||
{ OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
|
||||
{ OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" },
|
||||
{ OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
|
||||
{ OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
|
||||
{ OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
|
||||
{ OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
|
||||
{ OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
|
||||
{ OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
|
||||
{ OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
|
||||
{ OP_SIZE, N_H_LIMIT, &heap_limit, "heap-limit=number", "set PCRE2 heap limit option (kibibytes)" },
|
||||
{ OP_U32NUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE2 match limit option" },
|
||||
{ OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
|
||||
{ OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
|
||||
{ OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
|
||||
{ OP_NUMBER, 'm', &count_limit, "max-count=number", "stop after <number> matched lines" },
|
||||
{ OP_STRING, 'N', &newline_arg, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF, ANY, or NUL)" },
|
||||
{ OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
|
||||
#ifdef SUPPORT_PCRE2GREP_JIT
|
||||
{ OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
|
||||
#else
|
||||
{ OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcre2grep does not support JIT" },
|
||||
#endif
|
||||
{ OP_NODATA, N_NO_GROUP_SEPARATOR, NULL, "no-group-separator", "suppress separators between groups of lines" },
|
||||
{ OP_STRING, 'O', &output_text, "output=text", "show only this text (possibly expanded)" },
|
||||
{ OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
|
||||
{ OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
|
||||
{ OP_U32NUMBER, N_OM_CAPTURE, &capture_max, "om-capture=n", "set capture count for --only-matching" },
|
||||
{ OP_NODATA, 'P', NULL, "no-ucp", "do not enable UCP mode with Unicode" },
|
||||
{ OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
|
||||
{ OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
|
||||
{ OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
|
||||
{ OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" },
|
||||
{ OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
|
||||
{ OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
|
||||
{ OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
|
||||
{ OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
|
||||
{ OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
|
||||
{ OP_NODATA, 't', NULL, "total-count", "print total count of matching lines" },
|
||||
{ OP_NODATA, 'u', NULL, "utf", "use UTF/Unicode" },
|
||||
{ OP_NODATA, 'U', NULL, "utf-allow-invalid", "use UTF/Unicode, allow for invalid code units" },
|
||||
{ OP_NODATA, 'V', NULL, "version", "print version information and exit" },
|
||||
{ OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
|
||||
{ OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
|
||||
{ OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
|
||||
{ OP_NODATA, N_ALLABSK, NULL, "allow-lookaround-bsk", "allow \\K in lookarounds" },
|
||||
{ OP_NODATA, 'Z', NULL, "null", "output 0 byte after file names" },
|
||||
{ OP_NODATA, 0, NULL, NULL, NULL }
|
||||
};
|
||||
|
||||
/* Table of names for newline types. Must be kept in step with the definitions
|
||||
of PCRE2_NEWLINE_xx in pcre2.h. */
|
||||
|
||||
static const char *newlines[] = {
|
||||
"DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" };
|
||||
|
||||
/* UTF-8 tables */
|
||||
|
||||
const int utf8_table1[] =
|
||||
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
|
||||
const int utf8_table1_size = sizeof(utf8_table1) / sizeof(int);
|
||||
|
||||
const int utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
|
||||
const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
|
||||
|
||||
const char utf8_table4[] = {
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
||||
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
|
||||
|
||||
|
||||
#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
|
||||
/*************************************************
|
||||
* Emulated memmove() for systems without it *
|
||||
*************************************************/
|
||||
|
||||
/* This function can make use of bcopy() if it is available. Otherwise do it by
|
||||
steam, as there are some non-Unix environments that lack both memmove() and
|
||||
bcopy(). */
|
||||
|
||||
static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
/* bcopy() copes with overlapping regions; note its (source, dest) order. */
bcopy(s, d, n);
return d;
#else
/* Byte-by-byte copy. The direction is chosen from the relative position of
the two regions so that an overlapping source is never overwritten before it
has been read. The original destination pointer is always returned. */
unsigned char *to = (unsigned char *)d;
const unsigned char *from = (const unsigned char *)s;
size_t left = n;

if (to > from)
  {
  /* Destination is above the source: work downwards from the far end. */
  to += n;
  from += n;
  while (left-- > 0) *(--to) = *(--from);
  }
else
  {
  /* Destination is at or below the source: work upwards. */
  while (left-- > 0) *to++ = *from++;
  }

return d;
#endif   /* not HAVE_BCOPY */
}
|
||||
#undef memmove
|
||||
#define memmove(d,s,n) emulated_memmove(d,s,n)
|
||||
#endif /* not VPCOMPAT && not HAVE_MEMMOVE */
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Convert code point to UTF-8 *
|
||||
*************************************************/
|
||||
|
||||
/* A static buffer is used. Returns the number of bytes. */
|
||||
|
||||
static int
|
||||
ord2utf8(uint32_t value)
|
||||
{
|
||||
int i, j;
|
||||
uint8_t *utf8bytes = utf8_buffer;
|
||||
for (i = 0; i < utf8_table1_size; i++)
|
||||
if (value <= (uint32_t)utf8_table1[i]) break;
|
||||
utf8bytes += i;
|
||||
for (j = i; j > 0; j--)
|
||||
{
|
||||
*utf8bytes-- = 0x80 | (value & 0x3f);
|
||||
value >>= 6;
|
||||
}
|
||||
*utf8bytes = utf8_table2[i] | value;
|
||||
return i + 1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Case-independent string compare *
|
||||
*************************************************/
|
||||
|
||||
/* Compares two zero-terminated strings, ignoring case as defined by
tolower() in the current locale.

Arguments:
  str1      first string
  str2      second string

Returns:    0 if the strings are equal ignoring case
            1 if the first differing character of str1 is the greater
           -1 if the first differing character of str1 is the lesser
*/

static int
strcmpic(const char *str1, const char *str2)
{
for (;; str1++, str2++)
  {
  /* The cast is essential: passing a negative plain char (any byte >= 0x80
  when char is signed) to tolower() is undefined behaviour. */

  int c1 = tolower((unsigned char)*str1);
  int c2 = tolower((unsigned char)*str2);
  if (c1 != c2) return (c1 > c2)? 1 : -1;
  if (c1 == 0) return 0;   /* Both strings ended together */
  }
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Parse GREP_COLORS *
|
||||
*************************************************/
|
||||
|
||||
/* Extract ms or mt from GREP_COLORS.
|
||||
|
||||
Argument: the string, possibly NULL
|
||||
Returns: the value of ms or mt, or NULL if neither present
|
||||
*/
|
||||
|
||||
static char *
parse_grep_colors(const char *gc)
{
/* The result lives in a static buffer, so it is overwritten by the next
call. Values longer than the buffer are silently truncated. */

static char seq[16];
const char *src;
size_t used = 0;

if (gc == NULL) return NULL;

/* "ms=" is looked for first; "mt=" is only a fallback. */

src = strstr(gc, "ms=");
if (src == NULL)
  {
  src = strstr(gc, "mt=");
  if (src == NULL) return NULL;
  }

/* Copy the value, which runs up to the next colon or the end of string. */

for (src += 3; used < sizeof(seq) - 1 && *src != '\0' && *src != ':'; src++)
  seq[used++] = *src;
seq[used] = '\0';

return seq;
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Exit from the program *
|
||||
*************************************************/
|
||||
|
||||
/* If there has been a resource error, give a suitable message.
|
||||
|
||||
Argument: the return code
|
||||
Returns: does not return
|
||||
*/
|
||||
|
||||
static void
|
||||
pcre2grep_exit(int rc)
|
||||
{
|
||||
/* VMS does exit codes differently: both exit(1) and exit(0) return with a
|
||||
status of 1, which is not helpful. To help with this problem, define a symbol
|
||||
(akin to an environment variable) called "PCRE2GREP_RC" and put the exit code
|
||||
therein. */
|
||||
|
||||
#ifdef __VMS
|
||||
char val_buf[4];
|
||||
$DESCRIPTOR(sym_nam, "PCRE2GREP_RC");
|
||||
$DESCRIPTOR(sym_val, val_buf);
|
||||
sprintf(val_buf, "%d", rc);
|
||||
sym_val.dsc$w_length = strlen(val_buf);
|
||||
lib$set_symbol(&sym_nam, &sym_val);
|
||||
#endif
|
||||
|
||||
if (resource_error)
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: Error %d, %d, %d or %d means that a resource "
|
||||
"limit was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
|
||||
PCRE2_ERROR_DEPTHLIMIT, PCRE2_ERROR_HEAPLIMIT);
|
||||
fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
|
||||
}
|
||||
exit(rc);
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Add item to chain of patterns *
|
||||
*************************************************/
|
||||
|
||||
/* Used to add an item onto a chain, or just return an unconnected item if the
|
||||
"after" argument is NULL.
|
||||
|
||||
Arguments:
|
||||
s pattern string to add
|
||||
patlen length of pattern
|
||||
after if not NULL points to item to insert after
|
||||
|
||||
Returns: new pattern block or NULL on error
|
||||
*/
|
||||
|
||||
static patstr *
|
||||
add_pattern(char *s, PCRE2_SIZE patlen, patstr *after)
|
||||
{
|
||||
patstr *p = (patstr *)malloc(sizeof(patstr));
|
||||
|
||||
/* LCOV_EXCL_START - These won't be hit in normal testing. */
|
||||
|
||||
if (p == NULL)
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: malloc failed\n");
|
||||
pcre2grep_exit(2);
|
||||
}
|
||||
if (patlen > MAXPATLEN)
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
|
||||
MAXPATLEN);
|
||||
free(p);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* LCOV_EXCL_STOP */
|
||||
|
||||
p->next = NULL;
|
||||
p->string = s;
|
||||
p->length = patlen;
|
||||
p->compiled = NULL;
|
||||
|
||||
if (after != NULL)
|
||||
{
|
||||
p->next = after->next;
|
||||
after->next = p;
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free chain of patterns *
|
||||
*************************************************/
|
||||
|
||||
/* Used for several chains of patterns.
|
||||
|
||||
Argument: pointer to start of chain
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
free_pattern_chain(patstr *pc)
|
||||
{
|
||||
while (pc != NULL)
|
||||
{
|
||||
patstr *p = pc;
|
||||
pc = p->next;
|
||||
if (p->compiled != NULL) pcre2_code_free(p->compiled);
|
||||
free(p);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free chain of file names *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Argument: pointer to start of chain
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
free_file_chain(fnstr *fn)
|
||||
{
|
||||
while (fn != NULL)
|
||||
{
|
||||
fnstr *f = fn;
|
||||
fn = f->next;
|
||||
free(f);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* OS-specific functions *
|
||||
*************************************************/
|
||||
|
||||
/* These definitions are needed in all Windows environments, even those where
|
||||
Unix-style directory scanning can be used (see below). */
|
||||
|
||||
#ifdef WIN32
|
||||
|
||||
#ifndef STRICT
|
||||
# define STRICT
|
||||
#endif
|
||||
#ifndef WIN32_LEAN_AND_MEAN
|
||||
# define WIN32_LEAN_AND_MEAN
|
||||
#endif
|
||||
|
||||
#include <windows.h>
|
||||
|
||||
#define iswild(name) (strpbrk(name, "*?") != NULL)
|
||||
|
||||
/* Convert ANSI BGR format to RGB used by Windows */
|
||||
#define BGR_RGB(x) (((x) & 1 ? 4 : 0) | ((x) & 2) | ((x) & 4 ? 1 : 0))
|
||||
|
||||
static HANDLE hstdout;
|
||||
static CONSOLE_SCREEN_BUFFER_INFO csbi;
|
||||
static WORD match_colour;
|
||||
|
||||
static WORD
|
||||
decode_ANSI_colour(const char *cs)
|
||||
{
|
||||
WORD result = csbi.wAttributes;
|
||||
while (*cs)
|
||||
{
|
||||
if (isdigit((unsigned char)(*cs)))
|
||||
{
|
||||
int code = atoi(cs);
|
||||
if (code == 1) result |= 0x08;
|
||||
else if (code == 4) result |= 0x8000;
|
||||
else if (code == 5) result |= 0x80;
|
||||
else if (code >= 30 && code <= 37) result = (result & 0xF8) | BGR_RGB(code - 30);
|
||||
else if (code == 39) result = (result & 0xF0) | (csbi.wAttributes & 0x0F);
|
||||
else if (code >= 40 && code <= 47) result = (result & 0x8F) | (BGR_RGB(code - 40) << 4);
|
||||
else if (code == 49) result = (result & 0x0F) | (csbi.wAttributes & 0xF0);
|
||||
/* aixterm high intensity colour codes */
|
||||
else if (code >= 90 && code <= 97) result = (result & 0xF0) | BGR_RGB(code - 90) | 0x08;
|
||||
else if (code >= 100 && code <= 107) result = (result & 0x0F) | (BGR_RGB(code - 100) << 4) | 0x80;
|
||||
|
||||
while (isdigit((unsigned char)(*cs))) cs++;
|
||||
}
|
||||
if (*cs) cs++;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
init_colour_output()
|
||||
{
|
||||
if (do_colour)
|
||||
{
|
||||
hstdout = GetStdHandle(STD_OUTPUT_HANDLE);
|
||||
/* This fails when redirected to con; try again if so. */
|
||||
if (!GetConsoleScreenBufferInfo(hstdout, &csbi) && !do_ansi)
|
||||
{
|
||||
HANDLE hcon = CreateFile("CONOUT$", GENERIC_READ | GENERIC_WRITE,
|
||||
FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL);
|
||||
GetConsoleScreenBufferInfo(hcon, &csbi);
|
||||
CloseHandle(hcon);
|
||||
}
|
||||
match_colour = decode_ANSI_colour(colour_string);
|
||||
/* No valid colour found - turn off colouring */
|
||||
if (!match_colour) do_colour = FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* WIN32 */
|
||||
|
||||
|
||||
/* The following sets of functions are defined so that they can be made system
|
||||
specific. At present there are versions for Unix-style environments, Windows,
|
||||
native z/OS, and "no support". */
|
||||
|
||||
|
||||
/************* Directory scanning Unix-style and z/OS ***********/
|
||||
|
||||
#if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <dirent.h>
|
||||
|
||||
#if defined NATIVE_ZOS
|
||||
/************* Directory and PDS/E scanning for z/OS ***********/
|
||||
/************* z/OS looks mostly like Unix with USS ************/
|
||||
/* However, z/OS needs the #include statements in this header */
|
||||
#include "pcrzosfs.h"
|
||||
/* That header is not included in the main PCRE distribution because
|
||||
other apparatus is needed to compile pcre2grep for z/OS. The header
|
||||
can be found in the special z/OS distribution, which is available
|
||||
from www.zaconsultants.net or from www.cbttape.org. */
|
||||
#endif
|
||||
|
||||
typedef DIR directory_type;
|
||||
#define FILESEP '/'
|
||||
|
||||
/* Returns non-zero if stat() succeeds and reports a directory. A failing
stat() yields zero, in the expectation that opening as a file will fail. */

static int
isdirectory(char *filename)
{
struct stat sb;
return (stat(filename, &sb) < 0)? 0 : S_ISDIR(sb.st_mode);
}
|
||||
|
||||
/* Open a directory for scanning; the result is whatever opendir() gives. */

static directory_type *
opendirectory(char *filename)
{
directory_type *handle = opendir(filename);
return handle;
}
|
||||
|
||||
static char *
|
||||
readdirectory(directory_type *dir)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
struct dirent *dent = readdir(dir);
|
||||
if (dent == NULL) break;
|
||||
if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
|
||||
return dent->d_name;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
closedirectory(directory_type *dir)
|
||||
{
|
||||
closedir(dir);
|
||||
}
|
||||
|
||||
|
||||
/************* Test for regular file, Unix-style **********/
|
||||
|
||||
/* Returns non-zero if stat() reports a regular file. A failing stat() also
yields 1, in the expectation that opening as a file will fail. */

static int
isregfile(char *filename)
{
struct stat sb;
return (stat(filename, &sb) < 0)? 1 : S_ISREG(sb.st_mode);
}
|
||||
|
||||
|
||||
#if defined NATIVE_ZOS
|
||||
/************* Test for a terminal in z/OS **********/
|
||||
/* isatty() does not work in a TSO environment, so always give FALSE.*/
|
||||
|
||||
static BOOL
|
||||
is_stdout_tty(void)
|
||||
{
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static BOOL
|
||||
is_file_tty(FILE *f)
|
||||
{
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
/************* Test for a terminal, Unix-style **********/
|
||||
|
||||
#else
|
||||
static BOOL
|
||||
is_stdout_tty(void)
|
||||
{
|
||||
return isatty(fileno(stdout));
|
||||
}
|
||||
|
||||
static BOOL
|
||||
is_file_tty(FILE *f)
|
||||
{
|
||||
return isatty(fileno(f));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/************* Print optionally coloured match Unix-style and z/OS **********/
|
||||
|
||||
static void
|
||||
print_match(const void *buf, int length)
|
||||
{
|
||||
if (length == 0) return;
|
||||
if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
|
||||
FWRITE_IGNORE(buf, 1, length, stdout);
|
||||
if (do_colour) fprintf(stdout, "%c[0m", 0x1b);
|
||||
}
|
||||
|
||||
/* End of Unix-style or native z/OS environment functions. */
|
||||
|
||||
|
||||
/************* Directory scanning in Windows ***********/
|
||||
|
||||
/* I (Philip Hazel) have no means of testing this code. It was contributed by
|
||||
Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
|
||||
when it did not exist. David Byron added a patch that moved the #include of
|
||||
<windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
|
||||
*/
|
||||
|
||||
#elif defined WIN32
|
||||
|
||||
#ifndef INVALID_FILE_ATTRIBUTES
|
||||
#define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
|
||||
#endif
|
||||
|
||||
typedef struct directory_type
|
||||
{
|
||||
HANDLE handle;
|
||||
BOOL first;
|
||||
WIN32_FIND_DATA data;
|
||||
} directory_type;
|
||||
|
||||
#define FILESEP '/'
|
||||
|
||||
int
|
||||
isdirectory(char *filename)
|
||||
{
|
||||
DWORD attr = GetFileAttributes(filename);
|
||||
if (attr == INVALID_FILE_ATTRIBUTES)
|
||||
return 0;
|
||||
return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
|
||||
}
|
||||
|
||||
directory_type *
|
||||
opendirectory(char *filename)
|
||||
{
|
||||
size_t len;
|
||||
char *pattern;
|
||||
directory_type *dir;
|
||||
DWORD err;
|
||||
len = strlen(filename);
|
||||
pattern = (char *)malloc(len + 3);
|
||||
dir = (directory_type *)malloc(sizeof(*dir));
|
||||
if ((pattern == NULL) || (dir == NULL))
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: malloc failed\n");
|
||||
pcre2grep_exit(2);
|
||||
}
|
||||
memcpy(pattern, filename, len);
|
||||
if (iswild(filename))
|
||||
pattern[len] = 0;
|
||||
else
|
||||
memcpy(&(pattern[len]), "\\*", 3);
|
||||
dir->handle = FindFirstFile(pattern, &(dir->data));
|
||||
if (dir->handle != INVALID_HANDLE_VALUE)
|
||||
{
|
||||
free(pattern);
|
||||
dir->first = TRUE;
|
||||
return dir;
|
||||
}
|
||||
err = GetLastError();
|
||||
free(pattern);
|
||||
free(dir);
|
||||
errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
char *
|
||||
readdirectory(directory_type *dir)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
if (!dir->first)
|
||||
{
|
||||
if (!FindNextFile(dir->handle, &(dir->data)))
|
||||
return NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
dir->first = FALSE;
|
||||
}
|
||||
if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
|
||||
return dir->data.cFileName;
|
||||
}
|
||||
#ifndef _MSC_VER
|
||||
return NULL; /* Keep compiler happy; never executed */
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
closedirectory(directory_type *dir)
|
||||
{
|
||||
FindClose(dir->handle);
|
||||
free(dir);
|
||||
}
|
||||
|
||||
|
||||
/************* Test for regular file in Windows **********/
|
||||
|
||||
/* I don't know how to do this, or if it can be done; assume all paths are
|
||||
regular if they are not directories. */
|
||||
|
||||
/* Anything that is not a directory is taken to be a regular file. */

int
isregfile(char *filename)
{
return isdirectory(filename)? 0 : 1;
}
|
||||
|
||||
|
||||
/************* Test for a terminal in Windows **********/
|
||||
|
||||
static BOOL
|
||||
is_stdout_tty(void)
|
||||
{
|
||||
return _isatty(_fileno(stdout));
|
||||
}
|
||||
|
||||
static BOOL
|
||||
is_file_tty(FILE *f)
|
||||
{
|
||||
return _isatty(_fileno(f));
|
||||
}
|
||||
|
||||
|
||||
/************* Print optionally coloured match in Windows **********/
|
||||
|
||||
static void
|
||||
print_match(const void *buf, int length)
|
||||
{
|
||||
if (length == 0) return;
|
||||
if (do_colour)
|
||||
{
|
||||
if (do_ansi) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
|
||||
else SetConsoleTextAttribute(hstdout, match_colour);
|
||||
}
|
||||
FWRITE_IGNORE(buf, 1, length, stdout);
|
||||
if (do_colour)
|
||||
{
|
||||
if (do_ansi) fprintf(stdout, "%c[0m", 0x1b);
|
||||
else SetConsoleTextAttribute(hstdout, csbi.wAttributes);
|
||||
}
|
||||
}
|
||||
|
||||
/* End of Windows functions */
|
||||
|
||||
|
||||
/************* Directory scanning when we can't do it ***********/
|
||||
|
||||
/* The type is void, and apart from isdirectory(), the functions do nothing. */
|
||||
|
||||
#else
|
||||
|
||||
#define FILESEP 0
|
||||
typedef void directory_type;
|
||||
|
||||
int isdirectory(char *filename) { return 0; }
|
||||
directory_type * opendirectory(char *filename) { return (directory_type*)0;}
|
||||
char *readdirectory(directory_type *dir) { return (char*)0;}
|
||||
void closedirectory(directory_type *dir) {}
|
||||
|
||||
|
||||
/************* Test for regular file when we can't do it **********/
|
||||
|
||||
/* Assume all files are regular. */
|
||||
|
||||
int
isregfile(char *filename)
{
(void)filename;   /* Every path is taken to be a regular file */
return 1;
}
|
||||
|
||||
|
||||
/************* Test for a terminal when we can't do it **********/
|
||||
|
||||
static BOOL
|
||||
is_stdout_tty(void)
|
||||
{
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static BOOL
|
||||
is_file_tty(FILE *f)
|
||||
{
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
/************* Print optionally coloured match when we can't do it **********/
|
||||
|
||||
/* Write "length" bytes from "buf" to stdout; no colouring is available in
this build. Nothing is written for a zero length. */

static void
print_match(const void *buf, int length)
{
if (length != 0)
  {
  FWRITE_IGNORE(buf, 1, length, stdout);
  }
}
|
||||
|
||||
#endif /* End of system-specific functions */
|
||||
|
||||
|
||||
|
||||
#ifndef HAVE_STRERROR
|
||||
/*************************************************
|
||||
* Provide strerror() for non-ANSI libraries *
|
||||
*************************************************/
|
||||
|
||||
/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
|
||||
in their libraries, but can provide the same facility by this simple
|
||||
alternative function. */
|
||||
|
||||
extern int sys_nerr;
|
||||
extern char *sys_errlist[];
|
||||
|
||||
char *
|
||||
strerror(int n)
|
||||
{
|
||||
if (n < 0 || n >= sys_nerr) return "unknown error number";
|
||||
return sys_errlist[n];
|
||||
}
|
||||
#endif /* HAVE_STRERROR */
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Usage function *
|
||||
*************************************************/
|
||||
|
||||
static int
|
||||
usage(int rc)
|
||||
{
|
||||
option_item *op;
|
||||
fprintf(stderr, "Usage: pcre2grep [-");
|
||||
for (op = optionlist; op->one_char != 0; op++)
|
||||
{
|
||||
if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
|
||||
}
|
||||
fprintf(stderr, "] [long options] [pattern] [files]\n");
|
||||
fprintf(stderr, "Type \"pcre2grep --help\" for more information and the long "
|
||||
"options.\n");
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Help function *
|
||||
*************************************************/
|
||||
|
||||
static void
|
||||
help(void)
|
||||
{
|
||||
option_item *op;
|
||||
|
||||
printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL);
|
||||
printf("Search for PATTERN in each FILE or standard input." STDOUT_NL);
|
||||
printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL);
|
||||
|
||||
#ifdef SUPPORT_PCRE2GREP_CALLOUT
|
||||
#ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
|
||||
printf("All callout scripts in patterns are supported." STDOUT_NL);
|
||||
#else
|
||||
printf("Non-fork callout scripts in patterns are supported." STDOUT_NL);
|
||||
#endif
|
||||
#else
|
||||
printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL);
|
||||
#endif
|
||||
|
||||
printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL);
|
||||
|
||||
#ifdef SUPPORT_LIBZ
|
||||
printf("Files whose names end in .gz are read using zlib." STDOUT_NL);
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_LIBBZ2
|
||||
printf("Files whose names end in .bz2 are read using bzlib2." STDOUT_NL);
|
||||
#endif
|
||||
|
||||
#if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
|
||||
printf("Other files and the standard input are read as plain files." STDOUT_NL STDOUT_NL);
|
||||
#else
|
||||
printf("All files are read as plain files, without any interpretation." STDOUT_NL STDOUT_NL);
|
||||
#endif
|
||||
|
||||
printf("Example: pcre2grep -i " QUOT "hello.*world" QUOT " menu.h main.c" STDOUT_NL STDOUT_NL);
|
||||
printf("Options:" STDOUT_NL);
|
||||
|
||||
for (op = optionlist; op->one_char != 0; op++)
|
||||
{
|
||||
int n;
|
||||
char s[4];
|
||||
|
||||
if (op->one_char > 0 && (op->long_name)[0] == 0)
|
||||
n = 31 - printf(" -%c", op->one_char);
|
||||
else
|
||||
{
|
||||
if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
|
||||
else strcpy(s, " ");
|
||||
n = 31 - printf(" %s --%s", s, op->long_name);
|
||||
}
|
||||
|
||||
if (n < 1) n = 1;
|
||||
printf("%.*s%s" STDOUT_NL, n, " ", op->help_text);
|
||||
}
|
||||
|
||||
printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --max-buffer-size=100K." STDOUT_NL);
|
||||
printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE);
|
||||
printf("The default value for --max-buffer-size is %d." STDOUT_NL, PCRE2GREP_MAX_BUFSIZE);
|
||||
printf("When reading patterns or file names from a file, trailing white" STDOUT_NL);
|
||||
printf("space is removed and blank lines are ignored." STDOUT_NL);
|
||||
printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN);
|
||||
|
||||
printf(STDOUT_NL "With no FILEs, read standard input. If fewer than two FILEs given, assume -h." STDOUT_NL);
|
||||
printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble." STDOUT_NL);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Test exclude/includes *
|
||||
*************************************************/
|
||||
|
||||
/* If any exclude pattern matches, the path is excluded. Otherwise, unless
|
||||
there are no includes, the path must match an include pattern.
|
||||
|
||||
Arguments:
|
||||
path the path to be matched
|
||||
ip the chain of include patterns
|
||||
ep the chain of exclude patterns
|
||||
|
||||
Returns: TRUE if the path is not excluded
|
||||
*/
|
||||
|
||||
static BOOL
|
||||
test_incexc(char *path, patstr *ip, patstr *ep)
|
||||
{
|
||||
int plen = strlen((const char *)path);
|
||||
|
||||
for (; ep != NULL; ep = ep->next)
|
||||
{
|
||||
if (pcre2_match(ep->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (ip == NULL) return TRUE;
|
||||
|
||||
for (; ip != NULL; ip = ip->next)
|
||||
{
|
||||
if (pcre2_match(ip->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Decode integer argument value *
|
||||
*************************************************/
|
||||
|
||||
/* Integer arguments can be followed by K or M. Avoid the use of strtoul()
|
||||
because SunOS4 doesn't have it. This is used only for unpicking arguments, so
|
||||
just keep it simple.
|
||||
|
||||
Arguments:
|
||||
option_data the option data string
|
||||
op the option item (for error messages)
|
||||
longop TRUE if option given in long form
|
||||
|
||||
Returns: a long integer
|
||||
*/
|
||||
|
||||
static long int
|
||||
decode_number(char *option_data, option_item *op, BOOL longop)
|
||||
{
|
||||
unsigned long int n = 0;
|
||||
char *endptr = option_data;
|
||||
while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
|
||||
while (isdigit((unsigned char)(*endptr)))
|
||||
n = n * 10 + (int)(*endptr++ - '0');
|
||||
if (toupper(*endptr) == 'K')
|
||||
{
|
||||
n *= 1024;
|
||||
endptr++;
|
||||
}
|
||||
else if (toupper(*endptr) == 'M')
|
||||
{
|
||||
n *= 1024*1024;
|
||||
endptr++;
|
||||
}
|
||||
|
||||
if (*endptr != 0) /* Error */
|
||||
{
|
||||
if (longop)
|
||||
{
|
||||
char *equals = strchr(op->long_name, '=');
|
||||
int nlen = (equals == NULL)? (int)strlen(op->long_name) :
|
||||
(int)(equals - op->long_name);
|
||||
fprintf(stderr, "pcre2grep: Malformed number \"%s\" after --%.*s\n",
|
||||
option_data, nlen, op->long_name);
|
||||
}
|
||||
else
|
||||
fprintf(stderr, "pcre2grep: Malformed number \"%s\" after -%c\n",
|
||||
option_data, op->one_char);
|
||||
pcre2grep_exit(usage(2));
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Add item to a chain of numbers *
|
||||
*************************************************/
|
||||
|
||||
/* Used to add an item onto a chain, or just return an unconnected item if the
|
||||
"after" argument is NULL.
|
||||
|
||||
Arguments:
|
||||
n the number to add
|
||||
after if not NULL points to item to insert after
|
||||
|
||||
Returns: new number block
|
||||
*/
|
||||
|
||||
static omstr *
|
||||
add_number(int n, omstr *after)
|
||||
{
|
||||
omstr *om = (omstr *)malloc(sizeof(omstr));
|
||||
|
||||
/* LCOV_EXCL_START - These lines won't be hit in normal testing. */
|
||||
|
||||
if (om == NULL)
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: malloc failed\n");
|
||||
pcre2grep_exit(2);
|
||||
}
|
||||
|
||||
/* LCOV_EXCL_STOP */
|
||||
|
||||
om->next = NULL;
|
||||
om->groupnum = n;
|
||||
|
||||
if (after != NULL)
|
||||
{
|
||||
om->next = after->next;
|
||||
after->next = om;
|
||||
}
|
||||
return om;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Read one line of input *
|
||||
*************************************************/
|
||||
|
||||
/* Normally, input that is to be scanned is read using fread() (or gzread, or
|
||||
BZ2_read) into a large buffer, so many lines may be read at once. However,
|
||||
doing this for tty input means that no output appears until a lot of input has
|
||||
been typed. Instead, tty input is handled line by line. We cannot use fgets()
|
||||
for this, because it does not stop at a binary zero, and therefore there is no
|
||||
way of telling how many characters it has read, because there may be binary
|
||||
zeros embedded in the data. This function is also used for reading patterns
|
||||
from files (the -f option).
|
||||
|
||||
Arguments:
|
||||
buffer the buffer to read into
|
||||
length the maximum number of characters to read
|
||||
f the file
|
||||
|
||||
Returns: the number of characters read, zero at end of file
|
||||
*/
|
||||
|
||||
static PCRE2_SIZE
|
||||
read_one_line(char *buffer, PCRE2_SIZE length, FILE *f)
|
||||
{
|
||||
int c;
|
||||
PCRE2_SIZE yield = 0;
|
||||
while ((c = fgetc(f)) != EOF)
|
||||
{
|
||||
buffer[yield++] = c;
|
||||
if (c == '\n' || yield >= length) break;
|
||||
}
|
||||
return yield;
|
||||
}
|
||||
|
||||
/*************************************************
|
||||
* Read one pattern from file *
|
||||
*************************************************/
|
||||
|
||||
/* Wrap around read_one_line() to make sure any terminating '\n' is not
|
||||
included in the pattern and empty patterns are correctly identified.
|
||||
|
||||
Arguments:
|
||||
buffer the buffer to read into
|
||||
length maximum number of characters to read and report how many were
|
||||
f the file
|
||||
|
||||
Returns: TRUE if a pattern was read into buffer
|
||||
*/
|
||||
|
||||
static BOOL
|
||||
read_pattern(char *buffer, PCRE2_SIZE *length, FILE *f)
|
||||
{
|
||||
*buffer = '\0';
|
||||
*length = read_one_line(buffer, *length, f);
|
||||
if (*length > 0 && buffer[*length-1] == '\n') *length = *length - 1;
|
||||
if (posix_pattern_file && *length > 0 && buffer[*length-1] == '\r')
|
||||
{
|
||||
*length = *length - 1;
|
||||
if (*length == 0) return TRUE;
|
||||
}
|
||||
return (*length > 0 || *buffer == '\n');
|
||||
}
|
||||
|
||||
/*************************************************
|
||||
* Find end of line *
|
||||
*************************************************/
|
||||
|
||||
/* The length of the endline sequence that is found is set via lenptr. This may
|
||||
be zero at the very end of the file if there is no line-ending sequence there.
|
||||
|
||||
Arguments:
|
||||
p current position in line
|
||||
endptr end of available data
|
||||
lenptr where to put the length of the eol sequence
|
||||
|
||||
Returns: pointer after the last byte of the line,
|
||||
including the newline byte(s)
|
||||
*/
|
||||
|
||||
static char *
|
||||
end_of_line(char *p, char *endptr, int *lenptr)
|
||||
{
|
||||
switch(endlinetype)
|
||||
{
|
||||
default: /* Just in case */
|
||||
case PCRE2_NEWLINE_LF:
|
||||
while (p < endptr && *p != '\n') p++;
|
||||
if (p < endptr)
|
||||
{
|
||||
*lenptr = 1;
|
||||
return p + 1;
|
||||
}
|
||||
*lenptr = 0;
|
||||
return endptr;
|
||||
|
||||
case PCRE2_NEWLINE_CR:
|
||||
while (p < endptr && *p != '\r') p++;
|
||||
if (p < endptr)
|
||||
{
|
||||
*lenptr = 1;
|
||||
return p + 1;
|
||||
}
|
||||
*lenptr = 0;
|
||||
return endptr;
|
||||
|
||||
case PCRE2_NEWLINE_NUL:
|
||||
while (p < endptr && *p != '\0') p++;
|
||||
if (p < endptr)
|
||||
{
|
||||
*lenptr = 1;
|
||||
return p + 1;
|
||||
}
|
||||
*lenptr = 0;
|
||||
return endptr;
|
||||
|
||||
case PCRE2_NEWLINE_CRLF:
|
||||
for (;;)
|
||||
{
|
||||
while (p < endptr && *p != '\r') p++;
|
||||
if (p == endptr)
|
||||
{
|
||||
*lenptr = 0;
|
||||
return endptr;
|
||||
}
|
||||
p++;
|
||||
if (p < endptr && *p == '\n')
|
||||
{
|
||||
*lenptr = 2;
|
||||
return p + 1;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case PCRE2_NEWLINE_ANYCRLF:
|
||||
while (p < endptr)
|
||||
{
|
||||
if (*p == '\n')
|
||||
{
|
||||
*lenptr = 1;
|
||||
return p + 1;
|
||||
}
|
||||
|
||||
if (*p == '\r')
|
||||
{
|
||||
if (p + 1 < endptr && p[1] == '\n')
|
||||
{
|
||||
*lenptr = 2;
|
||||
return p + 2;
|
||||
}
|
||||
|
||||
*lenptr = 1;
|
||||
return p + 1;
|
||||
}
|
||||
|
||||
p++;
|
||||
} /* End of loop for ANYCRLF case */
|
||||
|
||||
*lenptr = 0; /* Must have hit the end */
|
||||
return endptr;
|
||||
|
||||
case PCRE2_NEWLINE_ANY:
|
||||
while (p < endptr)
|
||||
{
|
||||
int extra = 0;
|
||||
int c = *((unsigned char *)p);
|
||||
|
||||
if (utf && c >= 0xc0)
|
||||
{
|
||||
int gcii, gcss;
|
||||
extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
|
||||
if (endptr - p < 1 + extra)
|
||||
{
|
||||
*lenptr = 0; /* Hit the end, halfway through a character */
|
||||
return endptr;
|
||||
}
|
||||
gcss = 6*extra;
|
||||
c = (c & utf8_table3[extra]) << gcss;
|
||||
for (gcii = 1; gcii <= extra; gcii++)
|
||||
{
|
||||
gcss -= 6;
|
||||
c |= (p[gcii] & 0x3f) << gcss;
|
||||
}
|
||||
}
|
||||
|
||||
p += 1 + extra;
|
||||
|
||||
switch (c)
|
||||
{
|
||||
case '\n': /* LF */
|
||||
case '\v': /* VT */
|
||||
case '\f': /* FF */
|
||||
*lenptr = 1 + extra;
|
||||
return p;
|
||||
|
||||
case '\r': /* CR */
|
||||
if (extra == 0 && p < endptr && *p == '\n')
|
||||
{
|
||||
*lenptr = 2;
|
||||
p++;
|
||||
}
|
||||
else *lenptr = 1 + extra;
|
||||
return p;
|
||||
|
||||
#ifndef EBCDIC
|
||||
case 0x85: /* Unicode NEL */
|
||||
*lenptr = 1 + extra;
|
||||
return p;
|
||||
|
||||
case 0x2028: /* Unicode LS */
|
||||
case 0x2029: /* Unicode PS */
|
||||
*lenptr = 1 + extra;
|
||||
return p;
|
||||
#endif /* Not EBCDIC */
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
} /* End of loop for ANY case */
|
||||
|
||||
*lenptr = 0; /* Must have hit the end */
|
||||
return endptr;
|
||||
} /* End of overall switch */
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find start of previous line *
|
||||
*************************************************/
|
||||
|
||||
/* This is called when looking back for before lines to print.
|
||||
|
||||
Arguments:
|
||||
p start of the subsequent line
|
||||
startptr start of available data
|
||||
|
||||
Returns: pointer to the start of the previous line
|
||||
*/
|
||||
|
||||
static char *
|
||||
previous_line(char *p, char *startptr)
|
||||
{
|
||||
switch(endlinetype)
|
||||
{
|
||||
default: /* Just in case */
|
||||
case PCRE2_NEWLINE_LF:
|
||||
p--;
|
||||
while (p > startptr && p[-1] != '\n') p--;
|
||||
return p;
|
||||
|
||||
case PCRE2_NEWLINE_CR:
|
||||
p--;
|
||||
while (p > startptr && p[-1] != '\n') p--;
|
||||
return p;
|
||||
|
||||
case PCRE2_NEWLINE_NUL:
|
||||
p--;
|
||||
while (p > startptr && p[-1] != '\0') p--;
|
||||
return p;
|
||||
|
||||
case PCRE2_NEWLINE_CRLF:
|
||||
p -= 2;
|
||||
for (;;)
|
||||
{
|
||||
while (p > startptr && p[-1] != '\n') p--;
|
||||
if (p == startptr) break;
|
||||
if (p - startptr >= 2 && p[-2] == '\r') break;
|
||||
p--;
|
||||
}
|
||||
return p;
|
||||
|
||||
case PCRE2_NEWLINE_ANYCRLF:
|
||||
if (p - startptr >= 2 && p[-2] == '\r' && p[-1] == '\n') p -= 2;
|
||||
else p--;
|
||||
while (p > startptr)
|
||||
{
|
||||
if (p[-1] == '\n' || p[-1] == '\r') break;
|
||||
p--;
|
||||
}
|
||||
return p;
|
||||
|
||||
case PCRE2_NEWLINE_ANY:
|
||||
if (p - startptr >= 2 && p[-2] == '\r' && p[-1] == '\n') p -= 2;
|
||||
else
|
||||
{
|
||||
if (utf) while (p > startptr && (p[-1] & 0xc0) == 0x80) p--;
|
||||
if (p > startptr) p--;
|
||||
}
|
||||
|
||||
while (p > startptr)
|
||||
{
|
||||
int c;
|
||||
char *pp = p - 1;
|
||||
|
||||
if (utf)
|
||||
{
|
||||
int extra = 0;
|
||||
while (pp > startptr && (*pp & 0xc0) == 0x80) pp--;
|
||||
c = *((unsigned char *)pp);
|
||||
if (c >= 0xc0)
|
||||
{
|
||||
int gcii, gcss;
|
||||
extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
|
||||
if (p - pp < 1 + extra)
|
||||
{
|
||||
p = pp; /* Rewind over the broken character */
|
||||
continue;
|
||||
}
|
||||
gcss = 6*extra;
|
||||
c = (c & utf8_table3[extra]) << gcss;
|
||||
for (gcii = 1; gcii <= extra; gcii++)
|
||||
{
|
||||
gcss -= 6;
|
||||
c |= (pp[gcii] & 0x3f) << gcss;
|
||||
}
|
||||
}
|
||||
}
|
||||
else c = *((unsigned char *)pp);
|
||||
|
||||
switch (c)
|
||||
{
|
||||
case '\n': /* LF */
|
||||
case '\v': /* VT */
|
||||
case '\f': /* FF */
|
||||
case '\r': /* CR */
|
||||
#ifndef EBCDIC
|
||||
case 0x85: /* Unicode NEL */
|
||||
case 0x2028: /* Unicode LS */
|
||||
case 0x2029: /* Unicode PS */
|
||||
#endif /* Not EBCDIC */
|
||||
return p;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
p = pp; /* Back one character */
|
||||
} /* End of loop for ANY case */
|
||||
|
||||
return p;
|
||||
} /* End of overall switch */
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Output newline at end *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called if the final line of a file has been written to
|
||||
stdout, but it does not have a terminating newline.
|
||||
|
||||
Arguments: none
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
write_final_newline(void)
|
||||
{
|
||||
switch(endlinetype)
|
||||
{
|
||||
default: /* Just in case */
|
||||
case PCRE2_NEWLINE_LF:
|
||||
case PCRE2_NEWLINE_ANY:
|
||||
case PCRE2_NEWLINE_ANYCRLF:
|
||||
fprintf(stdout, "\n");
|
||||
break;
|
||||
|
||||
case PCRE2_NEWLINE_CR:
|
||||
fprintf(stdout, "\r");
|
||||
break;
|
||||
|
||||
case PCRE2_NEWLINE_CRLF:
|
||||
fprintf(stdout, "\r\n");
|
||||
break;
|
||||
|
||||
case PCRE2_NEWLINE_NUL:
|
||||
fprintf(stdout, "%c", 0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print the previous "after" lines *
|
||||
*************************************************/
|
||||
|
||||
/* This is called if we are about to lose said lines because of buffer filling,
|
||||
and at the end of the file. The data in the line is written using fwrite() so
|
||||
that a binary zero does not terminate it.
|
||||
|
||||
Arguments:
|
||||
lastmatchnumber the number of the last matching line, plus one
|
||||
lastmatchrestart where we restarted after the last match
|
||||
endptr end of available data
|
||||
printname filename for printing
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart,
|
||||
char *endptr, const char *printname)
|
||||
{
|
||||
if (after_context > 0 && lastmatchnumber > 0)
|
||||
{
|
||||
int count = 0;
|
||||
int ellength = 0;
|
||||
while (lastmatchrestart < endptr && count < after_context)
|
||||
{
|
||||
char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
|
||||
if (ellength == 0 && pp == main_buffer + bufsize) break;
|
||||
if (printname != NULL) fprintf(stdout, "%s%c", printname, printname_hyphen);
|
||||
if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
|
||||
FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
|
||||
lastmatchrestart = pp;
|
||||
count++;
|
||||
}
|
||||
|
||||
/* If we have printed any lines, arrange for a hyphen separator if anything
|
||||
else follows. Also, if the last line is the final line in the file and it had
|
||||
no newline, add one. */
|
||||
|
||||
if (count > 0)
|
||||
{
|
||||
hyphenpending = TRUE;
|
||||
if (ellength == 0 && lastmatchrestart >= endptr)
|
||||
write_final_newline();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Apply patterns to subject till one matches *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called to run through all the patterns, looking for a
|
||||
match. When all possible matches are required, for example, for colouring, it
|
||||
checks all patterns for matching, and returns the earliest match. Otherwise, it
|
||||
returns the first pattern that has matched.
|
||||
|
||||
Arguments:
|
||||
matchptr the start of the subject
|
||||
length the length of the subject to match
|
||||
options options for pcre2_match
|
||||
startoffset where to start matching
|
||||
mrc address of where to put the result of pcre2_match()
|
||||
|
||||
Returns: TRUE if there was a match, match_data and offsets are set
|
||||
FALSE if there was no match (but no errors)
|
||||
invert if there was a non-fatal error
|
||||
*/
|
||||
|
||||
static BOOL
|
||||
match_patterns(char *matchptr, PCRE2_SIZE length, unsigned int options,
|
||||
PCRE2_SIZE startoffset, int *mrc)
|
||||
{
|
||||
PCRE2_SIZE slen = length;
|
||||
int first = -1;
|
||||
int firstrc = 0;
|
||||
patstr *p = patterns;
|
||||
const char *msg = "this text:\n\n";
|
||||
|
||||
if (slen > 200)
|
||||
{
|
||||
slen = 200;
|
||||
msg = "text that starts:\n\n";
|
||||
}
|
||||
|
||||
for (int i = 1; p != NULL; p = p->next, i++)
|
||||
{
|
||||
int rc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, length,
|
||||
startoffset, options, match_data, match_context);
|
||||
if (rc == PCRE2_ERROR_NOMATCH) continue;
|
||||
|
||||
/* Handle a successful match. When all_matches is false, we are done.
|
||||
Otherwise we must save the earliest match. */
|
||||
|
||||
if (rc >= 0)
|
||||
{
|
||||
if (!all_matches)
|
||||
{
|
||||
*mrc = rc;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
if (first < 0 || offsets[0] < offsets_pair[first][0] ||
|
||||
(offsets[0] == offsets_pair[first][0] &&
|
||||
offsets[1] > offsets_pair[first][1]))
|
||||
{
|
||||
first = match_data_toggle;
|
||||
firstrc = rc;
|
||||
match_data_toggle ^= 1;
|
||||
match_data = match_data_pair[match_data_toggle];
|
||||
offsets = offsets_pair[match_data_toggle];
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Deal with PCRE2 error. */
|
||||
|
||||
fprintf(stderr, "pcre2grep: pcre2_match() gave error %d while matching ", rc);
|
||||
if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
|
||||
fprintf(stderr, "%s", msg);
|
||||
FWRITE_IGNORE(matchptr, 1, slen, stderr); /* In case binary zero included */
|
||||
fprintf(stderr, "\n\n");
|
||||
if (rc <= PCRE2_ERROR_UTF8_ERR1 &&
|
||||
rc >= PCRE2_ERROR_UTF8_ERR21)
|
||||
{
|
||||
unsigned char mbuffer[256];
|
||||
PCRE2_SIZE startchar = pcre2_get_startchar(match_data);
|
||||
(void)pcre2_get_error_message(rc, mbuffer, sizeof(mbuffer));
|
||||
fprintf(stderr, "%s at offset %" SIZ_FORM "\n\n", mbuffer, startchar);
|
||||
}
|
||||
if (rc == PCRE2_ERROR_MATCHLIMIT || rc == PCRE2_ERROR_DEPTHLIMIT ||
|
||||
rc == PCRE2_ERROR_HEAPLIMIT || rc == PCRE2_ERROR_JIT_STACKLIMIT)
|
||||
resource_error = TRUE;
|
||||
if (error_count++ > 20)
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: Too many errors - abandoned.\n");
|
||||
pcre2grep_exit(2);
|
||||
}
|
||||
return invert; /* No more matching; don't show the line again */
|
||||
}
|
||||
|
||||
/* We get here when all patterns have been tried. If all_matches is false,
|
||||
this means that none of them matched. If all_matches is true, matched_first
|
||||
will be non-NULL if there was at least one match, and it will point to the
|
||||
appropriate match_data block. */
|
||||
|
||||
if (!all_matches || first < 0) return FALSE;
|
||||
|
||||
match_data_toggle = first;
|
||||
match_data = match_data_pair[first];
|
||||
offsets = offsets_pair[first];
|
||||
*mrc = firstrc;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Decode dollar escape sequence *
|
||||
*************************************************/
|
||||
|
||||
/* Called from various places to decode $ escapes in output strings. The escape
|
||||
sequences are as follows:
|
||||
|
||||
$<digits> or ${<digits>} returns a capture number. However, if callout is TRUE,
|
||||
zero is never returned; '0' is substituted.
|
||||
|
||||
$a returns bell.
|
||||
$b returns backspace.
|
||||
$e returns escape.
|
||||
$f returns form feed.
|
||||
$n returns newline.
|
||||
$r returns carriage return.
|
||||
$t returns tab.
|
||||
$v returns vertical tab.
|
||||
$o<digits> returns the character represented by the given octal
|
||||
number; up to three digits are processed.
|
||||
$o{<digits>} does the same, up to 7 digits, but gives an error for mode-invalid
|
||||
code points.
|
||||
$x<digits> returns the character represented by the given hexadecimal
|
||||
number; up to two digits are processed.
|
||||
$x{<digits} does the same, up to 6 digits, but gives an error for mode-invalid
|
||||
code points.
|
||||
Any other character is substituted by itself. E.g: $$ is replaced by a single
|
||||
dollar.
|
||||
|
||||
Arguments:
|
||||
begin the start of the whole string
|
||||
string points to the $
|
||||
callout TRUE if in a callout (inhibits error messages)
|
||||
value where to return a value
|
||||
last where to return pointer to the last used character
|
||||
|
||||
Returns: DDE_ERROR after a syntax error
|
||||
DDE_CAPTURE if *value is a capture number
|
||||
DDE_CHAR if *value is a character code
|
||||
*/
|
||||
|
||||
static int
|
||||
decode_dollar_escape(PCRE2_SPTR begin, PCRE2_SPTR string, BOOL callout,
|
||||
uint32_t *value, PCRE2_SPTR *last)
|
||||
{
|
||||
uint32_t c = 0;
|
||||
int base = 10;
|
||||
int dcount;
|
||||
int rc = DDE_CHAR;
|
||||
BOOL brace = FALSE;
|
||||
|
||||
switch (*(++string))
|
||||
{
|
||||
case 0: /* Syntax error: a character must be present after $. */
|
||||
if (!callout)
|
||||
fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
|
||||
(int)(string - begin), "no character after $");
|
||||
*last = string;
|
||||
return DDE_ERROR;
|
||||
|
||||
case '&':
|
||||
/* In a callout, no capture is available. Return the character '0' for
|
||||
consistency with $0. */
|
||||
|
||||
if (callout) *value = '0';
|
||||
else
|
||||
{
|
||||
*value = 0;
|
||||
rc = DDE_CAPTURE;
|
||||
}
|
||||
break;
|
||||
|
||||
case '{':
|
||||
brace = TRUE;
|
||||
string++;
|
||||
if (!isdigit((unsigned char)(*string))) /* Syntax error: */
|
||||
{ /* a decimal number required. */
|
||||
if (!callout)
|
||||
fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
|
||||
(int)(string - begin), "decimal number expected");
|
||||
rc = DDE_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Fall through */
|
||||
|
||||
/* The maximum capture number is 65535, so any number greater than that will
|
||||
always be an unknown capture number. We just stop incrementing, in order to
|
||||
avoid overflow. */
|
||||
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
do
|
||||
{
|
||||
if (c <= 65535) c = c * 10 + (*string - '0');
|
||||
string++;
|
||||
}
|
||||
while (*string >= '0' && *string <= '9');
|
||||
string--; /* Point to last digit */
|
||||
|
||||
/* In a callout, capture number 0 is not available. No error can be given,
|
||||
so just return the character '0'. */
|
||||
|
||||
if (callout && c == 0)
|
||||
{
|
||||
*value = '0';
|
||||
}
|
||||
else
|
||||
{
|
||||
*value = c;
|
||||
rc = DDE_CAPTURE;
|
||||
}
|
||||
break;
|
||||
|
||||
/* Limit octal numbers to 3 digits without braces, or up to 7 with braces,
|
||||
for valid Unicode code points. */
|
||||
|
||||
case 'o':
|
||||
base = 8;
|
||||
string++;
|
||||
if (*string == '{')
|
||||
{
|
||||
brace = TRUE;
|
||||
string++;
|
||||
dcount = 7;
|
||||
}
|
||||
else dcount = 3;
|
||||
for (; dcount > 0; dcount--)
|
||||
{
|
||||
if (*string < '0' || *string > '7') break;
|
||||
c = c * 8 + (*string++ - '0');
|
||||
}
|
||||
*value = c;
|
||||
string--; /* Point to last digit */
|
||||
break;
|
||||
|
||||
/* Limit hex numbers to 2 digits without braces, or up to 6 with braces,
|
||||
for valid Unicode code points. */
|
||||
|
||||
case 'x':
|
||||
base = 16;
|
||||
string++;
|
||||
if (*string == '{')
|
||||
{
|
||||
brace = TRUE;
|
||||
string++;
|
||||
dcount = 6;
|
||||
}
|
||||
else dcount = 2;
|
||||
for (; dcount > 0; dcount--)
|
||||
{
|
||||
if (!isxdigit(*string)) break;
|
||||
if (*string >= '0' && *string <= '9')
|
||||
c = c *16 + (*string++ - '0');
|
||||
else
|
||||
c = c * 16 + ((*string++ | 0x20) - 'a') + 10;
|
||||
}
|
||||
*value = c;
|
||||
string--; /* Point to last digit */
|
||||
break;
|
||||
|
||||
case 'a': *value = '\a'; break;
|
||||
case 'b': *value = '\b'; break;
|
||||
#ifndef EBCDIC
|
||||
case 'e': *value = '\033'; break;
|
||||
#else
|
||||
case 'e': *value = '\047'; break;
|
||||
#endif
|
||||
case 'f': *value = '\f'; break;
|
||||
case 'n': *value = STDOUT_NL_CODE; break;
|
||||
case 'r': *value = '\r'; break;
|
||||
case 't': *value = '\t'; break;
|
||||
case 'v': *value = '\v'; break;
|
||||
|
||||
default: *value = *string; break;
|
||||
}
|
||||
|
||||
if (brace)
|
||||
{
|
||||
c = string[1];
|
||||
if (c != '}')
|
||||
{
|
||||
rc = DDE_ERROR;
|
||||
if (!callout)
|
||||
{
|
||||
if ((base == 8 && c >= '0' && c <= '7') ||
|
||||
(base == 16 && isxdigit(c)))
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
|
||||
"too many %s digits\n", (int)(string - begin),
|
||||
(base == 8)? "octal" : "hex");
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
|
||||
(int)(string - begin), "missing closing brace");
|
||||
}
|
||||
}
|
||||
}
|
||||
else string++;
|
||||
}
|
||||
|
||||
/* Check maximum code point values, but take note of STDOUT_NL_CODE. */
|
||||
|
||||
if (rc == DDE_CHAR && *value != STDOUT_NL_CODE)
|
||||
{
|
||||
uint32_t max = utf? 0x0010ffffu : 0xffu;
|
||||
if (*value > max)
|
||||
{
|
||||
if (!callout)
|
||||
fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
|
||||
"code point greater than 0x%x is invalid\n", (int)(string - begin), max);
|
||||
rc = DDE_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
*last = string;
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Check output text for errors *
|
||||
*************************************************/
|
||||
|
||||
/* Called early, to get errors before doing anything for -O text; also called
|
||||
from callouts to check before outputting.
|
||||
|
||||
Arguments:
|
||||
string an --output text string
|
||||
callout TRUE if in a callout (stops printing errors)
|
||||
|
||||
Returns: TRUE if OK, FALSE on error
|
||||
*/
|
||||
|
||||
static BOOL
|
||||
syntax_check_output_text(PCRE2_SPTR string, BOOL callout)
|
||||
{
|
||||
uint32_t value;
|
||||
PCRE2_SPTR begin = string;
|
||||
|
||||
for (; *string != 0; string++)
|
||||
{
|
||||
if (*string == '$' &&
|
||||
decode_dollar_escape(begin, string, callout, &value, &string) == DDE_ERROR)
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Display output text *
|
||||
*************************************************/
|
||||
|
||||
/* Display the output text, which is assumed to have already been syntax
|
||||
checked. Output may contain escape sequences started by the dollar sign.
|
||||
|
||||
Arguments:
|
||||
string: the output text
|
||||
callout: TRUE for the builtin callout, FALSE for --output
|
||||
subject the start of the subject
|
||||
ovector: capture offsets
|
||||
capture_top: number of captures
|
||||
|
||||
Returns: TRUE if something was output, other than newline
|
||||
FALSE if nothing was output, or newline was last output
|
||||
*/
|
||||
|
||||
static BOOL
|
||||
display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject,
|
||||
PCRE2_SIZE *ovector, PCRE2_SIZE capture_top)
|
||||
{
|
||||
uint32_t value;
|
||||
BOOL printed = FALSE;
|
||||
PCRE2_SPTR begin = string;
|
||||
|
||||
for (; *string != 0; string++)
|
||||
{
|
||||
if (*string == '$')
|
||||
{
|
||||
switch(decode_dollar_escape(begin, string, callout, &value, &string))
|
||||
{
|
||||
case DDE_CHAR:
|
||||
if (value == STDOUT_NL_CODE)
|
||||
{
|
||||
fprintf(stdout, STDOUT_NL);
|
||||
printed = FALSE;
|
||||
continue;
|
||||
}
|
||||
break; /* Will print value */
|
||||
|
||||
case DDE_CAPTURE:
|
||||
if (value < capture_top)
|
||||
{
|
||||
PCRE2_SIZE capturesize;
|
||||
value *= 2;
|
||||
capturesize = ovector[value + 1] - ovector[value];
|
||||
if (capturesize > 0)
|
||||
{
|
||||
print_match(subject + ovector[value], capturesize);
|
||||
printed = TRUE;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
|
||||
/* LCOV_EXCL_START */
|
||||
default: /* Should not occur */
|
||||
break;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
}
|
||||
|
||||
else value = *string; /* Not a $ escape */
|
||||
|
||||
if (!utf || value <= 127) fprintf(stdout, "%c", value); else
|
||||
{
|
||||
int n = ord2utf8(value);
|
||||
for (int i = 0; i < n; i++) fputc(utf8_buffer[i], stdout);
|
||||
}
|
||||
|
||||
printed = TRUE;
|
||||
}
|
||||
|
||||
return printed;
|
||||
}
|
||||
|
||||
|
||||
#ifdef SUPPORT_PCRE2GREP_CALLOUT
|
||||
|
||||
/*************************************************
|
||||
* Parse and execute callout scripts *
|
||||
*************************************************/
|
||||
|
||||
/* If SUPPORT_PCRE2GREP_CALLOUT_FORK is defined, this function parses a callout
|
||||
string block and executes the program specified by the string. The string is a
|
||||
list of substrings separated by pipe characters. The first substring represents
|
||||
the executable name, and the following substrings specify the arguments:
|
||||
|
||||
program_name|param1|param2|...
|
||||
|
||||
Any substring (including the program name) can contain escape sequences
|
||||
started by the dollar character. The escape sequences are substituted as
|
||||
follows:
|
||||
|
||||
$<digits> or ${<digits>} is replaced by the captured substring of the given
|
||||
decimal number, which must be greater than zero. If the number is greater
|
||||
than the number of capturing substrings, or if the capture is unset, the
|
||||
replacement is empty.
|
||||
|
||||
Any other character is substituted by itself. E.g: $$ is replaced by a single
|
||||
dollar or $| replaced by a pipe character.
|
||||
|
||||
Alternatively, if string starts with pipe, the remainder is taken as an output
|
||||
string, same as --output. This is the only form that is supported if
|
||||
SUPPORT_PCRE2GREP_CALLOUT_FORK is not defined. In this case, --om-separator is used to
|
||||
separate each callout, defaulting to newline.
|
||||
|
||||
Example:
|
||||
|
||||
echo -e "abcde\n12345" | pcre2grep \
|
||||
'(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -
|
||||
|
||||
Output:
|
||||
|
||||
Arg1: [a] [bcd] [d] Arg2: |a| ()
|
||||
abcde
|
||||
Arg1: [1] [234] [4] Arg2: |1| ()
|
||||
12345
|
||||
|
||||
Arguments:
|
||||
calloutptr the callout block
|
||||
|
||||
Returns: currently it always returns with 0
|
||||
*/
|
||||
|
||||
static int
|
||||
pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused)
|
||||
{
|
||||
PCRE2_SIZE length = calloutptr->callout_string_length;
|
||||
PCRE2_SPTR string = calloutptr->callout_string;
|
||||
PCRE2_SPTR subject = calloutptr->subject;
|
||||
PCRE2_SIZE *ovector = calloutptr->offset_vector;
|
||||
PCRE2_SIZE capture_top = calloutptr->capture_top;
|
||||
|
||||
#ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
|
||||
PCRE2_SIZE argsvectorlen = 2;
|
||||
PCRE2_SIZE argslen = 1;
|
||||
char *args;
|
||||
char *argsptr;
|
||||
char **argsvector;
|
||||
char **argsvectorptr;
|
||||
#ifndef WIN32
|
||||
pid_t pid;
|
||||
#endif
|
||||
int result = 0;
|
||||
#endif /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
|
||||
|
||||
(void)unused; /* Avoid compiler warning */
|
||||
|
||||
/* Only callouts with strings are supported. */
|
||||
|
||||
if (string == NULL || length == 0) return 0;
|
||||
|
||||
/* If there's no command, output the remainder directly. */
|
||||
|
||||
if (*string == '|')
|
||||
{
|
||||
string++;
|
||||
if (!syntax_check_output_text(string, TRUE)) return 0;
|
||||
(void)display_output_text(string, TRUE, subject, ovector, capture_top);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifndef SUPPORT_PCRE2GREP_CALLOUT_FORK
|
||||
return 0;
|
||||
#else
|
||||
|
||||
/* Checking syntax and compute the number of string fragments. Callout strings
|
||||
are silently ignored in the event of a syntax error. */
|
||||
|
||||
while (length > 0)
|
||||
{
|
||||
if (*string == '|')
|
||||
{
|
||||
argsvectorlen++;
|
||||
if (argsvectorlen > 10000) return 0; /* Too many args */
|
||||
}
|
||||
|
||||
else if (*string == '$')
|
||||
{
|
||||
uint32_t value;
|
||||
PCRE2_SPTR begin = string;
|
||||
|
||||
switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
|
||||
{
|
||||
case DDE_CAPTURE:
|
||||
if (value < capture_top)
|
||||
{
|
||||
value *= 2;
|
||||
argslen += ovector[value + 1] - ovector[value];
|
||||
}
|
||||
argslen--; /* Negate the effect of argslen++ below. */
|
||||
break;
|
||||
|
||||
case DDE_CHAR:
|
||||
if (value == STDOUT_NL_CODE) argslen += STDOUT_NL_LEN - 1;
|
||||
else if (utf && value > 127) argslen += ord2utf8(value) - 1;
|
||||
break;
|
||||
|
||||
/* LCOV_EXCL_START */
|
||||
default: /* Should not occur */
|
||||
case DDE_ERROR:
|
||||
return 0;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
|
||||
length -= (string - begin);
|
||||
}
|
||||
|
||||
string++;
|
||||
length--;
|
||||
argslen++;
|
||||
}
|
||||
|
||||
/* Get memory for the argument vector and its strings. */
|
||||
|
||||
args = (char*)malloc(argslen);
|
||||
if (args == NULL) return 0;
|
||||
|
||||
argsvector = (char**)malloc(argsvectorlen * sizeof(char*));
|
||||
if (argsvector == NULL)
|
||||
{
|
||||
/* LCOV_EXCL_START */
|
||||
free(args);
|
||||
return 0;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
|
||||
/* Now reprocess the string and set up the arguments. */
|
||||
|
||||
argsptr = args;
|
||||
argsvectorptr = argsvector;
|
||||
*argsvectorptr++ = argsptr;
|
||||
|
||||
length = calloutptr->callout_string_length;
|
||||
string = calloutptr->callout_string;
|
||||
|
||||
while (length > 0)
|
||||
{
|
||||
if (*string == '|')
|
||||
{
|
||||
*argsptr++ = '\0';
|
||||
*argsvectorptr++ = argsptr;
|
||||
}
|
||||
|
||||
else if (*string == '$')
|
||||
{
|
||||
uint32_t value;
|
||||
PCRE2_SPTR begin = string;
|
||||
|
||||
switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
|
||||
{
|
||||
case DDE_CAPTURE:
|
||||
if (value < capture_top)
|
||||
{
|
||||
PCRE2_SIZE capturesize;
|
||||
value *= 2;
|
||||
capturesize = ovector[value + 1] - ovector[value];
|
||||
memcpy(argsptr, subject + ovector[value], capturesize);
|
||||
argsptr += capturesize;
|
||||
}
|
||||
break;
|
||||
|
||||
case DDE_CHAR:
|
||||
if (value == STDOUT_NL_CODE)
|
||||
{
|
||||
memcpy(argsptr, STDOUT_NL, STDOUT_NL_LEN);
|
||||
argsptr += STDOUT_NL_LEN;
|
||||
}
|
||||
else if (utf && value > 127)
|
||||
{
|
||||
int n = ord2utf8(value);
|
||||
memcpy(argsptr, utf8_buffer, n);
|
||||
argsptr += n;
|
||||
}
|
||||
else
|
||||
{
|
||||
*argsptr++ = value;
|
||||
}
|
||||
break;
|
||||
|
||||
/* LCOV_EXCL_START */
|
||||
default:
|
||||
/* Even though this should not occur, the string having been checked above,
|
||||
* we need to include the free() calls so that source checkers do not complain. */
|
||||
case DDE_ERROR:
|
||||
free(args);
|
||||
free(argsvector);
|
||||
abort();
|
||||
return 0;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
|
||||
length -= (string - begin);
|
||||
}
|
||||
|
||||
else *argsptr++ = *string;
|
||||
|
||||
/* Advance along the string */
|
||||
|
||||
string++;
|
||||
length--;
|
||||
}
|
||||
|
||||
*argsptr++ = '\0';
|
||||
*argsvectorptr = NULL;
|
||||
|
||||
/* Running an external command is system-dependent. Handle Windows and VMS as
|
||||
necessary, otherwise assume fork(). */
|
||||
|
||||
#ifdef WIN32
|
||||
(void)fflush(stdout);
|
||||
result = _spawnvp(_P_WAIT, argsvector[0], (const char * const *)argsvector);
|
||||
|
||||
#elif defined __VMS
|
||||
{
|
||||
char cmdbuf[500];
|
||||
short i = 0;
|
||||
int flags = CLI$M_NOCLISYM|CLI$M_NOLOGNAM|CLI$M_NOKEYPAD, status, retstat;
|
||||
$DESCRIPTOR(cmd, cmdbuf);
|
||||
|
||||
cmdbuf[0] = 0;
|
||||
while (argsvector[i])
|
||||
{
|
||||
strcat(cmdbuf, argsvector[i]);
|
||||
strcat(cmdbuf, " ");
|
||||
i++;
|
||||
}
|
||||
cmd.dsc$w_length = strlen(cmdbuf) - 1;
|
||||
status = lib$spawn(&cmd, 0,0, &flags, 0,0, &retstat);
|
||||
if (!(status & 1)) result = 0;
|
||||
else result = retstat & 1 ? 0 : 1;
|
||||
}
|
||||
|
||||
#else /* Neither Windows nor VMS */
|
||||
(void)fflush(stdout);
|
||||
pid = fork();
|
||||
if (pid == 0)
|
||||
{
|
||||
(void)execv(argsvector[0], argsvector);
|
||||
/* Control gets here if there is an error, e.g. a non-existent program */
|
||||
exit(1);
|
||||
}
|
||||
else if (pid > 0)
|
||||
{
|
||||
(void)waitpid(pid, &result, 0);
|
||||
}
|
||||
#endif /* End Windows/VMS/other handling */
|
||||
|
||||
free(args);
|
||||
free(argsvector);
|
||||
|
||||
/* Currently negative return values are not supported, only zero (match
|
||||
continues) or non-zero (match fails). */
|
||||
|
||||
return result != 0;
|
||||
#endif /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
|
||||
}
|
||||
#endif /* SUPPORT_PCRE2GREP_CALLOUT */
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Read a portion of the file into buffer *
|
||||
*************************************************/
|
||||
|
||||
static PCRE2_SIZE
|
||||
fill_buffer(void *handle, int frtype, char *buffer, PCRE2_SIZE length,
|
||||
BOOL input_line_buffered)
|
||||
{
|
||||
PCRE2_SIZE nread;
|
||||
(void)frtype; /* Avoid warning when not used */
|
||||
|
||||
#ifdef SUPPORT_LIBZ
|
||||
if (frtype == FR_LIBZ)
|
||||
return gzread((gzFile)handle, buffer, length);
|
||||
else
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_LIBBZ2
|
||||
if (frtype == FR_LIBBZ2)
|
||||
return (PCRE2_SIZE)BZ2_bzread((BZFILE *)handle, buffer, length);
|
||||
else
|
||||
#endif
|
||||
|
||||
nread = (input_line_buffered ?
|
||||
read_one_line(buffer, length, (FILE *)handle) :
|
||||
fread(buffer, 1, length, (FILE *)handle));
|
||||
|
||||
#ifdef SUPPORT_VALGRIND
|
||||
if (nread > 0) VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(buffer, nread);
|
||||
if (nread < length) VALGRIND_MAKE_MEM_UNDEFINED(buffer + nread, length - nread);
|
||||
#endif
|
||||
|
||||
return nread;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Grep an individual file *
|
||||
*************************************************/
|
||||
|
||||
/* This is called from grep_or_recurse() below. It uses a buffer that is three
|
||||
times the value of bufthird. The matching point is never allowed to stray into
|
||||
the top third of the buffer, thus keeping more of the file available for
|
||||
context printing or for multiline scanning. For large files, the pointer will
|
||||
be in the middle third most of the time, so the bottom third is available for
|
||||
"before" context printing.
|
||||
|
||||
Arguments:
|
||||
handle the fopened FILE stream for a normal file
|
||||
the gzFile pointer when reading is via libz
|
||||
the BZFILE pointer when reading is via libbz2
|
||||
frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
|
||||
filename the file name or NULL (for errors)
|
||||
printname the file name if it is to be printed for each match
|
||||
or NULL if the file name is not to be printed
|
||||
it cannot be NULL if filenames[_nomatch]_only is set
|
||||
|
||||
Returns: 0 if there was at least one match
|
||||
1 otherwise (no matches)
|
||||
2 if an overlong line is encountered
|
||||
3 if there is a read error on a .bz2 file
|
||||
*/
|
||||
|
||||
static int
|
||||
pcre2grep(void *handle, int frtype, const char *filename, const char *printname)
|
||||
{
|
||||
/* Scan one input stream a line at a time, matching each line (or, in multiline
mode, the rest of the buffered data) against the patterns and producing
whatever output the current option settings require. */
int rc = 1;
|
||||
/* Running total of the bytes in the lines already passed; note that it is a
plain int. */
int filepos = 0;
|
||||
unsigned long int linenumber = 1;
|
||||
unsigned long int lastmatchnumber = 0;
|
||||
unsigned long int count = 0;
|
||||
long int count_matched_lines = 0;
|
||||
char *lastmatchrestart = main_buffer;
|
||||
char *ptr = main_buffer;
|
||||
char *endptr;
|
||||
PCRE2_SIZE bufflength;
|
||||
BOOL binary = FALSE;
|
||||
BOOL endhyphenpending = FALSE;
|
||||
BOOL lines_printed = FALSE;
|
||||
BOOL input_line_buffered = line_buffered;
|
||||
FILE *in = NULL; /* Ensure initialized */
|
||||
long stream_start = -1; /* Only non-negative if relevant */
|
||||
|
||||
/* Do the first read into the start of the buffer and set up the pointer to end
|
||||
of what we have. In the case of libz, a non-zipped .gz file will be read as a
|
||||
plain file. However, if a .bz2 file isn't actually bzipped, the first read will
|
||||
fail. */
|
||||
|
||||
if (frtype != FR_LIBZ && frtype != FR_LIBBZ2)
|
||||
{
|
||||
in = (FILE *)handle;
|
||||
/* Already at end of file: report "no matches". */
if (feof(in)) return 1;
|
||||
if (is_file_tty(in)) input_line_buffered = TRUE;
|
||||
else
|
||||
{
|
||||
/* Remember where the stream started so that, when the -m limit stops the
scan of stdin, the file position can be set just after the lines consumed. */
if (count_limit >= 0 && filename == stdin_name)
|
||||
stream_start = ftell(in);
|
||||
}
|
||||
}
|
||||
else input_line_buffered = FALSE;
|
||||
|
||||
bufflength = fill_buffer(handle, frtype, main_buffer, bufsize,
|
||||
input_line_buffered);
|
||||
|
||||
#ifdef SUPPORT_LIBBZ2
|
||||
if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 3; /* Gotcha: bufflength is PCRE2_SIZE */
|
||||
#endif
|
||||
|
||||
endptr = main_buffer + bufflength;
|
||||
|
||||
/* Unless binary-files=text, see if we have a binary file. This uses the same
|
||||
rule as GNU grep, namely, a search for a binary zero byte near the start of the
|
||||
file. However, when the newline convention is binary zero, we can't do this. */
|
||||
|
||||
if (binary_files != BIN_TEXT)
|
||||
{
|
||||
if (endlinetype != PCRE2_NEWLINE_NUL)
|
||||
binary = memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength)
|
||||
!= NULL;
|
||||
if (binary && binary_files == BIN_NOMATCH) return 1;
|
||||
}
|
||||
|
||||
/* Loop while the current pointer is not at the end of the file. For large
|
||||
files, endptr will be at the end of the buffer when we are in the middle of the
|
||||
file, but ptr will never get there, because as soon as it gets over 2/3 of the
|
||||
way, the buffer is shifted left and re-filled. */
|
||||
|
||||
while (ptr < endptr)
|
||||
{
|
||||
int endlinelength;
|
||||
int mrc = 0;
|
||||
unsigned int options = 0;
|
||||
BOOL match;
|
||||
BOOL line_matched = FALSE;
|
||||
char *t = ptr;
|
||||
PCRE2_SIZE length, linelength;
|
||||
PCRE2_SIZE startoffset = 0;
|
||||
|
||||
/* If the -m option set a limit for the number of matched or non-matched
|
||||
lines, check it here. A limit of zero means that no matching is ever done.
|
||||
For stdin from a file, set the file position. */
|
||||
|
||||
if (count_limit >= 0 && count_matched_lines >= count_limit)
|
||||
{
|
||||
if (stream_start >= 0)
|
||||
(void)fseek(handle, stream_start + (long int)filepos, SEEK_SET);
|
||||
/* With a zero limit nothing can have matched, so the result is "no match". */
rc = (count_limit == 0)? 1 : 0;
|
||||
break;
|
||||
}
|
||||
|
||||
/* At this point, ptr is at the start of a line. We need to find the length
|
||||
of the subject string to pass to pcre2_match(). In multiline mode, it is the
|
||||
length remainder of the data in the buffer. Otherwise, it is the length of
|
||||
the next line, excluding the terminating newline. After matching, we always
|
||||
advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE
|
||||
option is used for compiling, so that any match is constrained to be in the
|
||||
first line. */
|
||||
|
||||
t = end_of_line(t, endptr, &endlinelength);
|
||||
linelength = t - ptr - endlinelength;
|
||||
length = multiline? (PCRE2_SIZE)(endptr - ptr) : linelength;
|
||||
|
||||
/* Check to see if the line we are looking at extends right to the very end
|
||||
of the buffer without a line terminator. This means the line is too long to
|
||||
handle at the current buffer size. Until the buffer reaches its maximum size,
|
||||
try doubling it and reading more data. */
|
||||
|
||||
if (endlinelength == 0 && t == main_buffer + bufsize)
|
||||
{
|
||||
if (bufthird < max_bufthird)
|
||||
{
|
||||
char *new_buffer;
|
||||
PCRE2_SIZE new_bufthird = 2*bufthird;
|
||||
|
||||
if (new_bufthird > max_bufthird) new_bufthird = max_bufthird;
|
||||
new_buffer = (char *)malloc(3*new_bufthird);
|
||||
|
||||
if (new_buffer == NULL)
|
||||
{
|
||||
/* LCOV_EXCL_START */
|
||||
fprintf(stderr,
|
||||
"pcre2grep: line %lu%s%s is too long for the internal buffer\n"
|
||||
"pcre2grep: not enough memory to increase the buffer size to %"
|
||||
SIZ_FORM "\n",
|
||||
linenumber,
|
||||
(filename == NULL)? "" : " of file ",
|
||||
(filename == NULL)? "" : filename,
|
||||
new_bufthird);
|
||||
return 2;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
|
||||
/* Copy the data and adjust pointers to the new buffer location. */
|
||||
|
||||
memcpy(new_buffer, main_buffer, bufsize);
|
||||
bufthird = new_bufthird;
|
||||
bufsize = 3*bufthird;
|
||||
ptr = new_buffer + (ptr - main_buffer);
|
||||
lastmatchrestart = new_buffer + (lastmatchrestart - main_buffer);
|
||||
free(main_buffer);
|
||||
main_buffer = new_buffer;
|
||||
|
||||
/* Read more data into the buffer and then try to find the line ending
|
||||
again. */
|
||||
|
||||
bufflength += fill_buffer(handle, frtype, main_buffer + bufflength,
|
||||
bufsize - bufflength, input_line_buffered);
|
||||
endptr = main_buffer + bufflength;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr,
|
||||
"pcre2grep: line %lu%s%s is too long for the internal buffer\n"
|
||||
"pcre2grep: the maximum buffer size is %" SIZ_FORM "\n"
|
||||
"pcre2grep: use the --max-buffer-size option to change it\n",
|
||||
linenumber,
|
||||
(filename == NULL)? "" : " of file ",
|
||||
(filename == NULL)? "" : filename,
|
||||
bufthird);
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
/* We come back here after a match when only_matching_count is non-zero, in
|
||||
order to find any further matches in the same line. This applies to
|
||||
--only-matching, --file-offsets, and --line-offsets. */
|
||||
|
||||
ONLY_MATCHING_RESTART:
|
||||
|
||||
/* Run through all the patterns until one matches or there is an error other
|
||||
than NOMATCH. This code is in a subroutine so that it can be re-used for
|
||||
finding subsequent matches when colouring matched lines. After finding one
|
||||
match, set PCRE2_NOTEMPTY to disable any further matches of null strings in
|
||||
this line. */
|
||||
|
||||
match = match_patterns(ptr, length, options, startoffset, &mrc);
|
||||
options = PCRE2_NOTEMPTY;
|
||||
|
||||
/* If it's a match or a not-match (as required), do what's wanted. NOTE: Use
|
||||
only FWRITE_IGNORE() - which is just a packaged fwrite() that ignores its
|
||||
return code - to output data lines, so that binary zeroes are treated as just
|
||||
another data character. */
|
||||
|
||||
if (match != invert)
|
||||
{
|
||||
BOOL hyphenprinted = FALSE;
|
||||
|
||||
/* We've failed if we want a file that doesn't have any matches. */
|
||||
|
||||
if (filenames == FN_NOMATCH_ONLY) return 1;
|
||||
|
||||
/* Remember that this line matched (for counting matched lines) */
|
||||
|
||||
line_matched = TRUE;
|
||||
|
||||
/* If all we want is a yes/no answer, we can return immediately. */
|
||||
|
||||
if (quiet) return 0;
|
||||
|
||||
/* Just count if just counting is wanted. */
|
||||
|
||||
else if (count_only || show_total_count) count++;
|
||||
|
||||
/* When handling a binary file and binary-files==binary, the "binary"
|
||||
variable will be set true (it's false in all other cases). In this
|
||||
situation we just want to output the file name. No need to scan further. */
|
||||
|
||||
else if (binary)
|
||||
{
|
||||
fprintf(stdout, "Binary file %s matches" STDOUT_NL, filename);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Likewise, if all we want is a file name, there is no need to scan any
|
||||
more lines in the file. */
|
||||
|
||||
else if (filenames == FN_MATCH_ONLY)
|
||||
{
|
||||
fprintf(stdout, "%s", printname);
|
||||
/* With no terminator string set, the name is followed by a binary zero. */
if (printname_nl == NULL) fprintf(stdout, "%c", 0);
|
||||
else fprintf(stdout, "%s", printname_nl);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* The --only-matching option prints just the substring that matched,
|
||||
and/or one or more captured portions of it, as long as these strings are
|
||||
not empty. The --file-offsets and --line-offsets options output offsets for
|
||||
the matching substring (all three set only_matching_count non-zero). None
|
||||
of these mutually exclusive options prints any context. Afterwards, adjust
|
||||
the start and then jump back to look for further matches in the same line.
|
||||
If we are in invert mode, however, nothing is printed and we do not restart
|
||||
- this could still be useful because the return code is set. */
|
||||
|
||||
else if (only_matching_count != 0)
|
||||
{
|
||||
if (!invert)
|
||||
{
|
||||
PCRE2_SIZE oldstartoffset;
|
||||
|
||||
if (printname != NULL) fprintf(stdout, "%s%c", printname,
|
||||
printname_colon);
|
||||
if (number) fprintf(stdout, "%lu:", linenumber);
|
||||
|
||||
/* Handle --line-offsets */
|
||||
|
||||
/* Note that (ptr + offsets[0] - ptr) is just offsets[0]; both values are
converted to int for printing. */
if (line_offsets)
|
||||
fprintf(stdout, "%d,%d" STDOUT_NL, (int)(ptr + offsets[0] - ptr),
|
||||
(int)(offsets[1] - offsets[0]));
|
||||
|
||||
/* Handle --file-offsets */
|
||||
|
||||
/* Here the offset printed is filepos plus offsets[0], again as an int. */
else if (file_offsets)
|
||||
fprintf(stdout, "%d,%d" STDOUT_NL,
|
||||
(int)(filepos + ptr + offsets[0] - ptr),
|
||||
(int)(offsets[1] - offsets[0]));
|
||||
|
||||
/* Handle --output (which has already been syntax checked) */
|
||||
|
||||
else if (output_text != NULL)
|
||||
{
|
||||
(void)display_output_text((PCRE2_SPTR)output_text, FALSE,
|
||||
(PCRE2_SPTR)ptr, offsets, mrc);
|
||||
fprintf(stdout, STDOUT_NL);
|
||||
}
|
||||
|
||||
/* Handle --only-matching, which may occur many times */
|
||||
|
||||
else
|
||||
{
|
||||
BOOL printed = FALSE;
|
||||
omstr *om;
|
||||
|
||||
for (om = only_matching; om != NULL; om = om->next)
|
||||
{
|
||||
int n = om->groupnum;
|
||||
if (n == 0 || n < mrc)
|
||||
{
|
||||
int plen = offsets[2*n + 1] - offsets[2*n];
|
||||
if (plen > 0)
|
||||
{
|
||||
if (printed && om_separator != NULL)
|
||||
fprintf(stdout, "%s", om_separator);
|
||||
print_match(ptr + offsets[n*2], plen);
|
||||
printed = TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (printed || printname != NULL || number)
|
||||
fprintf(stdout, STDOUT_NL);
|
||||
}
|
||||
|
||||
/* Prepare to repeat to find the next match in the line. */
|
||||
|
||||
//match = FALSE;
|
||||
if (line_buffered) fflush(stdout);
|
||||
rc = 0; /* Had some success */
|
||||
|
||||
/* If the pattern contained a lookbehind that included \K, it is
|
||||
possible that the end of the match might be at or before the actual
|
||||
starting offset we have just used. In this case, start one character
|
||||
further on. */
|
||||
|
||||
startoffset = offsets[1]; /* Restart after the match */
|
||||
oldstartoffset = pcre2_get_startchar(match_data);
|
||||
if (startoffset <= oldstartoffset)
|
||||
{
|
||||
if (startoffset >= length) goto END_ONE_MATCH; /* Were at end */
|
||||
startoffset = oldstartoffset + 1;
|
||||
/* In UTF mode, skip any bytes of the form 10xxxxxx so that the restart is
not in the middle of a character. */
if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
|
||||
}
|
||||
|
||||
/* If the current match ended past the end of the line (only possible
|
||||
in multiline mode), we must move on to the line in which it did end
|
||||
before searching for more matches. */
|
||||
|
||||
while (startoffset > linelength)
|
||||
{
|
||||
ptr += linelength + endlinelength;
|
||||
filepos += (int)(linelength + endlinelength);
|
||||
linenumber++;
|
||||
startoffset -= (int)(linelength + endlinelength);
|
||||
t = end_of_line(ptr, endptr, &endlinelength);
|
||||
linelength = t - ptr - endlinelength;
|
||||
length = (PCRE2_SIZE)(endptr - ptr);
|
||||
}
|
||||
|
||||
goto ONLY_MATCHING_RESTART;
|
||||
}
|
||||
}
|
||||
|
||||
/* This is the default case when none of the above options is set. We print
|
||||
the matching lines(s), possibly preceded and/or followed by other lines of
|
||||
context. */
|
||||
|
||||
else
|
||||
{
|
||||
lines_printed = TRUE;
|
||||
|
||||
/* See if there is a requirement to print some "after" lines from a
|
||||
previous match. We never print any overlaps. */
|
||||
|
||||
if (after_context > 0 && lastmatchnumber > 0)
|
||||
{
|
||||
int ellength;
|
||||
int linecount = 0;
|
||||
char *p = lastmatchrestart;
|
||||
|
||||
while (p < ptr && linecount < after_context)
|
||||
{
|
||||
p = end_of_line(p, ptr, &ellength);
|
||||
linecount++;
|
||||
}
|
||||
|
||||
/* It is important to advance lastmatchrestart during this printing so
|
||||
that it interacts correctly with any "before" printing below. Print
|
||||
each line's data using fwrite() in case there are binary zeroes. */
|
||||
|
||||
while (lastmatchrestart < p)
|
||||
{
|
||||
char *pp = lastmatchrestart;
|
||||
if (printname != NULL) fprintf(stdout, "%s%c", printname,
|
||||
printname_hyphen);
|
||||
if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
|
||||
pp = end_of_line(pp, endptr, &ellength);
|
||||
FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
|
||||
lastmatchrestart = pp;
|
||||
}
|
||||
|
||||
if (lastmatchrestart != ptr) hyphenpending = TRUE;
|
||||
}
|
||||
|
||||
/* If hyphenpending is TRUE when there is no "after" context, it means we
|
||||
are at the start of a new file, having output something from the previous
|
||||
file. Output a separator if enabled.*/
|
||||
|
||||
else if (hyphenpending)
|
||||
{
|
||||
if (group_separator != NULL)
|
||||
fprintf(stdout, "%s%s", group_separator, STDOUT_NL);
|
||||
hyphenpending = FALSE;
|
||||
hyphenprinted = TRUE;
|
||||
}
|
||||
|
||||
/* See if there is a requirement to print some "before" lines for this
|
||||
match. Again, don't print overlaps. */
|
||||
|
||||
if (before_context > 0)
|
||||
{
|
||||
int linecount = 0;
|
||||
char *p = ptr;
|
||||
|
||||
while (p > main_buffer &&
|
||||
(lastmatchnumber == 0 || p > lastmatchrestart) &&
|
||||
linecount < before_context)
|
||||
{
|
||||
linecount++;
|
||||
p = previous_line(p, main_buffer);
|
||||
}
|
||||
|
||||
if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted &&
|
||||
group_separator != NULL)
|
||||
fprintf(stdout, "%s%s", group_separator, STDOUT_NL);
|
||||
hyphenpending = FALSE;
|
||||
|
||||
while (p < ptr)
|
||||
{
|
||||
int ellength;
|
||||
char *pp = p;
|
||||
if (printname != NULL) fprintf(stdout, "%s%c", printname,
|
||||
printname_hyphen);
|
||||
if (number) fprintf(stdout, "%lu-", linenumber - linecount--);
|
||||
pp = end_of_line(pp, endptr, &ellength);
|
||||
FWRITE_IGNORE(p, 1, pp - p, stdout);
|
||||
p = pp;
|
||||
}
|
||||
}
|
||||
|
||||
/* If hyphenpending is TRUE here, it was set after outputting some
|
||||
"after" lines (and there are no "before" lines). */
|
||||
|
||||
else if (hyphenpending)
|
||||
{
|
||||
if (group_separator != NULL)
|
||||
fprintf(stdout, "%s%s", group_separator, STDOUT_NL);
|
||||
hyphenpending = FALSE;
|
||||
}
|
||||
|
||||
/* Now print the matching line(s); ensure we set hyphenpending at the end
|
||||
of the file if any context lines are being output. */
|
||||
|
||||
if (after_context > 0 || before_context > 0)
|
||||
endhyphenpending = TRUE;
|
||||
|
||||
|
||||
if (printname != NULL) fprintf(stdout, "%s%c", printname,
|
||||
printname_colon);
|
||||
if (number) fprintf(stdout, "%lu:", linenumber);
|
||||
|
||||
/* In multiline mode, or if colouring, we have to split the line(s) up
|
||||
and search for further matches, but not of course if the line is a
|
||||
non-match. In multiline mode this is necessary in case there is another
|
||||
match that spans the end of the current line. When colouring we want to
|
||||
colour all matches. */
|
||||
|
||||
if ((multiline || do_colour) && !invert)
|
||||
{
|
||||
int plength;
|
||||
PCRE2_SIZE endprevious;
|
||||
|
||||
/* The use of \K may make the end offset earlier than the start. In
|
||||
this situation, swap them round. */
|
||||
|
||||
if (offsets[0] > offsets[1])
|
||||
{
|
||||
PCRE2_SIZE temp = offsets[0];
|
||||
offsets[0] = offsets[1];
|
||||
offsets[1] = temp;
|
||||
}
|
||||
|
||||
/* Output the text before the match as it is, then the match itself. */
FWRITE_IGNORE(ptr, 1, offsets[0], stdout);
|
||||
print_match(ptr + offsets[0], offsets[1] - offsets[0]);
|
||||
|
||||
for (;;)
|
||||
{
|
||||
PCRE2_SIZE oldstartoffset = pcre2_get_startchar(match_data);
|
||||
|
||||
endprevious = offsets[1];
|
||||
startoffset = endprevious; /* Advance after previous match. */
|
||||
|
||||
/* If the pattern contained a lookbehind that included \K, it is
|
||||
possible that the end of the match might be at or before the actual
|
||||
starting offset we have just used. In this case, start one character
|
||||
further on. */
|
||||
|
||||
if (startoffset <= oldstartoffset)
|
||||
{
|
||||
startoffset = oldstartoffset + 1;
|
||||
if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
|
||||
}
|
||||
|
||||
/* If the current match ended past the end of the line (only possible
|
||||
in multiline mode), we must move on to the line in which it did end
|
||||
before searching for more matches. Because the PCRE2_FIRSTLINE option
|
||||
is set, the start of the match will always be before the first
|
||||
newline sequence. */
|
||||
|
||||
while (startoffset > linelength + endlinelength)
|
||||
{
|
||||
ptr += linelength + endlinelength;
|
||||
filepos += (int)(linelength + endlinelength);
|
||||
linenumber++;
|
||||
startoffset -= (int)(linelength + endlinelength);
|
||||
endprevious -= (int)(linelength + endlinelength);
|
||||
t = end_of_line(ptr, endptr, &endlinelength);
|
||||
linelength = t - ptr - endlinelength;
|
||||
length = (PCRE2_SIZE)(endptr - ptr);
|
||||
}
|
||||
|
||||
/* If startoffset is at the exact end of the line it means this
|
||||
complete line was the final part of the match, so there is nothing
|
||||
more to do. */
|
||||
|
||||
if (startoffset == linelength + endlinelength) break;
|
||||
|
||||
/* Otherwise, run a match from within the final line, and if found,
|
||||
loop for any that may follow. */
|
||||
|
||||
if (!match_patterns(ptr, length, options, startoffset, &mrc)) break;
|
||||
|
||||
/* The use of \K may make the end offset earlier than the start. In
|
||||
this situation, swap them round. */
|
||||
|
||||
if (offsets[0] > offsets[1])
|
||||
{
|
||||
PCRE2_SIZE temp = offsets[0];
|
||||
offsets[0] = offsets[1];
|
||||
offsets[1] = temp;
|
||||
}
|
||||
|
||||
/* Output the text between the previous match and this one, then this match. */
FWRITE_IGNORE(ptr + endprevious, 1, offsets[0] - endprevious, stdout);
|
||||
print_match(ptr + offsets[0], offsets[1] - offsets[0]);
|
||||
}
|
||||
|
||||
/* In multiline mode, we may have already printed the complete line
|
||||
and its line-ending characters (if they matched the pattern), so there
|
||||
may be no more to print. */
|
||||
|
||||
plength = (int)((linelength + endlinelength) - endprevious);
|
||||
if (plength > 0) FWRITE_IGNORE(ptr + endprevious, 1, plength, stdout);
|
||||
}
|
||||
|
||||
/* Not colouring or multiline; no need to search for further matches. */
|
||||
|
||||
else FWRITE_IGNORE(ptr, 1, linelength + endlinelength, stdout);
|
||||
}
|
||||
|
||||
/* End of doing what has to be done for a match. If --line-buffered was
|
||||
given, flush the output. */
|
||||
|
||||
if (line_buffered) fflush(stdout);
|
||||
rc = 0; /* Had some success */
|
||||
|
||||
/* Remember where the last match happened for after_context. We remember
|
||||
where we are about to restart, and that line's number. */
|
||||
|
||||
lastmatchrestart = ptr + linelength + endlinelength;
|
||||
lastmatchnumber = linenumber + 1;
|
||||
|
||||
/* If a line was printed and we are now at the end of the file and the last
|
||||
line had no newline, output one. */
|
||||
|
||||
if (lines_printed && lastmatchrestart >= endptr && endlinelength == 0)
|
||||
write_final_newline();
|
||||
}
|
||||
|
||||
/* For a match in multiline inverted mode (which of course did not cause
|
||||
anything to be printed), we have to move on to the end of the match before
|
||||
proceeding. */
|
||||
|
||||
if (multiline && invert && match)
|
||||
{
|
||||
int ellength;
|
||||
char *endmatch = ptr + offsets[1];
|
||||
t = ptr;
|
||||
while (t < endmatch)
|
||||
{
|
||||
t = end_of_line(t, endptr, &ellength);
|
||||
if (t <= endmatch) linenumber++; else break;
|
||||
}
|
||||
endmatch = end_of_line(endmatch, endptr, &ellength);
|
||||
linelength = endmatch - ptr - ellength;
|
||||
}
|
||||
|
||||
/* Advance to after the newline and increment the line number. The file
|
||||
offset to the current line is maintained in filepos. */
|
||||
|
||||
END_ONE_MATCH:
|
||||
ptr += linelength + endlinelength;
|
||||
filepos += (int)(linelength + endlinelength);
|
||||
linenumber++;
|
||||
|
||||
/* If there was at least one match (or a non-match, as required) in the line,
|
||||
increment the count for the -m option. */
|
||||
|
||||
if (line_matched) count_matched_lines++;
|
||||
|
||||
/* If input is line buffered, and the buffer is not yet full, read another
|
||||
line and add it into the buffer. */
|
||||
|
||||
if (input_line_buffered && bufflength < (PCRE2_SIZE)bufsize)
|
||||
{
|
||||
PCRE2_SIZE add = read_one_line(ptr, bufsize - (ptr - main_buffer), in);
|
||||
bufflength += add;
|
||||
endptr += add;
|
||||
}
|
||||
|
||||
/* If we haven't yet reached the end of the file (the buffer is full), and
|
||||
the current point is in the top 1/3 of the buffer, slide the buffer down by
|
||||
1/3 and refill it. Before we do this, if some unprinted "after" lines are
|
||||
about to be lost, print them. */
|
||||
|
||||
if (bufflength >= (PCRE2_SIZE)bufsize && ptr > main_buffer + 2*bufthird)
|
||||
{
|
||||
if (after_context > 0 &&
|
||||
lastmatchnumber > 0 &&
|
||||
lastmatchrestart < main_buffer + bufthird)
|
||||
{
|
||||
do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
|
||||
lastmatchnumber = 0; /* Indicates no after lines pending */
|
||||
}
|
||||
|
||||
/* Now do the shuffle */
|
||||
|
||||
/* The upper two thirds move down to the start of the buffer, and the freed
top third is then refilled from the input. */
(void)memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
|
||||
ptr -= bufthird;
|
||||
|
||||
bufflength = 2*bufthird + fill_buffer(handle, frtype,
|
||||
main_buffer + 2*bufthird, bufthird, input_line_buffered);
|
||||
endptr = main_buffer + bufflength;
|
||||
|
||||
/* Adjust any last match point */
|
||||
|
||||
if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
|
||||
}
|
||||
} /* Loop through the whole file */
|
||||
|
||||
/* End of file; print final "after" lines if wanted; do_after_lines sets
|
||||
hyphenpending if it prints something. */
|
||||
|
||||
if (only_matching_count == 0 && !(count_only|show_total_count))
|
||||
{
|
||||
do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
|
||||
hyphenpending |= endhyphenpending;
|
||||
}
|
||||
|
||||
/* Print the file name if we are looking for those without matches and there
|
||||
were none. If we found a match, we won't have got this far. */
|
||||
|
||||
if (filenames == FN_NOMATCH_ONLY)
|
||||
{
|
||||
fprintf(stdout, "%s", printname);
|
||||
if (printname_nl == NULL) fprintf(stdout, "%c", 0);
|
||||
else fprintf(stdout, "%s", printname_nl);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Print the match count if wanted */
|
||||
|
||||
if (count_only && !quiet)
|
||||
{
|
||||
if (count > 0 || !omit_zero_count)
|
||||
{
|
||||
if (printname != NULL && filenames != FN_NONE)
|
||||
fprintf(stdout, "%s%c", printname, printname_colon);
|
||||
fprintf(stdout, "%lu" STDOUT_NL, count);
|
||||
counts_printed++;
|
||||
}
|
||||
}
|
||||
|
||||
total_count += count; /* Can be set without count_only */
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Grep a file or recurse into a directory *
|
||||
*************************************************/
|
||||
|
||||
/* Given a path name, if it's a directory, scan all the files if we are
|
||||
recursing; if it's a file, grep it.
|
||||
|
||||
Arguments:
|
||||
pathname the path to investigate
|
||||
dir_recurse TRUE if recursing is wanted (-r or -drecurse)
|
||||
only_one_at_top TRUE if the path is the only one at toplevel
|
||||
|
||||
Returns: -1 the file/directory was skipped
|
||||
0 if there was at least one match
|
||||
1 if there were no matches
|
||||
2 there was some kind of error
|
||||
|
||||
However, file opening failures are suppressed if "silent" is set.
|
||||
*/
|
||||
|
||||
static int
|
||||
grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
|
||||
{
|
||||
int rc = 1;
|
||||
int frtype;
|
||||
void *handle;
|
||||
char *lastcomp;
|
||||
FILE *in = NULL; /* Ensure initialized */
|
||||
|
||||
#ifdef SUPPORT_LIBZ
|
||||
gzFile ingz = NULL;
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_LIBBZ2
|
||||
BZFILE *inbz2 = NULL;
|
||||
#endif
|
||||
|
||||
#if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
|
||||
int pathlen;
|
||||
#endif
|
||||
|
||||
#if defined NATIVE_ZOS
|
||||
int zos_type;
|
||||
FILE *zos_test_file;
|
||||
#endif
|
||||
|
||||
/* If the file name is "-" we scan stdin */
|
||||
|
||||
if (strcmp(pathname, "-") == 0)
|
||||
{
|
||||
if (count_limit >= 0) setbuf(stdin, NULL);
|
||||
return pcre2grep(stdin, FR_PLAIN, stdin_name,
|
||||
(filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
|
||||
stdin_name : NULL);
|
||||
}
|
||||
|
||||
/* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
|
||||
directories, whereas --include and --exclude apply to everything else. The test
|
||||
is against the final component of the path. */
|
||||
|
||||
lastcomp = strrchr(pathname, FILESEP);
|
||||
lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
|
||||
|
||||
/* If the file is a directory, skip if not recursing or if explicitly excluded.
|
||||
Otherwise, scan the directory and recurse for each path within it. The scanning
|
||||
code is localized so it can be made system-specific. */
|
||||
|
||||
|
||||
/* For z/OS, determine the file type. */
|
||||
|
||||
#if defined NATIVE_ZOS
|
||||
zos_test_file = fopen(pathname,"rb");
|
||||
|
||||
if (zos_test_file == NULL)
|
||||
{
|
||||
if (!silent) fprintf(stderr, "pcre2grep: failed to test next file %s\n",
|
||||
pathname, strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
zos_type = identifyzosfiletype (zos_test_file);
|
||||
fclose (zos_test_file);
|
||||
|
||||
/* Handle a PDS in separate code */
|
||||
|
||||
if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
|
||||
{
|
||||
return travelonpdsdir (pathname, only_one_at_top);
|
||||
}
|
||||
|
||||
/* Deal with regular files in the normal way below. These types are:
|
||||
zos_type == __ZOS_PDS_MEMBER
|
||||
zos_type == __ZOS_PS
|
||||
zos_type == __ZOS_VSAM_KSDS
|
||||
zos_type == __ZOS_VSAM_ESDS
|
||||
zos_type == __ZOS_VSAM_RRDS
|
||||
*/
|
||||
|
||||
/* Handle a z/OS directory using common code. */
|
||||
|
||||
else if (zos_type == __ZOS_HFS)
|
||||
{
|
||||
#endif /* NATIVE_ZOS */
|
||||
|
||||
|
||||
/* Handle directories: common code for all OS */
|
||||
|
||||
if (isdirectory(pathname))
|
||||
{
|
||||
if (dee_action == dee_SKIP ||
|
||||
!test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
|
||||
return -1;
|
||||
|
||||
if (dee_action == dee_RECURSE)
|
||||
{
|
||||
char childpath[FNBUFSIZ];
|
||||
char *nextfile;
|
||||
directory_type *dir = opendirectory(pathname);
|
||||
|
||||
if (dir == NULL)
|
||||
{
|
||||
/* LCOV_EXCL_START - this is a "never" event */
|
||||
if (!silent)
|
||||
fprintf(stderr, "pcre2grep: Failed to open directory %s: %s\n", pathname,
|
||||
strerror(errno));
|
||||
return 2;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
|
||||
while ((nextfile = readdirectory(dir)) != NULL)
|
||||
{
|
||||
int frc;
|
||||
int fnlength = strlen(pathname) + strlen(nextfile) + 2;
|
||||
if (fnlength > FNBUFSIZ)
|
||||
{
|
||||
/* LCOV_EXCL_START - this is a "never" event */
|
||||
fprintf(stderr, "pcre2grep: recursive filename is too long\n");
|
||||
rc = 2;
|
||||
break;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
sprintf(childpath, "%s%c%s", pathname, FILESEP, nextfile);
|
||||
|
||||
/* If the realpath() function is available, we can try to prevent endless
|
||||
recursion caused by a symlink pointing to a parent directory (GitHub
|
||||
issue #2 (old Bugzilla #2794). Original patch from Thomas Tempelmann.
|
||||
Modified to avoid using strlcat() because that isn't a standard C
|
||||
function, and also modified not to copy back the fully resolved path,
|
||||
because that affects the output from pcre2grep. */
|
||||
|
||||
#ifdef HAVE_REALPATH
|
||||
{
|
||||
char resolvedpath[PATH_MAX];
|
||||
BOOL isSame;
|
||||
size_t rlen;
|
||||
if (realpath(childpath, resolvedpath) == NULL)
|
||||
/* LCOV_EXCL_START - this is a "never" event */
|
||||
continue; /* This path is invalid - we can skip processing this */
|
||||
/* LCOV_EXCL_STOP */
|
||||
isSame = strcmp(pathname, resolvedpath) == 0;
|
||||
if (isSame) continue; /* We have a recursion */
|
||||
rlen = strlen(resolvedpath);
|
||||
if (rlen++ < sizeof(resolvedpath) - 3)
|
||||
{
|
||||
BOOL contained;
|
||||
strcat(resolvedpath, "/");
|
||||
contained = strncmp(pathname, resolvedpath, rlen) == 0;
|
||||
if (contained) continue; /* We have a recursion */
|
||||
}
|
||||
}
|
||||
#endif /* HAVE_REALPATH */
|
||||
|
||||
frc = grep_or_recurse(childpath, dir_recurse, FALSE);
|
||||
if (frc > 1) rc = frc;
|
||||
else if (frc == 0 && rc == 1) rc = 0;
|
||||
}
|
||||
|
||||
closedirectory(dir);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef WIN32
|
||||
if (iswild(pathname))
|
||||
{
|
||||
char buffer[1024];
|
||||
char *nextfile;
|
||||
char *name;
|
||||
directory_type *dir = opendirectory(pathname);
|
||||
|
||||
if (dir == NULL)
|
||||
return 0;
|
||||
|
||||
for (nextfile = name = pathname; *nextfile != 0; nextfile++)
|
||||
if (*nextfile == '/' || *nextfile == '\\')
|
||||
name = nextfile + 1;
|
||||
*name = 0;
|
||||
|
||||
while ((nextfile = readdirectory(dir)) != NULL)
|
||||
{
|
||||
int frc;
|
||||
sprintf(buffer, "%.512s%.128s", pathname, nextfile);
|
||||
frc = grep_or_recurse(buffer, dir_recurse, FALSE);
|
||||
if (frc > 1) rc = frc;
|
||||
else if (frc == 0 && rc == 1) rc = 0;
|
||||
}
|
||||
|
||||
closedirectory(dir);
|
||||
return rc;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined NATIVE_ZOS
|
||||
}
|
||||
#endif
|
||||
|
||||
/* If the file is not a directory, check for a regular file, and if it is not,
|
||||
skip it if that's been requested. Otherwise, check for an explicit inclusion or
|
||||
exclusion. */
|
||||
|
||||
else if (
|
||||
#if defined NATIVE_ZOS
|
||||
(zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
|
||||
#else /* all other OS */
|
||||
(!isregfile(pathname) && DEE_action == DEE_SKIP) ||
|
||||
#endif
|
||||
!test_incexc(lastcomp, include_patterns, exclude_patterns))
|
||||
return -1; /* File skipped */
|
||||
|
||||
/* Control reaches here if we have a regular file, or if we have a directory
|
||||
and recursion or skipping was not requested, or if we have anything else and
|
||||
skipping was not requested. The scan proceeds. If this is the first and only
|
||||
argument at top level, we don't show the file name, unless we are only showing
|
||||
the file name, or the filename was forced (-H). */
|
||||
|
||||
#if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
|
||||
pathlen = (int)(strlen(pathname));
|
||||
#endif
|
||||
|
||||
/* Open using zlib if it is supported and the file name ends with .gz. */
|
||||
|
||||
#ifdef SUPPORT_LIBZ
|
||||
if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
|
||||
{
|
||||
ingz = gzopen(pathname, "rb");
|
||||
if (ingz == NULL)
|
||||
{
|
||||
/* LCOV_EXCL_START */
|
||||
if (!silent)
|
||||
fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
|
||||
strerror(errno));
|
||||
return 2;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
handle = (void *)ingz;
|
||||
frtype = FR_LIBZ;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
|
||||
/* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
|
||||
|
||||
#ifdef SUPPORT_LIBBZ2
|
||||
if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
|
||||
{
|
||||
inbz2 = BZ2_bzopen(pathname, "rb");
|
||||
handle = (void *)inbz2;
|
||||
frtype = FR_LIBBZ2;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
|
||||
/* Otherwise use plain fopen(). The label is so that we can come back here if
|
||||
an attempt to read a .bz2 file indicates that it really is a plain file. */
|
||||
|
||||
#ifdef SUPPORT_LIBBZ2
|
||||
PLAIN_FILE:
|
||||
#endif
|
||||
{
|
||||
in = fopen(pathname, "rb");
|
||||
handle = (void *)in;
|
||||
frtype = FR_PLAIN;
|
||||
}
|
||||
|
||||
/* All the opening methods return errno when they fail. */
|
||||
|
||||
if (handle == NULL)
|
||||
{
|
||||
if (!silent)
|
||||
fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
|
||||
strerror(errno));
|
||||
return 2;
|
||||
}
|
||||
|
||||
/* Now grep the file */
|
||||
|
||||
rc = pcre2grep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
|
||||
(filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
|
||||
|
||||
/* Close in an appropriate manner. */
|
||||
|
||||
#ifdef SUPPORT_LIBZ
|
||||
if (frtype == FR_LIBZ)
|
||||
gzclose(ingz);
|
||||
else
|
||||
#endif
|
||||
|
||||
/* If it is a .bz2 file and the result is 3, it means that the first attempt to
|
||||
read failed. If the error indicates that the file isn't in fact bzipped, try
|
||||
again as a normal file. */
|
||||
|
||||
#ifdef SUPPORT_LIBBZ2
|
||||
if (frtype == FR_LIBBZ2)
|
||||
{
|
||||
if (rc == 3)
|
||||
{
|
||||
int errnum;
|
||||
const char *err = BZ2_bzerror(inbz2, &errnum);
|
||||
if (errnum == BZ_DATA_ERROR_MAGIC)
|
||||
{
|
||||
BZ2_bzclose(inbz2);
|
||||
goto PLAIN_FILE;
|
||||
}
|
||||
/* LCOV_EXCL_START */
|
||||
else if (!silent)
|
||||
fprintf(stderr, "pcre2grep: Failed to read %s using bzlib: %s\n",
|
||||
pathname, err);
|
||||
rc = 2; /* The normal "something went wrong" code */
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
BZ2_bzclose(inbz2);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
|
||||
/* Normal file close */
|
||||
|
||||
fclose(in);
|
||||
|
||||
/* Pass back the yield from pcre2grep(). */
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Handle a no-data option *
|
||||
*************************************************/
|
||||
|
||||
/* This is called when a known option has been identified. */
|
||||
|
||||
static int
|
||||
handle_option(int letter, int options)
|
||||
{
|
||||
switch(letter)
|
||||
{
|
||||
case N_FOFFSETS: file_offsets = TRUE; break;
|
||||
case N_HELP: help(); pcre2grep_exit(0); break; /* Stops compiler warning */
|
||||
case N_LBUFFER: line_buffered = TRUE; break;
|
||||
case N_LOFFSETS: line_offsets = number = TRUE; break;
|
||||
case N_NOJIT: use_jit = FALSE; break;
|
||||
case N_ALLABSK: extra_options |= PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK; break;
|
||||
case N_NO_GROUP_SEPARATOR: group_separator = NULL; break;
|
||||
case N_POSIX_PATFILE: posix_pattern_file = TRUE; break;
|
||||
case 'a': binary_files = BIN_TEXT; break;
|
||||
case 'c': count_only = TRUE; break;
|
||||
case N_POSIX_DIGIT: posix_digit = TRUE; break;
|
||||
case 'E': case_restrict = TRUE; break;
|
||||
case 'F': options |= PCRE2_LITERAL; break;
|
||||
case 'H': filenames = FN_FORCE; break;
|
||||
case 'I': binary_files = BIN_NOMATCH; break;
|
||||
case 'h': filenames = FN_NONE; break;
|
||||
case 'i': options |= PCRE2_CASELESS; break;
|
||||
case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
|
||||
case 'L': filenames = FN_NOMATCH_ONLY; break;
|
||||
case 'M': multiline = TRUE; options |= PCRE2_MULTILINE|PCRE2_FIRSTLINE; break;
|
||||
case 'n': number = TRUE; break;
|
||||
|
||||
case 'o':
|
||||
only_matching_last = add_number(0, only_matching_last);
|
||||
if (only_matching == NULL) only_matching = only_matching_last;
|
||||
break;
|
||||
|
||||
case 'P': no_ucp = TRUE; break;
|
||||
case 'q': quiet = TRUE; break;
|
||||
case 'r': dee_action = dee_RECURSE; break;
|
||||
case 's': silent = TRUE; break;
|
||||
case 't': show_total_count = TRUE; break;
|
||||
case 'u': options |= PCRE2_UTF | PCRE2_UCP; utf = TRUE; break;
|
||||
case 'U': options |= PCRE2_UTF | PCRE2_MATCH_INVALID_UTF | PCRE2_UCP;
|
||||
utf = TRUE; break;
|
||||
case 'v': invert = TRUE; break;
|
||||
|
||||
case 'V':
|
||||
{
|
||||
unsigned char buffer[128];
|
||||
(void)pcre2_config(PCRE2_CONFIG_VERSION, buffer);
|
||||
fprintf(stdout, "pcre2grep version %s" STDOUT_NL, buffer);
|
||||
}
|
||||
pcre2grep_exit(0);
|
||||
break; /* LCOV_EXCL_LINE - statement kept to avoid compiler warning */
|
||||
|
||||
case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
|
||||
case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;
|
||||
case 'Z': printname_colon = printname_hyphen = 0; printname_nl = NULL; break;
|
||||
|
||||
/* LCOV_EXCL_START - this is a "never event" */
|
||||
default:
|
||||
fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
|
||||
pcre2grep_exit(usage(2));
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
|
||||
return options;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* Turn a number into its printed ordinal form: "1st", "2nd", "3rd", "4th",
and so on, with 11, 12 and 13 (in any hundred) taking "th".

Argument:   n   the number to format
Returns:        pointer to a static buffer holding the text; the buffer is
                overwritten by the next call
*/

static char *
ordin(int n)
{
static char buffer[14];
const char *suffix = "th";
int digits = sprintf(buffer, "%d", n);   /* Length of the decimal text */
int tail = n % 100;

/* Only the last two digits matter, and the teens are always "th". */

if (tail < 11 || tail > 13)
  {
  switch (tail % 10)
    {
    case 1: suffix = "st"; break;
    case 2: suffix = "nd"; break;
    case 3: suffix = "rd"; break;
    default: break;
    }
  }

strcpy(buffer + digits, suffix);
return buffer;
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compile a single pattern *
|
||||
*************************************************/
|
||||
|
||||
/* Do nothing if the pattern has already been compiled. This is the case for
|
||||
include/exclude patterns read from a file.
|
||||
|
||||
When the -F option has been used, each "pattern" may be a list of strings,
|
||||
separated by line breaks. They will be matched literally. We split such a
|
||||
string and compile the first substring, inserting an additional block into the
|
||||
pattern chain.
|
||||
|
||||
Arguments:
|
||||
p points to the pattern block
|
||||
options the PCRE options
|
||||
fromfile TRUE if the pattern was read from a file
|
||||
fromtext file name or identifying text (e.g. "include")
|
||||
count 0 if this is the only command line pattern, or
|
||||
number of the command line pattern, or
|
||||
linenumber for a pattern from a file
|
||||
|
||||
Returns: TRUE on success, FALSE after an error
|
||||
*/
|
||||
|
||||
static BOOL
|
||||
compile_pattern(patstr *p, int options, int fromfile, const char *fromtext,
|
||||
int count)
|
||||
{
|
||||
char *ps;
|
||||
int errcode;
|
||||
PCRE2_SIZE patlen, erroffset;
|
||||
PCRE2_UCHAR errmessbuffer[ERRBUFSIZ];
|
||||
|
||||
if (p->compiled != NULL) return TRUE;
|
||||
ps = p->string;
|
||||
patlen = p->length;
|
||||
|
||||
if ((options & PCRE2_LITERAL) != 0)
|
||||
{
|
||||
int ellength;
|
||||
char *eop = ps + patlen;
|
||||
char *pe = end_of_line(ps, eop, &ellength);
|
||||
|
||||
if (ellength != 0)
|
||||
{
|
||||
patlen = pe - ps - ellength;
|
||||
if (add_pattern(pe, p->length-patlen-ellength, p) == NULL) return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
p->compiled = pcre2_compile((PCRE2_SPTR)ps, patlen, options, &errcode,
|
||||
&erroffset, compile_context);
|
||||
|
||||
/* Handle successful compile. Try JIT-compiling if supported and enabled. We
|
||||
ignore any JIT compiler errors, relying falling back to interpreting if
|
||||
anything goes wrong with JIT. */
|
||||
|
||||
if (p->compiled != NULL)
|
||||
{
|
||||
#ifdef SUPPORT_PCRE2GREP_JIT
|
||||
if (use_jit) (void)pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE);
|
||||
#endif
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* Handle compile errors */
|
||||
|
||||
if (erroffset > patlen) erroffset = patlen;
|
||||
pcre2_get_error_message(errcode, errmessbuffer, sizeof(errmessbuffer));
|
||||
|
||||
if (fromfile)
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: Error in regex in line %d of %s "
|
||||
"at offset %d: %s\n", count, fromtext, (int)erroffset, errmessbuffer);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (count == 0)
|
||||
fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n",
|
||||
fromtext, (int)erroffset, errmessbuffer);
|
||||
else
|
||||
fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n",
|
||||
ordin(count), fromtext, (int)erroffset, errmessbuffer);
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Read and compile a file of patterns *
|
||||
*************************************************/
|
||||
|
||||
/* This is used for --filelist, --include-from, and --exclude-from.
|
||||
|
||||
Arguments:
|
||||
name the name of the file; "-" is stdin
|
||||
patptr pointer to the pattern chain anchor
|
||||
patlastptr pointer to the last pattern pointer
|
||||
|
||||
Returns: TRUE if all went well
|
||||
*/
|
||||
|
||||
static BOOL
|
||||
read_pattern_file(char *name, patstr **patptr, patstr **patlastptr)
|
||||
{
|
||||
int linenumber = 0;
|
||||
PCRE2_SIZE patlen;
|
||||
FILE *f;
|
||||
const char *filename;
|
||||
char buffer[MAXPATLEN+20];
|
||||
|
||||
if (strcmp(name, "-") == 0)
|
||||
{
|
||||
f = stdin;
|
||||
filename = stdin_name;
|
||||
}
|
||||
else
|
||||
{
|
||||
f = fopen(name, "r");
|
||||
if (f == NULL)
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", name, strerror(errno));
|
||||
return FALSE;
|
||||
}
|
||||
filename = name;
|
||||
}
|
||||
|
||||
while (TRUE)
|
||||
{
|
||||
patlen = sizeof(buffer);
|
||||
if (!read_pattern(buffer, &patlen, f))
|
||||
break;
|
||||
|
||||
if (!posix_pattern_file)
|
||||
{
|
||||
while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--;
|
||||
}
|
||||
|
||||
linenumber++;
|
||||
if (!posix_pattern_file && patlen == 0) continue; /* Skip blank lines */
|
||||
|
||||
/* Note: this call to add_pattern() puts a pointer to the local variable
|
||||
"buffer" into the pattern chain. However, that pointer is used only when
|
||||
compiling the pattern, which happens immediately below, so we flatten it
|
||||
afterwards, as a precaution against any later code trying to use it. */
|
||||
|
||||
*patlastptr = add_pattern(buffer, patlen, *patlastptr);
|
||||
if (*patlastptr == NULL)
|
||||
{
|
||||
/* LCOV_EXCL_START - won't happen in testing */
|
||||
if (f != stdin) fclose(f);
|
||||
return FALSE;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
if (*patptr == NULL) *patptr = *patlastptr;
|
||||
|
||||
/* This loop is needed because compiling a "pattern" when -F is set may add
|
||||
on additional literal patterns if the original contains a newline. In the
|
||||
common case, it never will, because read_one_line() stops at a newline.
|
||||
However, the -N option can be used to give pcre2grep a different newline
|
||||
setting. */
|
||||
|
||||
for(;;)
|
||||
{
|
||||
if (!compile_pattern(*patlastptr, pcre2_options, TRUE, filename,
|
||||
linenumber))
|
||||
{
|
||||
if (f != stdin) fclose(f);
|
||||
return FALSE;
|
||||
}
|
||||
(*patlastptr)->string = NULL; /* Insurance */
|
||||
if ((*patlastptr)->next == NULL) break;
|
||||
*patlastptr = (*patlastptr)->next;
|
||||
}
|
||||
}
|
||||
|
||||
if (f != stdin) fclose(f);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Main program *
|
||||
*************************************************/
|
||||
|
||||
/* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
int i, j;
|
||||
int rc = 1;
|
||||
BOOL only_one_at_top;
|
||||
patstr *cp;
|
||||
fnstr *fn;
|
||||
omstr *om;
|
||||
const char *locale_from = "--locale";
|
||||
|
||||
#ifdef SUPPORT_PCRE2GREP_JIT
|
||||
pcre2_jit_stack *jit_stack = NULL;
|
||||
#endif
|
||||
|
||||
/* In Windows, stdout is set up as a text stream, which means that \n is
|
||||
converted to \r\n. This causes output lines that are copied from the input to
|
||||
change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure
|
||||
that stdout is a binary stream. Note that this means all other output to stdout
|
||||
must use STDOUT_NL to terminate lines. */
|
||||
|
||||
#ifdef WIN32
|
||||
_setmode(_fileno(stdout), _O_BINARY);
|
||||
#endif
|
||||
|
||||
/* Process the options */
|
||||
|
||||
for (i = 1; i < argc; i++)
|
||||
{
|
||||
option_item *op = NULL;
|
||||
char *option_data = (char *)""; /* default to keep compiler happy */
|
||||
BOOL longop;
|
||||
BOOL longopwasequals = FALSE;
|
||||
|
||||
if (argv[i][0] != '-') break;
|
||||
|
||||
/* If we hit an argument that is just "-", it may be a reference to STDIN,
|
||||
but only if we have previously had -e or -f to define the patterns. */
|
||||
|
||||
if (argv[i][1] == 0)
|
||||
{
|
||||
if (pattern_files != NULL || patterns != NULL) break;
|
||||
else pcre2grep_exit(usage(2));
|
||||
}
|
||||
|
||||
/* Handle a long name option, or -- to terminate the options */
|
||||
|
||||
if (argv[i][1] == '-')
|
||||
{
|
||||
char *arg = argv[i] + 2;
|
||||
char *argequals = strchr(arg, '=');
|
||||
|
||||
if (*arg == 0) /* -- terminates options */
|
||||
{
|
||||
i++;
|
||||
break; /* out of the options-handling loop */
|
||||
}
|
||||
|
||||
longop = TRUE;
|
||||
|
||||
/* Some long options have data that follows after =, for example file=name.
|
||||
Some options have variations in the long name spelling: specifically, we
|
||||
allow "regexp" because GNU grep allows it, though I personally go along
|
||||
with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
|
||||
These options are entered in the table as "regex(p)". Options can be in
|
||||
both these categories. */
|
||||
|
||||
for (op = optionlist; op->one_char != 0; op++)
|
||||
{
|
||||
char *opbra = strchr(op->long_name, '(');
|
||||
char *equals = strchr(op->long_name, '=');
|
||||
|
||||
/* Handle options with only one spelling of the name */
|
||||
|
||||
if (opbra == NULL) /* Does not contain '(' */
|
||||
{
|
||||
if (equals == NULL) /* Not thing=data case */
|
||||
{
|
||||
if (strcmp(arg, op->long_name) == 0) break;
|
||||
}
|
||||
else /* Special case xxx=data */
|
||||
{
|
||||
int oplen = (int)(equals - op->long_name);
|
||||
int arglen = (argequals == NULL)?
|
||||
(int)strlen(arg) : (int)(argequals - arg);
|
||||
if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
|
||||
{
|
||||
option_data = arg + arglen;
|
||||
if (*option_data == '=')
|
||||
{
|
||||
option_data++;
|
||||
longopwasequals = TRUE;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Handle options with an alternate spelling of the name */
|
||||
|
||||
else
|
||||
{
|
||||
char buff1[24];
|
||||
char buff2[24];
|
||||
int ret;
|
||||
|
||||
int baselen = (int)(opbra - op->long_name);
|
||||
int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
|
||||
int arglen = (argequals == NULL || equals == NULL)?
|
||||
(int)strlen(arg) : (int)(argequals - arg);
|
||||
|
||||
if ((ret = snprintf(buff1, sizeof(buff1), "%.*s", baselen, op->long_name),
|
||||
ret < 0 || ret > (int)sizeof(buff1)) ||
|
||||
(ret = snprintf(buff2, sizeof(buff2), "%s%.*s", buff1,
|
||||
fulllen - baselen - 2, opbra + 1),
|
||||
ret < 0 || ret > (int)sizeof(buff2)))
|
||||
{
|
||||
/* LCOV_EXCL_START - this is a "never" event */
|
||||
fprintf(stderr, "pcre2grep: Buffer overflow when parsing %s option\n",
|
||||
op->long_name);
|
||||
pcre2grep_exit(2);
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
|
||||
if (strncmp(arg, buff1, arglen) == 0 ||
|
||||
strncmp(arg, buff2, arglen) == 0)
|
||||
{
|
||||
if (equals != NULL && argequals != NULL)
|
||||
{
|
||||
option_data = argequals;
|
||||
if (*option_data == '=')
|
||||
{
|
||||
option_data++;
|
||||
longopwasequals = TRUE;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (op->one_char == 0)
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: Unknown option %s\n", argv[i]);
|
||||
pcre2grep_exit(usage(2));
|
||||
}
|
||||
}
|
||||
|
||||
/* One-char options; many that have no data may be in a single argument; we
|
||||
continue till we hit the last one or one that needs data. */
|
||||
|
||||
else
|
||||
{
|
||||
char *s = argv[i] + 1;
|
||||
longop = FALSE;
|
||||
|
||||
while (*s != 0)
|
||||
{
|
||||
for (op = optionlist; op->one_char != 0; op++)
|
||||
{
|
||||
if (*s == op->one_char) break;
|
||||
}
|
||||
if (op->one_char == 0)
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: Unknown option letter '%c' in \"%s\"\n",
|
||||
*s, argv[i]);
|
||||
pcre2grep_exit(usage(2));
|
||||
}
|
||||
|
||||
option_data = s+1;
|
||||
|
||||
/* Break out if this is the last character in the string; it's handled
|
||||
below like a single multi-char option. */
|
||||
|
||||
if (*option_data == 0) break;
|
||||
|
||||
/* Check for a single-character option that has data: OP_OP_NUMBER(S)
|
||||
are used for ones that either have a numerical number or defaults, i.e.
|
||||
the data is optional. If a digit follows, there is data; if not, carry on
|
||||
with other single-character options in the same string. */
|
||||
|
||||
if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
|
||||
{
|
||||
if (isdigit((unsigned char)(s[1]))) break;
|
||||
}
|
||||
else /* Check for an option with data */
|
||||
{
|
||||
if (op->type != OP_NODATA) break;
|
||||
}
|
||||
|
||||
/* Handle a single-character option with no data, then loop for the
|
||||
next character in the string. */
|
||||
|
||||
pcre2_options = handle_option(*s++, pcre2_options);
|
||||
}
|
||||
}
|
||||
|
||||
/* At this point we should have op pointing to a matched option. If the type
|
||||
is NO_DATA, it means that there is no data, and the option might set
|
||||
something in the PCRE options. */
|
||||
|
||||
if (op->type == OP_NODATA)
|
||||
{
|
||||
pcre2_options = handle_option(op->one_char, pcre2_options);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
|
||||
either has a value or defaults to something. It cannot have data in a
|
||||
separate item. At the moment, the only such options are "colo(u)r",
|
||||
and "only-matching". */
|
||||
|
||||
if (*option_data == 0 &&
|
||||
(op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
|
||||
op->type == OP_OP_NUMBERS))
|
||||
{
|
||||
switch (op->one_char)
|
||||
{
|
||||
case N_COLOUR:
|
||||
colour_option = "auto";
|
||||
break;
|
||||
|
||||
case 'o':
|
||||
only_matching_last = add_number(0, only_matching_last);
|
||||
if (only_matching == NULL) only_matching = only_matching_last;
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Otherwise, find the data string for the option. */
|
||||
|
||||
if (*option_data == 0)
|
||||
{
|
||||
if (i >= argc - 1 || longopwasequals)
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: Data missing after %s\n", argv[i]);
|
||||
pcre2grep_exit(usage(2));
|
||||
}
|
||||
option_data = argv[++i];
|
||||
}
|
||||
|
||||
/* If the option type is OP_OP_NUMBERS, the value is a number that is to be
|
||||
added to a chain of numbers. */
|
||||
|
||||
if (op->type == OP_OP_NUMBERS)
|
||||
{
|
||||
unsigned long int n = decode_number(option_data, op, longop);
|
||||
omdatastr *omd = (omdatastr *)op->dataptr;
|
||||
*(omd->lastptr) = add_number((int)n, *(omd->lastptr));
|
||||
if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
|
||||
}
|
||||
|
||||
/* If the option type is OP_PATLIST, it's the -e option, or one of the
|
||||
include/exclude options, which can be called multiple times to create lists
|
||||
of patterns. */
|
||||
|
||||
else if (op->type == OP_PATLIST)
|
||||
{
|
||||
patdatastr *pd = (patdatastr *)op->dataptr;
|
||||
*(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data),
|
||||
*(pd->lastptr));
|
||||
if (*(pd->lastptr) == NULL) goto EXIT2;
|
||||
if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
|
||||
}
|
||||
|
||||
/* If the option type is OP_FILELIST, it's one of the options that names a
|
||||
file. */
|
||||
|
||||
else if (op->type == OP_FILELIST)
|
||||
{
|
||||
fndatastr *fd = (fndatastr *)op->dataptr;
|
||||
fn = (fnstr *)malloc(sizeof(fnstr));
|
||||
if (fn == NULL)
|
||||
{
|
||||
/* LCOV_EXCL_START */
|
||||
fprintf(stderr, "pcre2grep: malloc failed\n");
|
||||
goto EXIT2;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
fn->next = NULL;
|
||||
fn->name = option_data;
|
||||
if (*(fd->anchor) == NULL)
|
||||
*(fd->anchor) = fn;
|
||||
else
|
||||
(*(fd->lastptr))->next = fn;
|
||||
*(fd->lastptr) = fn;
|
||||
}
|
||||
|
||||
/* Handle OP_BINARY_FILES */
|
||||
|
||||
else if (op->type == OP_BINFILES)
|
||||
{
|
||||
if (strcmp(option_data, "binary") == 0)
|
||||
binary_files = BIN_BINARY;
|
||||
else if (strcmp(option_data, "without-match") == 0)
|
||||
binary_files = BIN_NOMATCH;
|
||||
else if (strcmp(option_data, "text") == 0)
|
||||
binary_files = BIN_TEXT;
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: unknown value \"%s\" for binary-files\n",
|
||||
option_data);
|
||||
pcre2grep_exit(usage(2));
|
||||
}
|
||||
}
|
||||
|
||||
/* Otherwise, deal with a single string or numeric data value. */
|
||||
|
||||
else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
|
||||
op->type != OP_OP_NUMBER && op->type != OP_SIZE)
|
||||
{
|
||||
*((char **)op->dataptr) = option_data;
|
||||
}
|
||||
else
|
||||
{
|
||||
unsigned long int n = decode_number(option_data, op, longop);
|
||||
if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
|
||||
else if (op->type == OP_SIZE) *((PCRE2_SIZE *)op->dataptr) = n;
|
||||
else *((int *)op->dataptr) = n;
|
||||
}
|
||||
}
|
||||
|
||||
/* Options have been decoded. If -C was used, its value is used as a default
|
||||
for -A and -B. */
|
||||
|
||||
if (both_context > 0)
|
||||
{
|
||||
if (after_context == 0) after_context = both_context;
|
||||
if (before_context == 0) before_context = both_context;
|
||||
}
|
||||
|
||||
/* Only one of --only-matching, --output, --file-offsets, or --line-offsets is
|
||||
permitted. They display, each in their own way, only the data that has matched.
|
||||
*/
|
||||
|
||||
only_matching_count = (only_matching != NULL) + (output_text != NULL) +
|
||||
file_offsets + line_offsets;
|
||||
|
||||
if (only_matching_count > 1)
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --output, "
|
||||
"--file-offsets and/or --line-offsets\n");
|
||||
pcre2grep_exit(usage(2));
|
||||
}
|
||||
|
||||
/* Check that there is a big enough ovector for all -o settings. */
|
||||
|
||||
for (om = only_matching; om != NULL; om = om->next)
|
||||
{
|
||||
int n = om->groupnum;
|
||||
if (n > (int)capture_max)
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: Requested group %d cannot be captured.\n", n);
|
||||
fprintf(stderr, "pcre2grep: Use --om-capture to increase the size of the capture vector.\n");
|
||||
goto EXIT2;
|
||||
}
|
||||
}
|
||||
|
||||
/* Check the text supplied to --output for errors. */
|
||||
|
||||
if (output_text != NULL &&
|
||||
!syntax_check_output_text((PCRE2_SPTR)output_text, FALSE))
|
||||
goto EXIT2;
|
||||
|
||||
/* Set up default compile and match contexts and match data blocks. */
|
||||
|
||||
offset_size = capture_max + 1;
|
||||
compile_context = pcre2_compile_context_create(NULL);
|
||||
match_context = pcre2_match_context_create(NULL);
|
||||
match_data_pair[0] = pcre2_match_data_create(offset_size, NULL);
|
||||
match_data_pair[1] = pcre2_match_data_create(offset_size, NULL);
|
||||
offsets_pair[0] = pcre2_get_ovector_pointer(match_data_pair[0]);
|
||||
offsets_pair[1] = pcre2_get_ovector_pointer(match_data_pair[1]);
|
||||
match_data = match_data_pair[0];
|
||||
offsets = offsets_pair[0];
|
||||
match_data_toggle = 0;
|
||||
|
||||
/* If string (script) callouts are supported, set up the callout processing
|
||||
function in the match context. */
|
||||
|
||||
#ifdef SUPPORT_PCRE2GREP_CALLOUT
|
||||
pcre2_set_callout(match_context, pcre2grep_callout, NULL);
|
||||
#else
|
||||
extra_options |= PCRE2_EXTRA_NEVER_CALLOUT;
|
||||
#endif
|
||||
|
||||
/* Put limits into the match context. */
|
||||
|
||||
if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit);
|
||||
if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
|
||||
if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);
|
||||
|
||||
/* If a locale has not been provided as an option, see if the LC_CTYPE or
|
||||
LC_ALL environment variable is set, and if so, use it. */
|
||||
|
||||
if (locale == NULL)
|
||||
{
|
||||
locale = getenv("LC_ALL");
|
||||
locale_from = "LC_ALL";
|
||||
}
|
||||
|
||||
if (locale == NULL)
|
||||
{
|
||||
locale = getenv("LC_CTYPE");
|
||||
locale_from = "LC_CTYPE";
|
||||
}
|
||||
|
||||
/* If a locale is set, use it to generate the tables the PCRE needs. Passing
|
||||
NULL to pcre2_maketables() means that malloc() is used to get the memory. */
|
||||
|
||||
if (locale != NULL)
|
||||
{
|
||||
if (setlocale(LC_CTYPE, locale) == NULL)
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: Failed to set locale %s (obtained from %s)\n",
|
||||
locale, locale_from);
|
||||
goto EXIT2;
|
||||
}
|
||||
character_tables = pcre2_maketables(NULL);
|
||||
pcre2_set_character_tables(compile_context, character_tables);
|
||||
}
|
||||
|
||||
/* Sort out colouring */
|
||||
|
||||
if (colour_option != NULL && strcmp(colour_option, "never") != 0)
|
||||
{
|
||||
if (strcmp(colour_option, "always") == 0)
|
||||
#ifdef WIN32
|
||||
do_ansi = !is_stdout_tty(),
|
||||
#endif
|
||||
do_colour = TRUE;
|
||||
else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: Unknown colour setting \"%s\"\n",
|
||||
colour_option);
|
||||
goto EXIT2;
|
||||
}
|
||||
if (do_colour)
|
||||
{
|
||||
char *cs = getenv("PCRE2GREP_COLOUR");
|
||||
if (cs == NULL) cs = getenv("PCRE2GREP_COLOR");
|
||||
if (cs == NULL) cs = getenv("PCREGREP_COLOUR");
|
||||
if (cs == NULL) cs = getenv("PCREGREP_COLOR");
|
||||
if (cs == NULL) cs = parse_grep_colors(getenv("GREP_COLORS"));
|
||||
if (cs == NULL) cs = getenv("GREP_COLOR");
|
||||
if (cs != NULL)
|
||||
{
|
||||
if (strspn(cs, ";0123456789") == strlen(cs)) colour_string = cs;
|
||||
}
|
||||
#ifdef WIN32
|
||||
init_colour_output();
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
/* When colouring or otherwise identifying matching substrings, we need to find
|
||||
all possible matches when there are multiple patterns. */
|
||||
|
||||
all_matches = do_colour || only_matching_count != 0;
|
||||
|
||||
/* Sort out a newline setting. */
|
||||
|
||||
if (newline_arg != NULL)
|
||||
{
|
||||
for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
|
||||
endlinetype++)
|
||||
{
|
||||
if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break;
|
||||
}
|
||||
if (endlinetype < (int)(sizeof(newlines)/sizeof(char *)))
|
||||
pcre2_set_newline(compile_context, endlinetype);
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
|
||||
newline_arg);
|
||||
goto EXIT2;
|
||||
}
|
||||
}
|
||||
|
||||
/* Find default newline convention */
|
||||
|
||||
else
|
||||
{
|
||||
(void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype);
|
||||
}
|
||||
|
||||
/* Interpret the text values for -d and -D */
|
||||
|
||||
if (dee_option != NULL)
|
||||
{
|
||||
if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
|
||||
else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
|
||||
else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -d\n", dee_option);
|
||||
goto EXIT2;
|
||||
}
|
||||
}
|
||||
|
||||
if (DEE_option != NULL)
|
||||
{
|
||||
if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
|
||||
else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -D\n", DEE_option);
|
||||
goto EXIT2;
|
||||
}
|
||||
}
|
||||
|
||||
/* If no_ucp is set, remove PCRE2_UCP from the compile options. */
|
||||
|
||||
if (no_ucp) pcre2_options &= ~PCRE2_UCP;
|
||||
|
||||
/* adjust the extra options. */
|
||||
|
||||
if (case_restrict) extra_options |= PCRE2_EXTRA_CASELESS_RESTRICT;
|
||||
if (posix_digit)
|
||||
extra_options |= (PCRE2_EXTRA_ASCII_BSD | PCRE2_EXTRA_ASCII_DIGIT);
|
||||
if ((pcre2_options & PCRE2_LITERAL) != 0)
|
||||
extra_options &= ~PCRE2_EXTRA_NEVER_CALLOUT;
|
||||
|
||||
/* Set the extra options in the compile context. */
|
||||
|
||||
(void)pcre2_set_compile_extra_options(compile_context, extra_options);
|
||||
|
||||
/* If use_jit is set, check whether JIT is available. If not, do not try
|
||||
to use JIT. */
|
||||
|
||||
if (use_jit)
|
||||
{
|
||||
uint32_t answer;
|
||||
(void)pcre2_config(PCRE2_CONFIG_JIT, &answer);
|
||||
if (!answer) use_jit = FALSE;
|
||||
}
|
||||
|
||||
/* Get memory for the main buffer. */
|
||||
|
||||
if (bufthird <= 0)
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: --buffer-size must be greater than zero\n");
|
||||
goto EXIT2;
|
||||
}
|
||||
|
||||
bufsize = 3*bufthird;
|
||||
main_buffer = (char *)malloc(bufsize);
|
||||
|
||||
if (main_buffer == NULL)
|
||||
{
|
||||
/* LCOV_EXCL_START */
|
||||
fprintf(stderr, "pcre2grep: malloc failed\n");
|
||||
goto EXIT2;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
|
||||
/* If no patterns were provided by -e, and there are no files provided by -f,
|
||||
the first argument is the one and only pattern, and it must exist. */
|
||||
|
||||
if (patterns == NULL && pattern_files == NULL)
|
||||
{
|
||||
if (i >= argc) return usage(2);
|
||||
patterns = patterns_last = add_pattern(argv[i], (PCRE2_SIZE)strlen(argv[i]),
|
||||
NULL);
|
||||
i++;
|
||||
if (patterns == NULL) goto EXIT2;
|
||||
}
|
||||
|
||||
/* Compile the patterns that were provided on the command line, either by
|
||||
multiple uses of -e or as a single unkeyed pattern. We cannot do this until
|
||||
after all the command-line options are read so that we know which PCRE options
|
||||
to use. When -F is used, compile_pattern() may add another block into the
|
||||
chain, so we must not access the next pointer till after the compile. */
|
||||
|
||||
for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
|
||||
{
|
||||
if (!compile_pattern(cp, pcre2_options, FALSE, "command-line",
|
||||
(j == 1 && patterns->next == NULL)? 0 : j))
|
||||
goto EXIT2;
|
||||
}
|
||||
|
||||
/* Read and compile the regular expressions that are provided in files. */
|
||||
|
||||
for (fn = pattern_files; fn != NULL; fn = fn->next)
|
||||
{
|
||||
if (!read_pattern_file(fn->name, &patterns, &patterns_last)) goto EXIT2;
|
||||
}
|
||||
|
||||
/* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
|
||||
|
||||
#ifdef SUPPORT_PCRE2GREP_JIT
|
||||
if (use_jit)
|
||||
{
|
||||
jit_stack = pcre2_jit_stack_create(32*1024, 1024*1024, NULL);
|
||||
if (jit_stack != NULL )
|
||||
pcre2_jit_stack_assign(match_context, NULL, jit_stack);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* -F, -w, and -x do not apply to include or exclude patterns, so we must
|
||||
adjust the options. */
|
||||
|
||||
pcre2_options &= ~PCRE2_LITERAL;
|
||||
(void)pcre2_set_compile_extra_options(compile_context, 0);
|
||||
|
||||
/* If there are include or exclude patterns read from the command line, compile
|
||||
them. */
|
||||
|
||||
for (j = 0; j < 4; j++)
|
||||
{
|
||||
int k;
|
||||
for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
|
||||
{
|
||||
if (!compile_pattern(cp, pcre2_options, FALSE, incexname[j],
|
||||
(k == 1 && cp->next == NULL)? 0 : k))
|
||||
goto EXIT2;
|
||||
}
|
||||
}
|
||||
|
||||
/* Read and compile include/exclude patterns from files. */
|
||||
|
||||
for (fn = include_from; fn != NULL; fn = fn->next)
|
||||
{
|
||||
if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last))
|
||||
goto EXIT2;
|
||||
}
|
||||
|
||||
for (fn = exclude_from; fn != NULL; fn = fn->next)
|
||||
{
|
||||
if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last))
|
||||
goto EXIT2;
|
||||
}
|
||||
|
||||
/* If there are no files that contain lists of files to search, and there are
|
||||
no file arguments, search stdin, and then exit. */
|
||||
|
||||
if (file_lists == NULL && i >= argc)
|
||||
{
|
||||
/* Using a buffered stdin, that then is seek is not portable,
|
||||
so attempt to remove the buffer, to workaround reported issues
|
||||
affecting several BSD and AIX */
|
||||
if (count_limit >= 0)
|
||||
setbuf(stdin, NULL);
|
||||
rc = pcre2grep(stdin, FR_PLAIN, stdin_name,
|
||||
(filenames > FN_DEFAULT)? stdin_name : NULL);
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* If any files that contains a list of files to search have been specified,
|
||||
read them line by line and search the given files. */
|
||||
|
||||
for (fn = file_lists; fn != NULL; fn = fn->next)
|
||||
{
|
||||
char buffer[FNBUFSIZ];
|
||||
FILE *fl;
|
||||
if (strcmp(fn->name, "-") == 0) fl = stdin; else
|
||||
{
|
||||
fl = fopen(fn->name, "rb");
|
||||
if (fl == NULL)
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", fn->name,
|
||||
strerror(errno));
|
||||
goto EXIT2;
|
||||
}
|
||||
}
|
||||
while (fgets(buffer, sizeof(buffer), fl) != NULL)
|
||||
{
|
||||
int frc;
|
||||
char *end = buffer + (int)strlen(buffer);
|
||||
while (end > buffer && isspace((unsigned char)(end[-1]))) end--;
|
||||
*end = 0;
|
||||
if (*buffer != 0)
|
||||
{
|
||||
frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
|
||||
if (frc > 1) rc = frc;
|
||||
else if (frc == 0 && rc == 1) rc = 0;
|
||||
}
|
||||
}
|
||||
if (fl != stdin) fclose(fl);
|
||||
}
|
||||
|
||||
/* After handling file-list, work through remaining arguments. Pass in the fact
|
||||
that there is only one argument at top level - this suppresses the file name if
|
||||
the argument is not a directory and filenames are not otherwise forced. */
|
||||
|
||||
only_one_at_top = i == argc - 1 && file_lists == NULL;
|
||||
|
||||
for (; i < argc; i++)
|
||||
{
|
||||
int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
|
||||
only_one_at_top);
|
||||
if (frc > 1) rc = frc;
|
||||
else if (frc == 0 && rc == 1) rc = 0;
|
||||
}
|
||||
|
||||
/* Show the total number of matches if requested, but not if only one file's
|
||||
count was printed. */
|
||||
|
||||
if (show_total_count && counts_printed != 1 && filenames != FN_NOMATCH_ONLY)
|
||||
{
|
||||
if (counts_printed != 0 && filenames >= FN_DEFAULT)
|
||||
fprintf(stdout, "TOTAL:");
|
||||
fprintf(stdout, "%lu" STDOUT_NL, total_count);
|
||||
}
|
||||
|
||||
EXIT:
|
||||
#ifdef SUPPORT_PCRE2GREP_JIT
|
||||
pcre2_jit_free_unused_memory(NULL);
|
||||
if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack);
|
||||
#endif
|
||||
|
||||
free(main_buffer);
|
||||
if (character_tables != NULL) pcre2_maketables_free(NULL, character_tables);
|
||||
|
||||
pcre2_compile_context_free(compile_context);
|
||||
pcre2_match_context_free(match_context);
|
||||
pcre2_match_data_free(match_data_pair[0]);
|
||||
pcre2_match_data_free(match_data_pair[1]);
|
||||
|
||||
free_pattern_chain(patterns);
|
||||
free_pattern_chain(include_patterns);
|
||||
free_pattern_chain(include_dir_patterns);
|
||||
free_pattern_chain(exclude_patterns);
|
||||
free_pattern_chain(exclude_dir_patterns);
|
||||
|
||||
free_file_chain(exclude_from);
|
||||
free_file_chain(include_from);
|
||||
free_file_chain(pattern_files);
|
||||
free_file_chain(file_lists);
|
||||
|
||||
while (only_matching != NULL)
|
||||
{
|
||||
omstr *this = only_matching;
|
||||
only_matching = this->next;
|
||||
free(this);
|
||||
}
|
||||
|
||||
pcre2grep_exit(rc);
|
||||
|
||||
EXIT2:
|
||||
rc = 2;
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* End of pcre2grep */
|
||||
@@ -1,431 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module is a wrapper that provides a POSIX API to the underlying PCRE2
|
||||
functions. The functions are called pcre2_regcomp(), pcre2_regexec(), etc.
|
||||
pcre2posix.h defines the POSIX names as macros for the corresponding pcre2_xxx
|
||||
functions, so any program that includes it and uses the POSIX names will call
|
||||
the PCRE2 implementations instead. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#ifdef PCRE2POSIX_SHARED
|
||||
#undef PCRE2_STATIC
|
||||
#endif
|
||||
|
||||
|
||||
/* Ensure that the PCRE2POSIX_EXP_xxx macros are set appropriately for
|
||||
compiling these functions. This must come before including pcre2posix.h, where
|
||||
they are set for an application (using these functions) if they have not
|
||||
previously been set. */
|
||||
|
||||
#if defined(_WIN32) && (defined(PCRE2POSIX_SHARED) || !defined(PCRE2_STATIC))
|
||||
# define PCRE2POSIX_EXP_DECL extern __declspec(dllexport)
|
||||
# define PCRE2POSIX_EXP_DEFN __declspec(dllexport)
|
||||
#endif
|
||||
|
||||
/* Older versions of MSVC lack snprintf(). This define allows for
|
||||
warning/error-free compilation and testing with MSVC compilers back to at least
|
||||
MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
|
||||
|
||||
#if defined(_MSC_VER) && (_MSC_VER < 1900)
|
||||
#define snprintf _snprintf
|
||||
#define BROKEN_SNPRINTF
|
||||
#endif
|
||||
|
||||
|
||||
/* Compile-time error numbers start at this value. It should probably never be
|
||||
changed. This #define is a copy of the one in pcre2_internal.h. */
|
||||
|
||||
#define COMPILE_ERROR_BASE 100
|
||||
|
||||
|
||||
/* Standard C headers */
|
||||
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/* PCRE2 headers */
|
||||
|
||||
#include "pcre2.h"
|
||||
#include "pcre2posix.h"
|
||||
#include "pcre2_util.h"
|
||||
|
||||
/* Table to translate PCRE2 compile time error codes into POSIX error codes.
|
||||
Only a few PCRE2 errors with a value greater than 23 turn into special POSIX
|
||||
codes: most go to REG_BADPAT. The second table lists, in pairs, those that
|
||||
don't, even though some of them cannot currently be provoked from within the
|
||||
POSIX wrapper. */
|
||||
|
||||
static const int eint1[] = {
|
||||
0, /* No error */
|
||||
REG_EESCAPE, /* \ at end of pattern */
|
||||
REG_EESCAPE, /* \c at end of pattern */
|
||||
REG_EESCAPE, /* unrecognized character follows \ */
|
||||
REG_BADBR, /* numbers out of order in {} quantifier */
|
||||
/* 5 */
|
||||
REG_BADBR, /* number too big in {} quantifier */
|
||||
REG_EBRACK, /* missing terminating ] for character class */
|
||||
REG_ECTYPE, /* invalid escape sequence in character class */
|
||||
REG_ERANGE, /* range out of order in character class */
|
||||
REG_BADRPT, /* nothing to repeat */
|
||||
/* 10 */
|
||||
REG_ASSERT, /* internal error: unexpected repeat */
|
||||
REG_BADPAT, /* unrecognized character after (? or (?- */
|
||||
REG_BADPAT, /* POSIX named classes are supported only within a class */
|
||||
REG_BADPAT, /* POSIX collating elements are not supported */
|
||||
REG_EPAREN, /* missing ) */
|
||||
/* 15 */
|
||||
REG_ESUBREG, /* reference to non-existent subpattern */
|
||||
REG_INVARG, /* pattern passed as NULL */
|
||||
REG_INVARG, /* unknown compile-time option bit(s) */
|
||||
REG_EPAREN, /* missing ) after (?# comment */
|
||||
REG_ESIZE, /* parentheses nested too deeply */
|
||||
/* 20 */
|
||||
REG_ESIZE, /* regular expression too large */
|
||||
REG_ESPACE, /* failed to get memory */
|
||||
REG_EPAREN, /* unmatched closing parenthesis */
|
||||
REG_ASSERT /* internal error: code overflow */
|
||||
};
|
||||
|
||||
static const int eint2[] = {
|
||||
30, REG_ECTYPE, /* unknown POSIX class name */
|
||||
32, REG_INVARG, /* this version of PCRE2 does not have Unicode support */
|
||||
37, REG_EESCAPE, /* PCRE2 does not support \L, \l, \N{name}, \U, or \u */
|
||||
56, REG_INVARG, /* internal error: unknown newline setting */
|
||||
92, REG_INVARG, /* invalid option bits with PCRE2_LITERAL */
|
||||
98, REG_EESCAPE, /* missing digit after \0 in NO_BS0 mode */
|
||||
99, REG_EESCAPE, /* \K in lookaround */
|
||||
102, REG_EESCAPE /* \ddd octal > \377 in PYTHON_OCTAL mode */
|
||||
};
|
||||
|
||||
/* Table of texts corresponding to POSIX error codes */
|
||||
|
||||
/* Message texts indexed by POSIX error code (the REG_xxx values). Entry 0 is
a placeholder because no error code has the value zero. pcre2_regerror() uses
the element count of this array to range-check the code it is given, so the
order and number of entries must stay in step with the error codes. */

static const char *const pstring[] = {
  "",                                /* Dummy for value 0 */
  "internal error",                  /* REG_ASSERT */
  "invalid repeat counts in {}",     /* BADBR      */
  "pattern error",                   /* BADPAT     */
  "? * + invalid",                   /* BADRPT     */
  "unbalanced {}",                   /* EBRACE     */
  "unbalanced []",                   /* EBRACK     */
  "collation error - not relevant",  /* ECOLLATE   */
  "bad class",                       /* ECTYPE     */
  "bad escape sequence",             /* EESCAPE    */
  "empty expression",                /* EMPTY      */
  "unbalanced ()",                   /* EPAREN     */
  "bad range inside []",             /* ERANGE     */
  "expression too big",              /* ESIZE      */
  "failed to get memory",            /* ESPACE     */
  "bad back reference",              /* ESUBREG    */
  "bad argument",                    /* INVARG     */
  "match failed"                     /* NOMATCH    */
};
|
||||
|
||||
/* Compute the length (excluding the terminating NUL) of the text that
pcre2_regerror() produces with the format "%s at offset %d".

Arguments:
  message   the error text that is substituted for %s
  offset    the error offset that is substituted for %d

Returns:    strlen(message) + 11 + number of characters in the decimal
            representation of offset
*/

static int message_len(const char *message, int offset)
{
  char buf[12];

  /* 11 is the length of the literal " at offset " in the format used by
  pcre2_regerror(); the offset is formatted into a scratch buffer only to
  count its digits (and sign). */
  return (int)strlen(message) + 11 + snprintf(buf, sizeof(buf), "%d", offset);
}
|
||||
|
||||
/*************************************************
|
||||
* Translate error code to string *
|
||||
*************************************************/
|
||||
|
||||
PCRE2POSIX_EXP_DEFN size_t PCRE2_CALL_CONVENTION
|
||||
pcre2_regerror(int errcode, const regex_t *preg, char *errbuf,
|
||||
size_t errbuf_size)
|
||||
{
|
||||
int ret;
|
||||
const char *message;
|
||||
size_t len = 0; /* keeps 0 if snprintf is used */
|
||||
|
||||
message = (errcode <= 0 || errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
|
||||
"unknown error code" : pstring[errcode];
|
||||
|
||||
if (preg != NULL && (int)preg->re_erroffset != -1)
|
||||
{
|
||||
/* no need to deal with UB in snprintf */
|
||||
if (errbuf_size > INT_MAX) errbuf_size = INT_MAX;
|
||||
|
||||
/* there are 11 characters between message and offset;
|
||||
update message_len() if changed */
|
||||
ret = snprintf(errbuf, errbuf_size, "%s at offset %d", message,
|
||||
(int)preg->re_erroffset);
|
||||
}
|
||||
else
|
||||
{
|
||||
len = strlen(message);
|
||||
if (errbuf_size != 0)
|
||||
{
|
||||
strncpy(errbuf, message, errbuf_size);
|
||||
if (errbuf_size <= len) errbuf[errbuf_size - 1] = '\0';
|
||||
}
|
||||
ret = (int)len;
|
||||
}
|
||||
|
||||
PCRE2_ASSERT(len > 0 || preg != NULL);
|
||||
|
||||
do {
|
||||
if (ret < 0)
|
||||
{
|
||||
#ifdef BROKEN_SNPRINTF
|
||||
/* _snprintf returns -1 on overflow and doesn't zero terminate */
|
||||
if (!len)
|
||||
{
|
||||
if (ret == -1 && errbuf_size != 0) errbuf[errbuf_size - 1] = '\0';
|
||||
|
||||
ret = message_len(message, (int)preg->re_erroffset);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
/* snprintf failed, will use a 14 char long message if possible */
|
||||
ret = 14;
|
||||
if (errbuf_size != 0)
|
||||
{
|
||||
strncpy(errbuf, "internal error", errbuf_size);
|
||||
if ((int)errbuf_size <= ret) errbuf[errbuf_size - 1] = '\0';
|
||||
}
|
||||
}
|
||||
else if (ret == (int)errbuf_size && !len)
|
||||
{
|
||||
/* pre C99 snprintf returns used, so redo ret to fix that */
|
||||
|
||||
ret = message_len(message, (int)preg->re_erroffset);
|
||||
}
|
||||
} while (0);
|
||||
|
||||
return ret + 1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free store held by a regex *
|
||||
*************************************************/
|
||||
|
||||
PCRE2POSIX_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_regfree(regex_t *preg)
|
||||
{
|
||||
pcre2_match_data_free(preg->re_match_data);
|
||||
pcre2_code_free(preg->re_pcre2_code);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compile a regular expression *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
preg points to a structure for recording the compiled expression
|
||||
pattern the pattern to compile
|
||||
cflags compilation flags
|
||||
|
||||
Returns: 0 on success
|
||||
various non-zero codes on failure
|
||||
*/
|
||||
|
||||
PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_regcomp(regex_t *preg, const char *pattern, int cflags)
|
||||
{
|
||||
PCRE2_SIZE erroffset;
|
||||
PCRE2_SIZE patlen;
|
||||
int errorcode;
|
||||
int options = 0;
|
||||
int re_nsub = 0;
|
||||
|
||||
patlen = ((cflags & REG_PEND) != 0)? (PCRE2_SIZE)(preg->re_endp - pattern) :
|
||||
PCRE2_ZERO_TERMINATED;
|
||||
|
||||
if ((cflags & REG_ICASE) != 0) options |= PCRE2_CASELESS;
|
||||
if ((cflags & REG_NEWLINE) != 0) options |= PCRE2_MULTILINE;
|
||||
if ((cflags & REG_DOTALL) != 0) options |= PCRE2_DOTALL;
|
||||
if ((cflags & REG_NOSPEC) != 0) options |= PCRE2_LITERAL;
|
||||
if ((cflags & REG_UTF) != 0) options |= PCRE2_UTF;
|
||||
if ((cflags & REG_UCP) != 0) options |= PCRE2_UCP;
|
||||
if ((cflags & REG_UNGREEDY) != 0) options |= PCRE2_UNGREEDY;
|
||||
|
||||
preg->re_cflags = cflags;
|
||||
preg->re_pcre2_code = pcre2_compile((PCRE2_SPTR)pattern, patlen, options,
|
||||
&errorcode, &erroffset, NULL);
|
||||
preg->re_erroffset = erroffset;
|
||||
|
||||
if (preg->re_pcre2_code == NULL)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
/* A negative value is a UTF error; otherwise all error codes are greater
|
||||
than COMPILE_ERROR_BASE, but check, just in case. */
|
||||
|
||||
if (errorcode < COMPILE_ERROR_BASE) return REG_BADPAT;
|
||||
errorcode -= COMPILE_ERROR_BASE;
|
||||
|
||||
if (errorcode < (int)(sizeof(eint1)/sizeof(const int)))
|
||||
return eint1[errorcode];
|
||||
for (i = 0; i < sizeof(eint2)/sizeof(const int); i += 2)
|
||||
if (errorcode == eint2[i]) return eint2[i+1];
|
||||
return REG_BADPAT;
|
||||
}
|
||||
|
||||
(void)pcre2_pattern_info((const pcre2_code *)preg->re_pcre2_code,
|
||||
PCRE2_INFO_CAPTURECOUNT, &re_nsub);
|
||||
preg->re_nsub = (size_t)re_nsub;
|
||||
preg->re_match_data = pcre2_match_data_create(re_nsub + 1, NULL);
|
||||
preg->re_erroffset = (size_t)(-1); /* No meaning after successful compile */
|
||||
|
||||
if (preg->re_match_data == NULL)
|
||||
{
|
||||
/* LCOV_EXCL_START */
|
||||
pcre2_code_free(preg->re_pcre2_code);
|
||||
return REG_ESPACE;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Match a regular expression *
|
||||
*************************************************/
|
||||
|
||||
/* A suitable match_data block, large enough to hold all possible captures, was
|
||||
obtained when the pattern was compiled, to save having to allocate and free it
|
||||
for each match. If REG_NOSUB was specified at compile time, the nmatch and
|
||||
pmatch arguments are ignored, and the only result is yes/no/error. */
|
||||
|
||||
PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_regexec(const regex_t *preg, const char *string, size_t nmatch,
|
||||
regmatch_t pmatch[], int eflags)
|
||||
{
|
||||
int rc, so, eo;
|
||||
int options = 0;
|
||||
pcre2_match_data *md = (pcre2_match_data *)preg->re_match_data;
|
||||
|
||||
if (string == NULL) return REG_INVARG;
|
||||
|
||||
if ((eflags & REG_NOTBOL) != 0) options |= PCRE2_NOTBOL;
|
||||
if ((eflags & REG_NOTEOL) != 0) options |= PCRE2_NOTEOL;
|
||||
if ((eflags & REG_NOTEMPTY) != 0) options |= PCRE2_NOTEMPTY;
|
||||
|
||||
/* When REG_NOSUB was specified, or if no vector has been passed in which to
|
||||
put captured strings, ensure that nmatch is zero. This will stop any attempt to
|
||||
write to pmatch. */
|
||||
|
||||
if ((preg->re_cflags & REG_NOSUB) != 0 || pmatch == NULL) nmatch = 0;
|
||||
|
||||
/* REG_STARTEND is a BSD extension, to allow for non-NUL-terminated strings.
|
||||
The man page from OS X says "REG_STARTEND affects only the location of the
|
||||
string, not how it is matched". That is why the "so" value is used to bump the
|
||||
start location rather than being passed as a PCRE2 "starting offset". */
|
||||
|
||||
if ((eflags & REG_STARTEND) != 0)
|
||||
{
|
||||
if (pmatch == NULL) return REG_INVARG;
|
||||
so = pmatch[0].rm_so;
|
||||
eo = pmatch[0].rm_eo;
|
||||
}
|
||||
else
|
||||
{
|
||||
so = 0;
|
||||
eo = (int)strlen(string);
|
||||
}
|
||||
|
||||
rc = pcre2_match((const pcre2_code *)preg->re_pcre2_code,
|
||||
(PCRE2_SPTR)string + so, (eo - so), 0, options, md, NULL);
|
||||
|
||||
/* Successful match */
|
||||
|
||||
if (rc >= 0)
|
||||
{
|
||||
size_t i;
|
||||
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
|
||||
if ((size_t)rc > nmatch) rc = (int)nmatch;
|
||||
for (i = 0; i < (size_t)rc; i++)
|
||||
{
|
||||
pmatch[i].rm_so = (ovector[i*2] == PCRE2_UNSET)? -1 :
|
||||
(int)(ovector[i*2] + so);
|
||||
pmatch[i].rm_eo = (ovector[i*2+1] == PCRE2_UNSET)? -1 :
|
||||
(int)(ovector[i*2+1] + so);
|
||||
}
|
||||
for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Unsuccessful match */
|
||||
|
||||
if (rc <= PCRE2_ERROR_UTF8_ERR1 && rc >= PCRE2_ERROR_UTF8_ERR21)
|
||||
return REG_INVARG;
|
||||
|
||||
/* Most of these are events that won't occur during testing, so exclude them
|
||||
from coverage. */
|
||||
|
||||
switch(rc)
|
||||
{
|
||||
case PCRE2_ERROR_HEAPLIMIT: return REG_ESPACE;
|
||||
case PCRE2_ERROR_NOMATCH: return REG_NOMATCH;
|
||||
|
||||
/* LCOV_EXCL_START */
|
||||
case PCRE2_ERROR_BADMODE: return REG_INVARG;
|
||||
case PCRE2_ERROR_BADMAGIC: return REG_INVARG;
|
||||
case PCRE2_ERROR_BADOPTION: return REG_INVARG;
|
||||
case PCRE2_ERROR_BADUTFOFFSET: return REG_INVARG;
|
||||
case PCRE2_ERROR_MATCHLIMIT: return REG_ESPACE;
|
||||
case PCRE2_ERROR_NOMEMORY: return REG_ESPACE;
|
||||
case PCRE2_ERROR_NULL: return REG_INVARG;
|
||||
default: return REG_ASSERT;
|
||||
/* LCOV_EXCL_STOP */
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2posix.c */
|
||||
@@ -1,187 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE2 is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language. This is
|
||||
the public header file to be #included by applications that call PCRE2 via the
|
||||
POSIX wrapper interface.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2023 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef PCRE2POSIX_H_IDEMPOTENT_GUARD
|
||||
#define PCRE2POSIX_H_IDEMPOTENT_GUARD
|
||||
|
||||
/* Have to include stdlib.h in order to ensure that size_t is defined. */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Allow for C++ users */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Options, mostly defined by POSIX, but with some extras. */
|
||||
|
||||
/* Each option is a distinct bit so that options can be combined with "|".
Compile-time and match-time options share this one numbering. */

#define REG_ICASE     0x0001  /* Maps to PCRE2_CASELESS */
#define REG_NEWLINE   0x0002  /* Maps to PCRE2_MULTILINE */
#define REG_NOTBOL    0x0004  /* Maps to PCRE2_NOTBOL */
#define REG_NOTEOL    0x0008  /* Maps to PCRE2_NOTEOL */
#define REG_DOTALL    0x0010  /* NOT defined by POSIX; maps to PCRE2_DOTALL */
#define REG_NOSUB     0x0020  /* Do not report what was matched */
#define REG_UTF       0x0040  /* NOT defined by POSIX; maps to PCRE2_UTF */
#define REG_STARTEND  0x0080  /* BSD feature: pass subject string by so,eo */
#define REG_NOTEMPTY  0x0100  /* NOT defined by POSIX; maps to PCRE2_NOTEMPTY */
#define REG_UNGREEDY  0x0200  /* NOT defined by POSIX; maps to PCRE2_UNGREEDY */
#define REG_UCP       0x0400  /* NOT defined by POSIX; maps to PCRE2_UCP */
#define REG_PEND      0x0800  /* GNU feature: pass end pattern by re_endp */
#define REG_NOSPEC    0x1000  /* Maps to PCRE2_LITERAL */
|
||||
|
||||
/* This is not used by PCRE2, but by defining it we make it easier
|
||||
to slot PCRE2 into existing programs that make POSIX calls. */
|
||||
|
||||
#define REG_EXTENDED 0
|
||||
|
||||
/* Error values. Not all these are relevant or used by the wrapper. */
|
||||
|
||||
enum {
|
||||
REG_ASSERT = 1, /* internal error ? */
|
||||
REG_BADBR, /* invalid repeat counts in {} */
|
||||
REG_BADPAT, /* pattern error */
|
||||
REG_BADRPT, /* ? * + invalid */
|
||||
REG_EBRACE, /* unbalanced {} */
|
||||
REG_EBRACK, /* unbalanced [] */
|
||||
REG_ECOLLATE, /* collation error - not relevant */
|
||||
REG_ECTYPE, /* bad class */
|
||||
REG_EESCAPE, /* bad escape sequence */
|
||||
REG_EMPTY, /* empty expression */
|
||||
REG_EPAREN, /* unbalanced () */
|
||||
REG_ERANGE, /* bad range inside [] */
|
||||
REG_ESIZE, /* expression too big */
|
||||
REG_ESPACE, /* failed to get memory */
|
||||
REG_ESUBREG, /* bad back reference */
|
||||
REG_INVARG, /* bad argument */
|
||||
REG_NOMATCH /* match failed */
|
||||
};
|
||||
|
||||
|
||||
/* The structure representing a compiled regular expression. It is also used
|
||||
for passing the pattern end pointer when REG_PEND is set. */
|
||||
|
||||
typedef struct {
|
||||
void *re_pcre2_code;
|
||||
void *re_match_data;
|
||||
const char *re_endp;
|
||||
size_t re_nsub;
|
||||
size_t re_erroffset;
|
||||
int re_cflags;
|
||||
} regex_t;
|
||||
|
||||
/* The structure in which a captured offset is returned. */
|
||||
|
||||
typedef int regoff_t;
|
||||
|
||||
typedef struct {
|
||||
regoff_t rm_so;
|
||||
regoff_t rm_eo;
|
||||
} regmatch_t;
|
||||
|
||||
/* When compiling with the MSVC compiler, it is sometimes necessary to include
|
||||
a "calling convention" before exported function names. (This is secondhand
|
||||
information; I know nothing about MSVC myself). For example, something like
|
||||
|
||||
void __cdecl function(....)
|
||||
|
||||
might be needed. In order to make this easy, all the exported functions have
|
||||
PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not
|
||||
set, we ensure here that it has no effect. */
|
||||
|
||||
#ifndef PCRE2_CALL_CONVENTION
|
||||
#define PCRE2_CALL_CONVENTION
|
||||
#endif
|
||||
|
||||
#ifndef PCRE2_EXPORT
|
||||
#define PCRE2_EXPORT
|
||||
#endif
|
||||
|
||||
/* When an application links to a PCRE2 DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE2, the appropriate
|
||||
export settings are needed, and are set in pcre2posix.c before including this
|
||||
file. */
|
||||
|
||||
/* By default, we use the standard "extern" declarations. */
|
||||
|
||||
#ifndef PCRE2POSIX_EXP_DECL
|
||||
# if defined(_WIN32) && defined(PCRE2POSIX_SHARED) && !defined(PCRE2_STATIC)
|
||||
# define PCRE2POSIX_EXP_DECL extern __declspec(dllimport)
|
||||
# define PCRE2POSIX_EXP_DEFN __declspec(dllimport)
|
||||
# else
|
||||
# define PCRE2POSIX_EXP_DECL extern PCRE2_EXPORT
|
||||
# define PCRE2POSIX_EXP_DEFN
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* The functions. The actual code is in functions with pcre2_xxx names for
|
||||
uniqueness. POSIX names are provided as macros for API compatibility with POSIX
|
||||
regex functions. It's done this way to ensure to they are always linked from
|
||||
the PCRE2 library and not by accident from elsewhere (regex_t differs in size
|
||||
elsewhere). */
|
||||
|
||||
PCRE2POSIX_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_regcomp(regex_t *, const char *, int);
|
||||
PCRE2POSIX_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_regexec(const regex_t *, const char *, size_t,
|
||||
regmatch_t *, int);
|
||||
PCRE2POSIX_EXP_DECL size_t PCRE2_CALL_CONVENTION pcre2_regerror(int, const regex_t *, char *, size_t);
|
||||
PCRE2POSIX_EXP_DECL void PCRE2_CALL_CONVENTION pcre2_regfree(regex_t *);
|
||||
|
||||
#define regcomp pcre2_regcomp
|
||||
#define regexec pcre2_regexec
|
||||
#define regerror pcre2_regerror
|
||||
#define regfree pcre2_regfree
|
||||
|
||||
/* Debian had a patch that used different names. These are now here to save
|
||||
them having to maintain their own patch, but are not documented by PCRE2. */
|
||||
|
||||
#define PCRE2regcomp pcre2_regcomp
|
||||
#define PCRE2regexec pcre2_regexec
|
||||
#define PCRE2regerror pcre2_regerror
|
||||
#define PCRE2regfree pcre2_regfree
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* PCRE2POSIX_H_IDEMPOTENT_GUARD */
|
||||
|
||||
/* End of pcre2posix.h */
|
||||
@@ -1,209 +0,0 @@
|
||||
/*************************************************
|
||||
* PCRE2 POSIX interface test program *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Written by Philip Hazel, December 2022
|
||||
Copyright (c) 2022
|
||||
File last edited: December 2022
|
||||
|
||||
This program tests the POSIX wrapper to the PCRE2 regular expression library.
|
||||
The main PCRE2 test program is pcre2test, which also tests these function
|
||||
calls. This little program is needed to test the case where the client includes
|
||||
pcre2posix.h but not pcre2.h, mainly to make sure that it builds successfully.
|
||||
However, the code is written as a flexible test program to which extra tests
|
||||
can be added.
|
||||
|
||||
Compile with -lpcre2-posix -lpcre2-8
|
||||
|
||||
If run with no options, there is no output on success, and the return code is
|
||||
zero. If any test fails there is output to stderr, and the return code is 1.
|
||||
|
||||
For testing purposes, the "-v" option causes verification output to be written
|
||||
to stdout. */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <pcre2posix.h>
|
||||
|
||||
#define CAPCOUNT 5 /* Number of captures supported */
|
||||
#define PRINTF if (v) printf /* Shorthand for testing output */
|
||||
|
||||
/* This vector contains compiler flags for each pattern that is tested. */
|
||||
|
||||
static int cflags[] = {
|
||||
0, /* Test 0 */
|
||||
REG_ICASE, /* Test 1 */
|
||||
0, /* Test 2 */
|
||||
REG_NEWLINE, /* Test 3 */
|
||||
0 /* Test 4 */
|
||||
};
|
||||
|
||||
/* This vector contains match flags for each pattern that is tested. */
|
||||
|
||||
static int mflags[] = {
|
||||
0, /* Test 0 */
|
||||
0, /* Test 1 */
|
||||
0, /* Test 2 */
|
||||
REG_NOTBOL, /* Test 3 */
|
||||
0 /* Test 4 */
|
||||
};
|
||||
|
||||
/* Automate the number of patterns */
|
||||
|
||||
#define count (int)(sizeof(cflags)/sizeof(int))
|
||||
|
||||
/* The data for each pattern consists of a pattern string, followed by any
|
||||
number of subject strings, terminated by NULL. Some tests share data, but use
|
||||
different flags. */
|
||||
|
||||
static const char *data0_1[] = { "posix", "lower posix", "upper POSIX", NULL };
|
||||
static const char *data2_3[] = { "(*LF)^(cat|dog)", "catastrophic\ncataclysm",
|
||||
"dogfight", "no animals", NULL };
|
||||
static const char *data4[] = { "*badpattern", NULL };
|
||||
|
||||
/* Index the data strings */
|
||||
|
||||
static char **data[] = {
|
||||
(char **)(&data0_1),
|
||||
(char **)(&data0_1),
|
||||
(char **)(&data2_3),
|
||||
(char **)(&data2_3),
|
||||
(char **)(&data4)
|
||||
};
|
||||
|
||||
/* The expected results for each pattern consist of a compiler return code,
|
||||
optionally followed, for each subject string, by a match return code and, for a
|
||||
successful match, up to CAPCOUNT pairs of returned match data. */
|
||||
|
||||
static int results0[] = {
|
||||
0, /* Compiler rc */
|
||||
0, 6, 11, /* 1st match */
|
||||
REG_NOMATCH /* 2nd match */
|
||||
};
|
||||
|
||||
static int results1[] = {
|
||||
0, /* Compiler rc */
|
||||
0, 6, 11, /* 1st match */
|
||||
0, 6, 11 /* 2nd match */
|
||||
};
|
||||
|
||||
static int results2[] = {
|
||||
0, /* Compiler rc */
|
||||
0, 0, 3, 0, 3, /* 1st match */
|
||||
0, 0, 3, 0, 3, /* 2nd match */
|
||||
REG_NOMATCH /* 3rd match */
|
||||
};
|
||||
|
||||
static int results3[] = {
|
||||
0, /* Compiler rc */
|
||||
0, 13, 16, 13, 16, /* 1st match */
|
||||
REG_NOMATCH, /* 2nd match */
|
||||
REG_NOMATCH /* 3rd match */
|
||||
};
|
||||
|
||||
static int results4[] = {
|
||||
REG_BADRPT /* Compiler rc */
|
||||
};
|
||||
|
||||
/* Index the result vectors */
|
||||
|
||||
static int *results[] = {
|
||||
(int *)(&results0),
|
||||
(int *)(&results1),
|
||||
(int *)(&results2),
|
||||
(int *)(&results3),
|
||||
(int *)(&results4)
|
||||
};
|
||||
|
||||
/* And here is the program */
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
regex_t re;
|
||||
regmatch_t match[CAPCOUNT];
|
||||
int v = argc > 1 && strcmp(argv[1], "-v") == 0;
|
||||
|
||||
PRINTF("Test of pcre2posix.h without pcre2.h\n");
|
||||
|
||||
for (int i = 0; i < count; i++)
|
||||
{
|
||||
char *pattern = data[i][0];
|
||||
char **subjects = data[i] + 1;
|
||||
int *rd = results[i];
|
||||
int rc = regcomp(&re, pattern, cflags[i]);
|
||||
|
||||
PRINTF("Pattern: %s flags=0x%02x\n", pattern, cflags[i]);
|
||||
|
||||
if (rc != *rd)
|
||||
{
|
||||
fprintf(stderr, "Unexpected compile error %d (expected %d)\n", rc, *rd);
|
||||
fprintf(stderr, "Pattern is: %s\n", pattern);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (rc != 0)
|
||||
{
|
||||
if (v)
|
||||
{
|
||||
char buffer[256];
|
||||
(void)regerror(rc, &re, buffer, sizeof(buffer));
|
||||
PRINTF("Compile error %d: %s (expected)\n", rc, buffer);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
for (; *subjects != NULL; subjects++)
|
||||
{
|
||||
rc = regexec(&re, *subjects, CAPCOUNT, match, mflags[i]);
|
||||
|
||||
PRINTF("Subject: %s\n", *subjects);
|
||||
PRINTF("Return: %d", rc);
|
||||
|
||||
if (rc != *(++rd))
|
||||
{
|
||||
PRINTF("\n");
|
||||
fprintf(stderr, "Unexpected match error %d (expected %d)\n", rc, *rd);
|
||||
fprintf(stderr, "Pattern is: %s\n", pattern);
|
||||
fprintf(stderr, "Subject is: %s\n", *subjects);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (rc == 0)
|
||||
{
|
||||
for (int j = 0; j < CAPCOUNT; j++)
|
||||
{
|
||||
regmatch_t *m = match + j;
|
||||
if (m->rm_so < 0) continue;
|
||||
if (m->rm_so != *(++rd) || m->rm_eo != *(++rd))
|
||||
{
|
||||
PRINTF("\n");
|
||||
fprintf(stderr, "Mismatched results for successful match\n");
|
||||
fprintf(stderr, "Pattern is: %s\n", pattern);
|
||||
fprintf(stderr, "Subject is: %s\n", *subjects);
|
||||
fprintf(stderr, "Result %d: expected %d %d received %d %d\n",
|
||||
j, rd[-1], rd[0], m->rm_so, m->rm_eo);
|
||||
return 1;
|
||||
}
|
||||
PRINTF(" (%d %d %d)", j, m->rm_so, m->rm_eo);
|
||||
}
|
||||
}
|
||||
|
||||
else if (v)
|
||||
{
|
||||
char buffer[256];
|
||||
(void)regerror(rc, &re, buffer, sizeof(buffer));
|
||||
PRINTF(": %s (expected)", buffer);
|
||||
}
|
||||
|
||||
PRINTF("\n");
|
||||
}
|
||||
|
||||
regfree(&re);
|
||||
}
|
||||
|
||||
PRINTF("End of test\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre2posix_test.c */
|
||||
File diff suppressed because it is too large
Load Diff
500
CMakeLists.txt
500
CMakeLists.txt
@@ -1,26 +1,23 @@
|
||||
cmake_minimum_required(VERSION 3.13)
|
||||
cmake_minimum_required(VERSION 3.0)
|
||||
cmake_policy(SET CMP0017 NEW) # need include() with .cmake
|
||||
if (POLICY CMP0177)
|
||||
cmake_policy(SET CMP0177 OLD)
|
||||
endif()
|
||||
project(PIP)
|
||||
set(PIP_MAJOR 5)
|
||||
set(PIP_MINOR 4)
|
||||
set(PIP_REVISION 0)
|
||||
set(PIP_SUFFIX )
|
||||
set(PIP_COMPANY SHS)
|
||||
set(PIP_DOMAIN org.SHS)
|
||||
project(pip)
|
||||
set(pip_MAJOR 2)
|
||||
set(pip_MINOR 39)
|
||||
set(pip_REVISION 0)
|
||||
set(pip_SUFFIX )
|
||||
set(pip_COMPANY SHS)
|
||||
set(pip_DOMAIN org.SHS)
|
||||
|
||||
set(GIT_CMAKE_DIR)
|
||||
if (NOT DEFINED SHSTKPROJECT)
|
||||
set(ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
|
||||
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/cmake-download/CMakeLists.txt"
|
||||
"# This file was generated by PIP CMake, don`t edit it!
|
||||
cmake_minimum_required(VERSION 3.13)
|
||||
cmake_minimum_required(VERSION 2.8.2)
|
||||
project(cmake-download NONE)
|
||||
include(ExternalProject)
|
||||
ExternalProject_Add(cmake
|
||||
GIT_REPOSITORY https://git.shstk.ru/SHS/cmake.git
|
||||
GIT_REPOSITORY https://git.shs.tools/SHS/cmake.git
|
||||
GIT_TAG \"origin/master\"
|
||||
GIT_CONFIG \"advice.detachedHead=false\"
|
||||
SOURCE_DIR \"${CMAKE_CURRENT_BINARY_DIR}/cmake-src\"
|
||||
@@ -45,7 +42,9 @@ ExternalProject_Add(cmake
|
||||
set(GIT_CMAKE_DIR "${CMAKE_CURRENT_BINARY_DIR}/cmake-src")
|
||||
endif()
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
|
||||
if ("x${CMAKE_MODULE_PATH}" STREQUAL "x")
|
||||
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
|
||||
endif()
|
||||
if (NOT "x${GIT_CMAKE_DIR}" STREQUAL "x")
|
||||
list(APPEND CMAKE_MODULE_PATH "${GIT_CMAKE_DIR}")
|
||||
endif()
|
||||
@@ -54,8 +53,7 @@ include(CheckFunctionExists)
|
||||
include(PIPMacros)
|
||||
include(SHSTKMacros)
|
||||
|
||||
shstk_begin_project(PIP)
|
||||
set(PIP_VERSION "${PIP_VERSION}" CACHE STRING "")
|
||||
shstk_begin_project(pip PIP)
|
||||
|
||||
set(_ICU_DEFAULT OFF)
|
||||
if((NOT DEFINED WIN32) AND (NOT DEFINED ANDROID_PLATFORM) AND (NOT DEFINED APPLE))
|
||||
@@ -63,26 +61,17 @@ if((NOT DEFINED WIN32) AND (NOT DEFINED ANDROID_PLATFORM) AND (NOT DEFINED APPLE
|
||||
endif()
|
||||
set(PIP_DLL_DIR "${CMAKE_CURRENT_BINARY_DIR}" CACHE STRING "")
|
||||
|
||||
|
||||
# Options
|
||||
option(ICU "ICU support for convert codepages" ${_ICU_DEFAULT})
|
||||
option(STD_IOSTREAM "Building with std iostream operators support" OFF)
|
||||
option(INTROSPECTION "Build with introspection" OFF)
|
||||
option(TESTS "Build tests and perform their before install step" OFF)
|
||||
option(COVERAGE "Build project with coverage info" OFF)
|
||||
option(PIP_FFTW_F "Support fftw module for float" ON)
|
||||
option(PIP_FFTW_L "Support fftw module for long double" ON)
|
||||
option(PIP_FFTW_Q "Support fftw module for quad double" OFF)
|
||||
set(PIP_UTILS 1)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
|
||||
shstk_is_parent_exists(_pe)
|
||||
if (_pe)
|
||||
set(BUILDING_pip 1 PARENT_SCOPE)
|
||||
set(pip_ROOT_SRC "${CMAKE_CURRENT_SOURCE_DIR}" PARENT_SCOPE)
|
||||
set(pip_ROOT_SRC "${CMAKE_CURRENT_SOURCE_DIR}")
|
||||
endif()
|
||||
|
||||
|
||||
# Basic
|
||||
set(PIP_MODULES)
|
||||
@@ -94,13 +83,10 @@ set(HDR_DIRS)
|
||||
set(PIP_UTILS_LIST)
|
||||
set(PIP_TESTS_LIST)
|
||||
set(PIP_EXPORTS)
|
||||
set(PIP_3PL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/3rd")
|
||||
|
||||
set(PIP_SRC_MODULES "console;crypt;compress;usb;fftw;opencl;io_utils;client_server;cloud;lua;http_client;http_server")
|
||||
set(PIP_SRC_MODULES "console;crypt;compress;usb;fftw;opencl;io_utils;cloud;lua")
|
||||
foreach(_m ${PIP_SRC_MODULES})
|
||||
set(PIP_MSG_${_m} "no")
|
||||
string(TOUPPER "${_m}" _mu)
|
||||
option(PIP_BUILD_${_mu} "Build \"${_m}\" module" ON)
|
||||
endforeach()
|
||||
|
||||
macro(pip_module NAME LIBS LABEL INCLUDES SOURCES MSG)
|
||||
@@ -122,10 +108,6 @@ macro(pip_module NAME LIBS LABEL INCLUDES SOURCES MSG)
|
||||
list(APPEND HDRS ${HS})
|
||||
list(APPEND PHDRS ${PHS})
|
||||
|
||||
if(${CMAKE_VERSION} VERSION_LESS "3.8.0")
|
||||
else()
|
||||
source_group(TREE "${CMAKE_CURRENT_SOURCE_DIR}" FILES ${CPPS} ${HS} ${PHS})
|
||||
endif()
|
||||
set(_target "pip_${NAME}")
|
||||
set(_libs "${LIBS}")
|
||||
if ("${NAME}" STREQUAL "main")
|
||||
@@ -136,11 +118,11 @@ macro(pip_module NAME LIBS LABEL INCLUDES SOURCES MSG)
|
||||
string(TOUPPER "${_target}" DEF_NAME)
|
||||
|
||||
set(PIP_MSG_${NAME} "yes${MSG}")
|
||||
import_version(${_target} PIP)
|
||||
set_deploy_property(${_target} ${PIP_LIB_TYPE}
|
||||
import_version(${_target} pip)
|
||||
set_deploy_property(${_target} ${pip_LIB_TYPE}
|
||||
LABEL "${LABEL}"
|
||||
FULLNAME "${PIP_DOMAIN}.${_target}"
|
||||
COMPANY "${PIP_COMPANY}"
|
||||
FULLNAME "${pip_DOMAIN}.${_target}"
|
||||
COMPANY "${pip_COMPANY}"
|
||||
INFO "Platform-Independent Primitives")
|
||||
make_rc(${_target} _RC)
|
||||
|
||||
@@ -157,7 +139,7 @@ macro(pip_module NAME LIBS LABEL INCLUDES SOURCES MSG)
|
||||
pip_resources(CRES "${RES}")
|
||||
endif()
|
||||
add_definitions(-D${DEF_NAME})
|
||||
add_library(${_target} ${PIP_LIB_TYPE} ${CPPS} ${CRES} ${_RC} ${HS} ${PHS})
|
||||
add_library(${_target} ${pip_LIB_TYPE} ${CPPS} ${CRES} ${_RC})
|
||||
target_include_directories(${_target} PUBLIC ${PIP_INCLUDES})
|
||||
if (NOT "x${RES}" STREQUAL "x")
|
||||
add_dependencies(${_target} pip_rc)
|
||||
@@ -189,12 +171,10 @@ if (NOT DEFINED PIP_CMG)
|
||||
if (CMAKE_CROSSCOMPILING OR (DEFINED ANDROID_PLATFORM))
|
||||
set(PIP_CMG "pip_cmg")
|
||||
set(PIP_RC "pip_rc")
|
||||
set(PIP_TR "pip_tr")
|
||||
set(PIP_DEPLOY_TOOL "deploy_tool")
|
||||
else()
|
||||
set(PIP_CMG "${CMAKE_CURRENT_BINARY_DIR}/utils/code_model_generator/pip_cmg")
|
||||
set(PIP_RC "${CMAKE_CURRENT_BINARY_DIR}/utils/resources_compiler/pip_rc")
|
||||
set(PIP_TR "${CMAKE_CURRENT_BINARY_DIR}/utils/translator/pip_tr")
|
||||
set(PIP_DEPLOY_TOOL "${CMAKE_CURRENT_BINARY_DIR}/utils/deploy_tool/deploy_tool")
|
||||
endif()
|
||||
endif()
|
||||
@@ -211,6 +191,7 @@ set(PIP_INCLUDES "${CMAKE_CURRENT_BINARY_DIR}")
|
||||
foreach(F ${PIP_FOLDERS})
|
||||
if (IS_DIRECTORY "${F}")
|
||||
list(APPEND PIP_INCLUDES "${F}")
|
||||
#include_directories("${F}")
|
||||
endif()
|
||||
endforeach(F)
|
||||
|
||||
@@ -253,11 +234,26 @@ if(PIP_MATH_YN)
|
||||
add_definitions(-DPIP_MATH_YN)
|
||||
endif()
|
||||
|
||||
|
||||
# Check if RT timers exists
|
||||
set(CMAKE_REQUIRED_INCLUDES time.h)
|
||||
set(CMAKE_REQUIRED_LIBRARIES )
|
||||
if((NOT DEFINED ENV{QNX_HOST}) AND (NOT APPLE) AND (NOT WIN32) AND (NOT DEFINED ANDROID_PLATFORM) AND (NOT PIP_FREERTOS))
|
||||
list(APPEND LIBS_MAIN rt)
|
||||
set(CMAKE_REQUIRED_LIBRARIES rt)
|
||||
endif()
|
||||
CHECK_FUNCTION_EXISTS(timer_create PIP_TIMER_RT_0)
|
||||
CHECK_FUNCTION_EXISTS(timer_settime PIP_TIMER_RT_1)
|
||||
CHECK_FUNCTION_EXISTS(timer_delete PIP_TIMER_RT_2)
|
||||
|
||||
|
||||
# Check if build debug version
|
||||
if (PIP_BUILD_DEBUG)
|
||||
if (CMAKE_BUILD_TYPE MATCHES Debug)
|
||||
set(PIP_BUILD_TYPE "Debug")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g3 -Wall")
|
||||
add_definitions(-DPIP_DEBUG)
|
||||
else()
|
||||
set(PIP_BUILD_TYPE "Release")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall")
|
||||
endif()
|
||||
|
||||
@@ -285,14 +281,9 @@ endif()
|
||||
# Check if ICU used for PIString and PIChar
|
||||
set(PIP_ICU "no")
|
||||
if(ICU)
|
||||
pip_find_lib(icuuc)
|
||||
if (icuuc_FOUND)
|
||||
set(PIP_ICU "yes")
|
||||
add_definitions(-DPIP_ICU)
|
||||
list(APPEND LIBS_MAIN icuuc)
|
||||
else()
|
||||
message(STATUS "Warning: ICU requested, but not found. Build without ICU")
|
||||
endif()
|
||||
set(PIP_ICU "yes")
|
||||
add_definitions(-DPIP_ICU)
|
||||
list(APPEND LIBS_MAIN icuuc)
|
||||
endif()
|
||||
|
||||
|
||||
@@ -316,6 +307,15 @@ list(APPEND HDRS ${_PIP_DEFS_FILE})
|
||||
#message("${_PIP_DEFS_CHANGED}")
|
||||
|
||||
|
||||
# Check if RT timers exists
|
||||
if(PIP_TIMER_RT_0 AND PIP_TIMER_RT_1 AND PIP_TIMER_RT_2)
|
||||
set(PIP_TIMERS "Thread, ThreadRT, Pool")
|
||||
add_definitions(-DPIP_TIMER_RT)
|
||||
else()
|
||||
set(PIP_TIMERS "Thread, Pool")
|
||||
endif()
|
||||
|
||||
|
||||
|
||||
# Add main library
|
||||
if(APPLE)
|
||||
@@ -326,27 +326,23 @@ if ((NOT DEFINED SHSTKPROJECT) AND (DEFINED ANDROID_PLATFORM))
|
||||
#message("${ANDROID_SYSTEM_LIBRARY_PATH}/usr/include")
|
||||
#message("${ANDROID_NDK}/sysroot/usr/include")
|
||||
endif()
|
||||
|
||||
if(NOT PIP_FREERTOS)
|
||||
if(WIN32)
|
||||
if(${C_COMPILER} STREQUAL "cl.exe")
|
||||
else()
|
||||
list(APPEND LIBS_MAIN ws2_32 iphlpapi psapi cfgmgr32 setupapi hid)
|
||||
endif()
|
||||
if(WIN32)
|
||||
if(${C_COMPILER} STREQUAL "cl.exe")
|
||||
else()
|
||||
list(APPEND LIBS_MAIN dl)
|
||||
if(DEFINED ENV{QNX_HOST})
|
||||
list(APPEND LIBS_MAIN socket)
|
||||
else()
|
||||
if (NOT DEFINED ANDROID_PLATFORM)
|
||||
list(APPEND LIBS_MAIN pthread util)
|
||||
if (NOT APPLE)
|
||||
list(APPEND LIBS_MAIN rt)
|
||||
endif()
|
||||
endif()
|
||||
list(APPEND LIBS_MAIN ws2_32 iphlpapi psapi cfgmgr32 setupapi)
|
||||
endif()
|
||||
else()
|
||||
list(APPEND LIBS_MAIN dl)
|
||||
if(DEFINED ENV{QNX_HOST})
|
||||
list(APPEND LIBS_MAIN socket)
|
||||
else()
|
||||
if (NOT DEFINED ANDROID_PLATFORM)
|
||||
list(APPEND LIBS_MAIN pthread util)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
set(PIP_LIBS)
|
||||
if(PIP_FREERTOS)
|
||||
set(PIP_LIBS ${LIBS_MAIN})
|
||||
@@ -368,19 +364,8 @@ else()
|
||||
endif()
|
||||
set(CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS}")
|
||||
|
||||
set(PCRE2_BUILD_PCRE2_8 OFF)
|
||||
set(PCRE2_BUILD_PCRE2_16 ON )
|
||||
set(PCRE2_BUILD_PCRE2_32 OFF)
|
||||
set(PCRE2_BUILD_PCRE2GREP OFF)
|
||||
set(PCRE2_BUILD_TESTS OFF)
|
||||
set(PCRE2_SHOW_REPORT OFF)
|
||||
if (WIN32)
|
||||
set (ZLIB_ROOT "${MINGW_INCLUDE}")
|
||||
endif()
|
||||
add_subdirectory("3rd/pcre2" EXCLUDE_FROM_ALL)
|
||||
list(APPEND LIBS_MAIN pcre2-16-static)
|
||||
|
||||
pip_module(main "${LIBS_MAIN}" "PIP main library" "" "${PIP_3PL_DIR}/BLAKE2" "")
|
||||
pip_module(main "${LIBS_MAIN}" "PIP main library" "" "" "")
|
||||
|
||||
generate_export_header(pip)
|
||||
list(APPEND HDRS "${CMAKE_CURRENT_BINARY_DIR}/pip_export.h")
|
||||
@@ -391,273 +376,131 @@ foreach(_m ${PIP_SRC_MODULES})
|
||||
endforeach()
|
||||
set_target_properties(pip PROPERTIES DEFINE_SYMBOL pip_EXPORTS)
|
||||
|
||||
# Override containers minimum bytes allocation
|
||||
if(NOT "x${PIP_CONTAINERS_MIN_ALLOC}" STREQUAL "x")
|
||||
target_compile_definitions(pip PRIVATE "-DPIP_CONTAINERS_MIN_ALLOC=${PIP_CONTAINERS_MIN_ALLOC}")
|
||||
message(STATUS "Attention: Override PIP_CONTAINERS_MIN_ALLOC = ${PIP_CONTAINERS_MIN_ALLOC}")
|
||||
endif()
|
||||
# Override containers maximum bytes for power of two expansion, may be bytes or X_KiB, or X_MiB
|
||||
if(NOT "x${PIP_CONTAINERS_MAX_POT_ALLOC}" STREQUAL "x")
|
||||
target_compile_definitions(pip PRIVATE "-DPIP_CONTAINERS_MAX_POT_ALLOC=${PIP_CONTAINERS_MAX_POT_ALLOC}")
|
||||
message(STATUS "Attention: Override PIP_CONTAINERS_MAX_POT_ALLOC = ${PIP_CONTAINERS_MAX_POT_ALLOC}")
|
||||
endif()
|
||||
|
||||
|
||||
if (NOT CROSSTOOLS)
|
||||
if (NOT PIP_FREERTOS)
|
||||
|
||||
if (PIP_BUILD_CONSOLE)
|
||||
pip_module(console "" "PIP console support" "" "" "")
|
||||
endif()
|
||||
|
||||
if (PIP_BUILD_USB)
|
||||
pip_find_lib(usb)
|
||||
if(usb_FOUND)
|
||||
pip_module(usb "usb" "PIP usb support" "" "" "")
|
||||
endif()
|
||||
pip_module(console "" "PIP console support" "" "" "")
|
||||
|
||||
|
||||
pip_find_lib(usb)
|
||||
if(usb_FOUND)
|
||||
pip_module(usb "usb" "PIP usb support" "" "" "")
|
||||
endif()
|
||||
|
||||
|
||||
if (PIP_BUILD_COMPRESS)
|
||||
pip_find_lib(zlib NAMES z zlib)
|
||||
if(zlib_FOUND)
|
||||
pip_module(compress "zlib" "PIP compression support" "" "" "")
|
||||
endif()
|
||||
pip_find_lib(zlib NAMES z zlib)
|
||||
if(zlib_FOUND)
|
||||
pip_module(compress "zlib" "PIP compression support" "" "" "")
|
||||
endif()
|
||||
|
||||
|
||||
if (PIP_BUILD_CRYPT)
|
||||
pip_find_lib(sodium)
|
||||
if(sodium_FOUND)
|
||||
pip_module(crypt "sodium" "PIP crypt support" "" "" "")
|
||||
pip_module(client_server "pip_io_utils" "PIP client-server helper" "" "" "")
|
||||
pip_module(cloud "pip_io_utils" "PIP cloud support" "" "" "")
|
||||
endif()
|
||||
pip_find_lib(sodium)
|
||||
if(sodium_FOUND)
|
||||
pip_module(crypt "sodium" "PIP crypt support" "" "" "")
|
||||
pip_module(cloud "pip_io_utils" "PIP cloud support" "" "" "")
|
||||
endif()
|
||||
|
||||
|
||||
if (PIP_BUILD_FFTW)
|
||||
# Check if PIP support fftw3 for PIFFT using in math module
|
||||
set(FFTW_LIB_NAME fftw3)
|
||||
set(FFTW_LIB_SUFFIXES "")
|
||||
if (PIP_FFTW_F)
|
||||
list(APPEND FFTW_LIB_SUFFIXES "f")
|
||||
endif()
|
||||
if (PIP_FFTW_L)
|
||||
list(APPEND FFTW_LIB_SUFFIXES "l")
|
||||
endif()
|
||||
if (PIP_FFTW_Q)
|
||||
list(APPEND FFTW_LIB_SUFFIXES "q")
|
||||
endif()
|
||||
if (NOT "${FFTW_LIB_SUFFIXES}" STREQUAL "")
|
||||
set(FFTW_LIB_SUFFIXES ";${FFTW_LIB_SUFFIXES}")
|
||||
else()
|
||||
list(APPEND FFTW_LIB_SUFFIXES "" "_")
|
||||
endif()
|
||||
set(FFTW_LIB_SUFFIXES2 "" "-3")
|
||||
set(FFTW_MSG "")
|
||||
set(FFTW_LIBS)
|
||||
set(FFTW_ABS_LIBS)
|
||||
set(CMAKE_REQUIRED_INCLUDES fftw3.h)
|
||||
foreach(FFTW_S_ IN LISTS FFTW_LIB_SUFFIXES)
|
||||
set(FFTW_BREAK false)
|
||||
foreach(FFTW_S2_ IN LISTS FFTW_LIB_SUFFIXES2)
|
||||
if(NOT FFTW_BREAK)
|
||||
set(FFTW_CLN "${FFTW_LIB_NAME}${FFTW_S_}${FFTW_S2_}")
|
||||
set(FFTW_CLNT "${FFTW_LIB_NAME}${FFTW_S_}_threads${FFTW_S2_}")
|
||||
find_library(${FFTW_CLN}_LIBRARIES ${FFTW_CLN})
|
||||
find_library(${FFTW_CLNT}_LIBRARIES ${FFTW_CLNT})
|
||||
set(${FFTW_CLN}_FOUND FALSE)
|
||||
set(${FFTW_CLNT}_FOUND FALSE)
|
||||
if(${FFTW_CLN}_LIBRARIES)
|
||||
if (NOT "${FFTW_MSG}" STREQUAL "")
|
||||
set(FFTW_MSG "${FFTW_MSG}, ")
|
||||
endif()
|
||||
set(FFTW_MSG "${FFTW_MSG}${FFTW_CLN}")
|
||||
set(${FFTW_CLN}_FOUND TRUE)
|
||||
list(APPEND FFTW_LIBS "${FFTW_CLN}")
|
||||
list(APPEND FFTW_ABS_LIBS "${${FFTW_CLN}_LIBRARIES}")
|
||||
set(${FFTW_CLN}_CTS "${FFTW_CLN}")
|
||||
if(${FFTW_CLNT}_FLIBRARIES)
|
||||
set(${FFTW_CLNT}_FOUND TRUE)
|
||||
list(APPEND FFTW_LIBS "${FFTW_CLNT}")
|
||||
list(APPEND FFTW_ABS_LIBS "${${FFTW_CLNT}_LIBRARIES}")
|
||||
list(APPEND ${FFTW_CLN}_CTS "${FFTW_CLNT}")
|
||||
endif()
|
||||
set(CMAKE_REQUIRED_LIBRARIES ${${FFTW_CLN}_CTS})
|
||||
CHECK_FUNCTION_EXISTS(fftw${FFTW_S_}_make_planner_thread_safe ${FFTW_CLN}_TSFE)
|
||||
add_definitions(-DPIP_FFTW${FFTW_S_})
|
||||
if(${FFTW_CLN}_TSFE)
|
||||
add_definitions(-DPIP_FFTW${FFTW_S_}_THREADSAFE)
|
||||
else()
|
||||
message(STATUS "Warning: PIFFTW${FFTW_S_}::preparePlan was not threadsafe")
|
||||
endif()
|
||||
# Check if PIP support fftw3 for PIFFT using in math module
|
||||
set(FFTW_LIB_NAME fftw3)
|
||||
set(FFTW_LIB_SUFFIXES "" "f" "l" "q")
|
||||
set(FFTW_LIB_SUFFIXES2 "" "-3")
|
||||
set(FFTW_LIBS)
|
||||
set(FFTW_ABS_LIBS)
|
||||
set(CMAKE_REQUIRED_INCLUDES fftw3.h)
|
||||
foreach(FFTW_S_ IN LISTS FFTW_LIB_SUFFIXES)
|
||||
set(FFTW_BREAK false)
|
||||
foreach(FFTW_S2_ IN LISTS FFTW_LIB_SUFFIXES2)
|
||||
if(NOT FFTW_BREAK)
|
||||
set(FFTW_CLN "${FFTW_LIB_NAME}${FFTW_S_}${FFTW_S2_}")
|
||||
set(FFTW_CLNT "${FFTW_LIB_NAME}${FFTW_S_}_threads${FFTW_S2_}")
|
||||
find_library(${FFTW_CLN}_LIBRARIES ${FFTW_CLN})
|
||||
find_library(${FFTW_CLNT}_LIBRARIES ${FFTW_CLNT})
|
||||
set(${FFTW_CLN}_FOUND FALSE)
|
||||
set(${FFTW_CLNT}_FOUND FALSE)
|
||||
if(${FFTW_CLN}_LIBRARIES)
|
||||
set(${FFTW_CLN}_FOUND TRUE)
|
||||
list(APPEND FFTW_LIBS "${FFTW_CLN}")
|
||||
list(APPEND FFTW_ABS_LIBS "${${FFTW_CLN}_LIBRARIES}")
|
||||
set(${FFTW_CLN}_CTS "${FFTW_CLN}")
|
||||
if(${FFTW_CLNT}_FLIBRARIES)
|
||||
set(${FFTW_CLNT}_FOUND TRUE)
|
||||
list(APPEND FFTW_LIBS "${FFTW_CLNT}")
|
||||
list(APPEND FFTW_ABS_LIBS "${${FFTW_CLNT}_LIBRARIES}")
|
||||
list(APPEND ${FFTW_CLN}_CTS "${FFTW_CLNT}")
|
||||
endif()
|
||||
set(CMAKE_REQUIRED_LIBRARIES ${${FFTW_CLN}_CTS})
|
||||
CHECK_FUNCTION_EXISTS(fftw${FFTW_S_}_make_planner_thread_safe ${FFTW_CLN}_TSFE)
|
||||
add_definitions(-DPIP_FFTW${FFTW_S_})
|
||||
if(${FFTW_CLN}_TSFE)
|
||||
add_definitions(-DPIP_FFTW${FFTW_S_}_THREADSAFE)
|
||||
else()
|
||||
message(STATUS "Warning: PIFFTW${FFTW_S_}::preparePlan was not threadsafe")
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
endif()
|
||||
endforeach()
|
||||
if(FFTW_LIBS)
|
||||
pip_module(fftw "${FFTW_LIBS}" "PIP FFTW support" "" "" " (${FFTW_MSG})")
|
||||
endif()
|
||||
endforeach()
|
||||
if(FFTW_LIBS)
|
||||
pip_module(fftw "${FFTW_LIBS}" "PIP FFTW support" "" "" "")
|
||||
endif()
|
||||
|
||||
|
||||
if (PIP_BUILD_OPENCL)
|
||||
if (NOT "x${MINGW_INCLUDE}" STREQUAL "x")
|
||||
list(APPEND CMAKE_INCLUDE_PATH "${MINGW_INCLUDE}")
|
||||
find_package(OpenCL QUIET) #OpenCL_VERSION_STRING
|
||||
if(OpenCL_FOUND)
|
||||
set(_opencl_lib OpenCL::OpenCL)
|
||||
if(${CMAKE_VERSION} VERSION_LESS "3.7.0")
|
||||
target_link_libraries(_opencl_lib OpenCL)
|
||||
endif()
|
||||
find_package(OpenCL QUIET) #OpenCL_VERSION_STRING
|
||||
if(OpenCL_FOUND)
|
||||
set(_opencl_inc "${OpenCL_INCLUDE_DIRS}")
|
||||
if(APPLE)
|
||||
set(_opencl_inc "${OpenCL_INCLUDE_DIRS}/Headers")
|
||||
endif()
|
||||
pip_module(opencl "OpenCL" "PIP OpenCL support" "${_opencl_inc}" "" " (${OpenCL_VERSION_STRING})")
|
||||
set(_opencl_inc "${OpenCL_INCLUDE_DIRS}")
|
||||
if(APPLE)
|
||||
set(_opencl_inc "${OpenCL_INCLUDE_DIRS}/Headers")
|
||||
endif()
|
||||
pip_module(opencl "${_opencl_lib}" "PIP OpenCL support" "${_opencl_inc}" "" " (${OpenCL_VERSION_STRING})")
|
||||
endif()
|
||||
|
||||
|
||||
if (PIP_BUILD_IO_UTILS)
|
||||
if(sodium_FOUND)
|
||||
pip_module(io_utils "pip_crypt" "PIP I/O support" "" "" " (+crypt)")
|
||||
else()
|
||||
pip_module(io_utils "" "PIP I/O support" "" "" "")
|
||||
endif()
|
||||
if(sodium_FOUND)
|
||||
pip_module(io_utils "pip_crypt" "PIP I/O support" "" "" " (+crypt)")
|
||||
else()
|
||||
pip_module(io_utils "" "PIP I/O support" "" "" "")
|
||||
endif()
|
||||
|
||||
|
||||
if (PIP_BUILD_LUA)
|
||||
# Lua module
|
||||
set(_lua_src_dir "${PIP_3PL_DIR}/lua")
|
||||
set(_lua_src_hdr "${_lua_src_dir}/lua.hpp" "${_lua_src_dir}/lua.h" "${_lua_src_dir}/luaconf.h" "${_lua_src_dir}/lualib.h")
|
||||
pip_module(lua "" "PIP Lua support" "${_lua_src_dir};${PIP_3PL_DIR}" "${_lua_src_dir}" " (internal)")
|
||||
target_include_directories(pip_lua PUBLIC "${_lua_src_dir}")
|
||||
if (WIN32)
|
||||
target_compile_definitions(pip_lua PRIVATE LUA_BUILD_AS_DLL LUA_CORE)
|
||||
else()
|
||||
target_compile_definitions(pip_lua PRIVATE LUA_USE_POSIX)
|
||||
endif()
|
||||
list(APPEND HDR_DIRS "${PIP_3PL_DIR}/LuaBridge")
|
||||
list(APPEND HDRS ${_lua_src_hdr})
|
||||
# Lua module
|
||||
set(_lua_src_dir "${CMAKE_CURRENT_SOURCE_DIR}/3rd/lua")
|
||||
set(_lua_bri_dir "${CMAKE_CURRENT_SOURCE_DIR}/libs/lua/3rd")
|
||||
set(_lua_src_hdr "${_lua_src_dir}/lua.hpp" "${_lua_src_dir}/lua.h" "${_lua_src_dir}/luaconf.h" "${_lua_src_dir}/lualib.h")
|
||||
pip_module(lua "" "PIP Lua support" "${_lua_src_dir};${_lua_bri_dir}" "${_lua_src_dir}" " (internal)")
|
||||
target_include_directories(pip_lua PUBLIC "${_lua_src_dir}" "${_lua_bri_dir}")
|
||||
if (WIN32)
|
||||
target_compile_definitions(pip_lua PRIVATE LUA_BUILD_AS_DLL LUA_CORE)
|
||||
endif()
|
||||
|
||||
|
||||
if (PIP_BUILD_HTTP_SERVER)
|
||||
# libmicrohttpd
|
||||
pip_find_lib(microhttpd HINTS "${MINGW_LIB}")
|
||||
if (microhttpd_FOUND)
|
||||
set(_microhttpd_add_libs microhttpd)
|
||||
if(WIN32)
|
||||
if("${C_COMPILER}" STREQUAL "cl.exe")
|
||||
else()
|
||||
list(APPEND _microhttpd_add_libs ws2_32)
|
||||
endif()
|
||||
else()
|
||||
list(APPEND _microhttpd_add_libs dl)
|
||||
find_library(tls_lib gnutls)
|
||||
if (tls_lib)
|
||||
set(gnutls_FOUND TRUE)
|
||||
set(gnutls_LIBRARIES "${tls_lib}")
|
||||
list(APPEND _microhttpd_add_libs gnutls)
|
||||
endif()
|
||||
if(DEFINED ENV{QNX_HOST})
|
||||
list(APPEND _microhttpd_add_libs socket)
|
||||
else()
|
||||
if (NOT DEFINED ANDROID_PLATFORM)
|
||||
list(APPEND _microhttpd_add_libs pthread util)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
#list(APPEND microhttpd_LIBRARIES "${_microhttpd_add_libs}")
|
||||
pip_module(http_server "${_microhttpd_add_libs}" "PIP HTTP server" "" "" "")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
if (PIP_BUILD_HTTP_CLIENT)
|
||||
# libcurl
|
||||
pip_find_lib(curl HINTS "${MINGW_LIB}")
|
||||
if (curl_FOUND)
|
||||
pip_module(http_client curl "PIP HTTP client" "" "" "")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
list(APPEND HDR_DIRS "${_lua_bri_dir}/LuaBridge")
|
||||
list(APPEND HDRS ${_lua_src_hdr})
|
||||
|
||||
|
||||
# Test program
|
||||
if(PIP_UTILS)
|
||||
|
||||
#add_library(pip_plugin SHARED "test_plugin.h" "test_plugin.cpp" "ccm.h" "ccm.cpp")
|
||||
#add_library(pip_plugin SHARED "test_plugin.h" "test_plugin.cpp")
|
||||
#target_link_libraries(pip_plugin pip)
|
||||
|
||||
if (NOT DEFINED ANDROID_PLATFORM)
|
||||
if(microhttpd_FOUND AND curl_FOUND)
|
||||
add_executable(pip_test "main.cpp")
|
||||
target_link_libraries(pip_test pip pip_io_utils pip_client_server pip_http_server pip_http_client)
|
||||
if(sodium_FOUND)
|
||||
add_executable(pip_cloud_test "main_picloud_test.cpp")
|
||||
target_link_libraries(pip_cloud_test pip_cloud)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
add_executable(pip_test "main.cpp")
|
||||
target_link_libraries(pip_test pip)
|
||||
endif()
|
||||
|
||||
else()
|
||||
|
||||
if (PIP_BUILD_CRYPT)
|
||||
pip_module(crypt "" "PIP crypt support" "" "" "")
|
||||
endif()
|
||||
|
||||
if (PIP_BUILD_COMPRESS)
|
||||
pip_module(compress "" "PIP compression support" "" "" "")
|
||||
endif()
|
||||
|
||||
if (PIP_BUILD_IO_UTILS)
|
||||
pip_module(io_utils "pip_crypt" "PIP I/O support" "" "" " (+crypt)")
|
||||
endif()
|
||||
|
||||
pip_module(crypt "" "PIP crypt support" "" "" "")
|
||||
pip_module(compress "" "PIP compression support" "" "" "")
|
||||
pip_module(io_utils "pip_crypt" "PIP I/O support" "" "" " (+crypt)")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
string(REPLACE ";" "," PIP_EXPORTS_STR "${PIP_EXPORTS}")
|
||||
target_compile_definitions(pip PRIVATE "PICODE_DEFINES=\"${PIP_EXPORTS_STR}\"")
|
||||
|
||||
|
||||
if(NOT PIP_FREERTOS)
|
||||
|
||||
# Auxiliary
|
||||
if (NOT CROSSTOOLS)
|
||||
add_subdirectory("utils/piterminal")
|
||||
endif()
|
||||
|
||||
# Utils
|
||||
add_subdirectory("utils/code_model_generator")
|
||||
add_subdirectory("utils/resources_compiler")
|
||||
add_subdirectory("utils/deploy_tool")
|
||||
add_subdirectory("utils/qt_support")
|
||||
add_subdirectory("utils/translator")
|
||||
add_subdirectory("utils/value_tree_translator")
|
||||
if(PIP_UTILS AND (NOT CROSSTOOLS))
|
||||
add_subdirectory("utils/system_calib")
|
||||
add_subdirectory("utils/udp_file_transfer")
|
||||
if(sodium_FOUND)
|
||||
add_subdirectory("utils/system_daemon")
|
||||
add_subdirectory("utils/crypt_tool")
|
||||
add_subdirectory("utils/cloud_dispatcher")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
endif()
|
||||
|
||||
|
||||
# Translations
|
||||
set(PIP_LANG)
|
||||
if (NOT CROSSTOOLS)
|
||||
# pip_translation(PIP_LANG lang/pip_ru.ts)
|
||||
# add_custom_target(pip_lang SOURCES "${PIP_LANG}")
|
||||
file(GLOB PIP_LANG "lang/*.btf")
|
||||
endif()
|
||||
|
||||
|
||||
# Install
|
||||
# Check if system or local install will be used (to system install use "-DLIB=" argument of cmake)
|
||||
if(NOT LOCAL)
|
||||
@@ -665,9 +508,6 @@ if(NOT LOCAL)
|
||||
if(MINGW)
|
||||
if (NOT CROSSTOOLS)
|
||||
install(FILES ${HDRS} DESTINATION ${MINGW_INCLUDE}/pip)
|
||||
if(PIP_LANG)
|
||||
install(FILES ${PIP_LANG} DESTINATION ${MINGW_INCLUDE}/../share/pip/lang)
|
||||
endif()
|
||||
if(HDR_DIRS)
|
||||
install(DIRECTORY ${HDR_DIRS} DESTINATION ${MINGW_INCLUDE}/pip)
|
||||
endif()
|
||||
@@ -681,7 +521,6 @@ if(NOT LOCAL)
|
||||
file(COPY "${STDLIB}" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/utils/code_model_generator")
|
||||
file(COPY "${STDLIB}" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/utils/resources_compiler")
|
||||
file(COPY "${STDLIB}" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/utils/deploy_tool")
|
||||
file(COPY "${STDLIB}" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/utils/translator")
|
||||
endif()
|
||||
else()
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/pip_export.h DESTINATION include)
|
||||
@@ -689,9 +528,6 @@ if(NOT LOCAL)
|
||||
else()
|
||||
if (NOT CROSSTOOLS)
|
||||
install(FILES ${HDRS} DESTINATION ${CMAKE_INSTALL_PREFIX}/include/pip)
|
||||
if(PIP_LANG)
|
||||
install(FILES ${PIP_LANG} DESTINATION ${CMAKE_INSTALL_PREFIX}/share/pip/lang)
|
||||
endif()
|
||||
if(HDR_DIRS)
|
||||
install(DIRECTORY ${HDR_DIRS} DESTINATION ${CMAKE_INSTALL_PREFIX}/include/pip)
|
||||
endif()
|
||||
@@ -707,9 +543,6 @@ else()
|
||||
install(TARGETS ${PIP_MODULES} DESTINATION lib)
|
||||
endif()
|
||||
install(FILES ${HDRS} DESTINATION include/pip)
|
||||
if(PIP_LANG)
|
||||
install(FILES ${PIP_LANG} DESTINATION share/pip/lang)
|
||||
endif()
|
||||
if(HDR_DIRS)
|
||||
install(DIRECTORY ${HDR_DIRS} DESTINATION include/pip)
|
||||
endif()
|
||||
@@ -718,6 +551,29 @@ endif()
|
||||
file(GLOB CMAKES "cmake/*.cmake" "cmake/*.in")
|
||||
install(FILES ${CMAKES} DESTINATION ${CMAKE_ROOT}/Modules)
|
||||
|
||||
if(NOT PIP_FREERTOS)
|
||||
|
||||
# Auxiliary
|
||||
if (NOT CROSSTOOLS)
|
||||
add_subdirectory("utils/piterminal")
|
||||
endif()
|
||||
|
||||
# Utils
|
||||
add_subdirectory("utils/code_model_generator")
|
||||
add_subdirectory("utils/resources_compiler")
|
||||
add_subdirectory("utils/deploy_tool")
|
||||
if(PIP_UTILS AND (NOT CROSSTOOLS))
|
||||
add_subdirectory("utils/system_test")
|
||||
add_subdirectory("utils/udp_file_transfer")
|
||||
if(sodium_FOUND)
|
||||
add_subdirectory("utils/system_daemon")
|
||||
add_subdirectory("utils/crypt_tool")
|
||||
add_subdirectory("utils/cloud_dispatcher")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
endif()
|
||||
|
||||
|
||||
shstk_is_parent_exists(_pe)
|
||||
if (_pe)
|
||||
@@ -732,18 +588,18 @@ if ((NOT PIP_FREERTOS) AND (NOT CROSSTOOLS))
|
||||
find_package(Doxygen)
|
||||
if(DOXYGEN_FOUND)
|
||||
set(DOXY_DEFINES "${PIP_EXPORTS}")
|
||||
foreach (_m "console" "usb" "compress" "crypt" "client_server" "cloud" "fftw" "opencl" "io_utils" "lua" "http_server" "http_client")
|
||||
foreach (_m "console" "usb" "compress" "crypt" "cloud" "fftw" "opencl" "io_utils" "lua")
|
||||
string(TOUPPER "${_m}" _mdef)
|
||||
list(APPEND DOXY_DEFINES "PIP_${_mdef}_EXPORT")
|
||||
endforeach()
|
||||
set(DOXY_PROJECT_NUMBER "${PIP_VERSION}")
|
||||
set(DOXY_QHP_CUST_FILTER_ATTRS "\"PIP ${PIP_VERSION}\"")
|
||||
set(DOXY_QHP_SECT_FILTER_ATTRS "\"PIP ${PIP_VERSION}\"")
|
||||
set(DOXY_PROJECT_NUMBER "${pip_VERSION}")
|
||||
set(DOXY_QHP_CUST_FILTER_ATTRS "\"PIP ${pip_VERSION}\"")
|
||||
set(DOXY_QHP_SECT_FILTER_ATTRS "\"PIP ${pip_VERSION}\"")
|
||||
set(DOXY_EXAMPLE_PATH "\"${CMAKE_CURRENT_SOURCE_DIR}/doc/examples\"")
|
||||
set(DOXY_IMAGE_PATH "\"${CMAKE_CURRENT_SOURCE_DIR}/doc/images\"")
|
||||
set(DOXY_LOGO_PATH "\"${CMAKE_CURRENT_SOURCE_DIR}/doc/pip.png\"")
|
||||
set(DOXY_EXCLUDE "\"${CMAKE_CURRENT_SOURCE_DIR}/3rd\"")
|
||||
set(DOXY_DOMAIN "${PIP_DOMAIN}.${PROJECT_NAME}.doc")
|
||||
set(DOXY_EXCLUDE "\"${CMAKE_CURRENT_SOURCE_DIR}/libs/lua/3rd\"")
|
||||
set(DOXY_DOMAIN "${pip_DOMAIN}.${PROJECT_NAME}.doc")
|
||||
if ("x${DOC_LANG}" STREQUAL "x")
|
||||
set(DOXY_OUTPUT_LANGUAGE English)
|
||||
set(DOXY_OUTPUT_DIR en)
|
||||
@@ -788,11 +644,10 @@ macro(expand_to_length _out _str _len)
|
||||
endmacro()
|
||||
|
||||
list(REMOVE_ITEM LIBS_STATUS ${PIP_MODULES})
|
||||
list(REMOVE_DUPLICATES LIBS_STATUS)
|
||||
message("----------PIP----------")
|
||||
message(" Version: ${PIP_VERSION} ")
|
||||
message(" Linkage: ${PIP_LIB_TYPE_MSG}")
|
||||
message(" Type : ${CMAKE_BUILD_TYPE}")
|
||||
message(" Version: ${pip_VERSION} ")
|
||||
message(" Linkage: ${pip_LIB_TYPE_MSG}")
|
||||
message(" Type : ${pip_BUILD_TYPE}")
|
||||
if (NOT LOCAL)
|
||||
message(" Install: \"${CMAKE_INSTALL_PREFIX}\"")
|
||||
else()
|
||||
@@ -804,6 +659,7 @@ message("")
|
||||
message(" Options:")
|
||||
message(" std::iostream: ${PIP_STD_IOSTREAM}")
|
||||
message(" ICU strings : ${PIP_ICU}")
|
||||
message(" Timer types : ${PIP_TIMERS}")
|
||||
message(" Introspection: ${PIP_INTROSPECTION}")
|
||||
message(" Coverage : ${PIP_COVERAGE}")
|
||||
if(INTROSPECTION)
|
||||
|
||||
@@ -33,10 +33,10 @@ You should add ${<out_var>} to your target.
|
||||
|
||||
## Documentation
|
||||
|
||||
[🇺🇸 Online documentation](https://shstk.ru/pip/html/en/index.html)
|
||||
[🇺🇸 Online documentation](https://shs.tools/pip/html/en/index.html)
|
||||
|
||||
[🇺🇸 Qt-help](https://shstk.ru/pip/pip_en.qch)
|
||||
[🇺🇸 Qt-help](https://shs.tools/pip/pip_en.qch)
|
||||
|
||||
[🇷🇺 Онлайн документация](https://shstk.ru/pip/html/ru/index.html)
|
||||
[🇷🇺 Онлайн документация](https://shs.tools/pip/html/ru/index.html)
|
||||
|
||||
[🇷🇺 Qt-help](https://shstk.ru/pip/pip_ru.qch)
|
||||
[🇷🇺 Qt-help](https://shs.tools/pip/pip_ru.qch)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user