Compare commits
97 Commits
5755d172cd
...
pico_sdk
| Author | SHA1 | Date | |
|---|---|---|---|
| cf89d77981 | |||
| 4885623492 | |||
| 6c3c763934 | |||
| 4d841787fc | |||
| 790246afea | |||
| 978e350722 | |||
| 1d76cacae2 | |||
| db954ffdaa | |||
| 8d6ae976a3 | |||
| e767fae934 | |||
| 5db97ca959 | |||
| daab41e41e | |||
| a61c8477c7 | |||
| 788ad8f2c0 | |||
| 78afc179c4 | |||
| 69ec4c9837 | |||
| 2368de6e93 | |||
| e5df76ab1d | |||
| fdec0e66a8 | |||
| 5f3baa5580 | |||
| 7083b2c32b | |||
| af02684dc5 | |||
| 2806086558 | |||
| ce962bfb40 | |||
| dcdd7db33d | |||
| 3c72db2de8 | |||
| 53faaeb396 | |||
| 2928a690b8 | |||
| 220ce225f8 | |||
| ac89c499ab | |||
| 8d1c97da04 | |||
| aa140fd4ec | |||
| 08161c9aad | |||
| 40cda7d988 | |||
| c05b8b4095 | |||
| 3d7ba1dee6 | |||
| a299ada873 | |||
| 91144ad338 | |||
| ef8b785ac6 | |||
| 27f37c9cc1 | |||
| f2464ed76b | |||
| a3615c5666 | |||
| 1b3f72d429 | |||
| 04152f05a9 | |||
| d95944dcfc | |||
| a17644a953 | |||
| 3426a3064e | |||
| a41379a40e | |||
| 39266f8c3c | |||
| 3bcb778628 | |||
| 781b430c33 | |||
| 0ac7ea3096 | |||
| d6a0ae6106 | |||
| 3625afa783 | |||
| 154cbd0160 | |||
| b6c5d65a8d | |||
| d4254121b8 | |||
| da30ae558f | |||
| 7a6936ccd9 | |||
| 58acf7a823 | |||
| d9719a7a50 | |||
| bf63365370 | |||
| 64e142b8c6 | |||
| be2d3c197c | |||
| 30c4f215a2 | |||
| 6ffbbbe636 | |||
| d62599fb8e | |||
| 7e371132ae | |||
| 3504c25f84 | |||
| 68615035aa | |||
| 654c0847b2 | |||
| 91955d44fa | |||
| a2c24c9f07 | |||
| cf25cacc17 | |||
| 19daab173c | |||
| 19f8f522b3 | |||
| 905d39c07b | |||
| 9504792586 | |||
| 1b51d09c58 | |||
| ed3d4c4217 | |||
| 7b52f6d70d | |||
| 19fe33383a | |||
| e57719c118 | |||
| edb077b400 | |||
| bad51db0c8 | |||
| 87ab4c4c28 | |||
| 8f3456a650 | |||
| ce1aee1553 | |||
| dfdc4b8bdc | |||
| 8a61cfe7ef | |||
| 78f79d92aa | |||
| f09fe03e0d | |||
| 8e18ec15c9 | |||
| 4340f5444d | |||
| 8e96750046 | |||
| d01baffb0b | |||
| 82cda42e75 |
@@ -134,8 +134,8 @@ JavaScriptQuotes: Leave
|
||||
JavaScriptWrapImports: true
|
||||
KeepEmptyLinesAtTheStartOfBlocks: false
|
||||
LambdaBodyIndentation: Signature
|
||||
MacroBlockBegin: "PRIVATE_DEFINITION_START|STATIC_INITIALIZER_BEGIN"
|
||||
MacroBlockEnd: "PRIVATE_DEFINITION_END|STATIC_INITIALIZER_END"
|
||||
MacroBlockBegin: "PRIVATE_DEFINITION_START|STATIC_INITIALIZER_BEGIN|DECLARE_UNIT_CLASS_BEGIN"
|
||||
MacroBlockEnd: "PRIVATE_DEFINITION_END|PRIVATE_DEFINITION_END_NO_INITIALIZE|STATIC_INITIALIZER_END|DECLARE_UNIT_CLASS_END"
|
||||
MaxEmptyLinesToKeep: 2
|
||||
NamespaceIndentation: None
|
||||
ObjCBinPackProtocolList: Auto
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -5,3 +5,4 @@ _unsused
|
||||
CMakeLists.txt.user*
|
||||
/include
|
||||
/release
|
||||
/build*
|
||||
|
||||
121
3rd/BLAKE2/COPYING
Normal file
121
3rd/BLAKE2/COPYING
Normal file
@@ -0,0 +1,121 @@
|
||||
Creative Commons Legal Code
|
||||
|
||||
CC0 1.0 Universal
|
||||
|
||||
CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
|
||||
LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
|
||||
ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
|
||||
INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
|
||||
REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
|
||||
PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
|
||||
THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
|
||||
HEREUNDER.
|
||||
|
||||
Statement of Purpose
|
||||
|
||||
The laws of most jurisdictions throughout the world automatically confer
|
||||
exclusive Copyright and Related Rights (defined below) upon the creator
|
||||
and subsequent owner(s) (each and all, an "owner") of an original work of
|
||||
authorship and/or a database (each, a "Work").
|
||||
|
||||
Certain owners wish to permanently relinquish those rights to a Work for
|
||||
the purpose of contributing to a commons of creative, cultural and
|
||||
scientific works ("Commons") that the public can reliably and without fear
|
||||
of later claims of infringement build upon, modify, incorporate in other
|
||||
works, reuse and redistribute as freely as possible in any form whatsoever
|
||||
and for any purposes, including without limitation commercial purposes.
|
||||
These owners may contribute to the Commons to promote the ideal of a free
|
||||
culture and the further production of creative, cultural and scientific
|
||||
works, or to gain reputation or greater distribution for their Work in
|
||||
part through the use and efforts of others.
|
||||
|
||||
For these and/or other purposes and motivations, and without any
|
||||
expectation of additional consideration or compensation, the person
|
||||
associating CC0 with a Work (the "Affirmer"), to the extent that he or she
|
||||
is an owner of Copyright and Related Rights in the Work, voluntarily
|
||||
elects to apply CC0 to the Work and publicly distribute the Work under its
|
||||
terms, with knowledge of his or her Copyright and Related Rights in the
|
||||
Work and the meaning and intended legal effect of CC0 on those rights.
|
||||
|
||||
1. Copyright and Related Rights. A Work made available under CC0 may be
|
||||
protected by copyright and related or neighboring rights ("Copyright and
|
||||
Related Rights"). Copyright and Related Rights include, but are not
|
||||
limited to, the following:
|
||||
|
||||
i. the right to reproduce, adapt, distribute, perform, display,
|
||||
communicate, and translate a Work;
|
||||
ii. moral rights retained by the original author(s) and/or performer(s);
|
||||
iii. publicity and privacy rights pertaining to a person's image or
|
||||
likeness depicted in a Work;
|
||||
iv. rights protecting against unfair competition in regards to a Work,
|
||||
subject to the limitations in paragraph 4(a), below;
|
||||
v. rights protecting the extraction, dissemination, use and reuse of data
|
||||
in a Work;
|
||||
vi. database rights (such as those arising under Directive 96/9/EC of the
|
||||
European Parliament and of the Council of 11 March 1996 on the legal
|
||||
protection of databases, and under any national implementation
|
||||
thereof, including any amended or successor version of such
|
||||
directive); and
|
||||
vii. other similar, equivalent or corresponding rights throughout the
|
||||
world based on applicable law or treaty, and any national
|
||||
implementations thereof.
|
||||
|
||||
2. Waiver. To the greatest extent permitted by, but not in contravention
|
||||
of, applicable law, Affirmer hereby overtly, fully, permanently,
|
||||
irrevocably and unconditionally waives, abandons, and surrenders all of
|
||||
Affirmer's Copyright and Related Rights and associated claims and causes
|
||||
of action, whether now known or unknown (including existing as well as
|
||||
future claims and causes of action), in the Work (i) in all territories
|
||||
worldwide, (ii) for the maximum duration provided by applicable law or
|
||||
treaty (including future time extensions), (iii) in any current or future
|
||||
medium and for any number of copies, and (iv) for any purpose whatsoever,
|
||||
including without limitation commercial, advertising or promotional
|
||||
purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
|
||||
member of the public at large and to the detriment of Affirmer's heirs and
|
||||
successors, fully intending that such Waiver shall not be subject to
|
||||
revocation, rescission, cancellation, termination, or any other legal or
|
||||
equitable action to disrupt the quiet enjoyment of the Work by the public
|
||||
as contemplated by Affirmer's express Statement of Purpose.
|
||||
|
||||
3. Public License Fallback. Should any part of the Waiver for any reason
|
||||
be judged legally invalid or ineffective under applicable law, then the
|
||||
Waiver shall be preserved to the maximum extent permitted taking into
|
||||
account Affirmer's express Statement of Purpose. In addition, to the
|
||||
extent the Waiver is so judged Affirmer hereby grants to each affected
|
||||
person a royalty-free, non transferable, non sublicensable, non exclusive,
|
||||
irrevocable and unconditional license to exercise Affirmer's Copyright and
|
||||
Related Rights in the Work (i) in all territories worldwide, (ii) for the
|
||||
maximum duration provided by applicable law or treaty (including future
|
||||
time extensions), (iii) in any current or future medium and for any number
|
||||
of copies, and (iv) for any purpose whatsoever, including without
|
||||
limitation commercial, advertising or promotional purposes (the
|
||||
"License"). The License shall be deemed effective as of the date CC0 was
|
||||
applied by Affirmer to the Work. Should any part of the License for any
|
||||
reason be judged legally invalid or ineffective under applicable law, such
|
||||
partial invalidity or ineffectiveness shall not invalidate the remainder
|
||||
of the License, and in such case Affirmer hereby affirms that he or she
|
||||
will not (i) exercise any of his or her remaining Copyright and Related
|
||||
Rights in the Work or (ii) assert any associated claims and causes of
|
||||
action with respect to the Work, in either case contrary to Affirmer's
|
||||
express Statement of Purpose.
|
||||
|
||||
4. Limitations and Disclaimers.
|
||||
|
||||
a. No trademark or patent rights held by Affirmer are waived, abandoned,
|
||||
surrendered, licensed or otherwise affected by this document.
|
||||
b. Affirmer offers the Work as-is and makes no representations or
|
||||
warranties of any kind concerning the Work, express, implied,
|
||||
statutory or otherwise, including without limitation warranties of
|
||||
title, merchantability, fitness for a particular purpose, non
|
||||
infringement, or the absence of latent or other defects, accuracy, or
|
||||
the present or absence of errors, whether or not discoverable, all to
|
||||
the greatest extent permissible under applicable law.
|
||||
c. Affirmer disclaims responsibility for clearing rights of other persons
|
||||
that may apply to the Work or any use thereof, including without
|
||||
limitation any person's Copyright and Related Rights in the Work.
|
||||
Further, Affirmer disclaims responsibility for obtaining any necessary
|
||||
consents, permissions or other rights required for any use of the
|
||||
Work.
|
||||
d. Affirmer understands and acknowledges that Creative Commons is not a
|
||||
party to this document and has no duty or obligation with respect to
|
||||
this CC0 or use of the Work.
|
||||
219
3rd/SipHash/LICENSE_A2LLVM
Normal file
219
3rd/SipHash/LICENSE_A2LLVM
Normal file
@@ -0,0 +1,219 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
|
||||
---- LLVM Exceptions to the Apache 2.0 License ----
|
||||
|
||||
As an exception, if, as a result of your compiling your source code, portions
|
||||
of this Software are embedded into an Object form of such source code, you
|
||||
may redistribute such embedded portions in such Object form without complying
|
||||
with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
|
||||
|
||||
In addition, if you combine or link compiled forms of this Software with
|
||||
software that is licensed under the GPLv2 ("Combined Software") and if a
|
||||
court of competent jurisdiction determines that the patent provision (Section
|
||||
3), the indemnity provision (Section 9) or other Section of the License
|
||||
conflicts with the conditions of the GPLv2, you may retroactively and
|
||||
prospectively choose to deem waived or otherwise exclude such Section(s) of
|
||||
the License, but only in their entirety and only with respect to the Combined
|
||||
Software.
|
||||
|
||||
116
3rd/SipHash/LICENSE_CC0
Normal file
116
3rd/SipHash/LICENSE_CC0
Normal file
@@ -0,0 +1,116 @@
|
||||
CC0 1.0 Universal
|
||||
|
||||
Statement of Purpose
|
||||
|
||||
The laws of most jurisdictions throughout the world automatically confer
|
||||
exclusive Copyright and Related Rights (defined below) upon the creator and
|
||||
subsequent owner(s) (each and all, an "owner") of an original work of
|
||||
authorship and/or a database (each, a "Work").
|
||||
|
||||
Certain owners wish to permanently relinquish those rights to a Work for the
|
||||
purpose of contributing to a commons of creative, cultural and scientific
|
||||
works ("Commons") that the public can reliably and without fear of later
|
||||
claims of infringement build upon, modify, incorporate in other works, reuse
|
||||
and redistribute as freely as possible in any form whatsoever and for any
|
||||
purposes, including without limitation commercial purposes. These owners may
|
||||
contribute to the Commons to promote the ideal of a free culture and the
|
||||
further production of creative, cultural and scientific works, or to gain
|
||||
reputation or greater distribution for their Work in part through the use and
|
||||
efforts of others.
|
||||
|
||||
For these and/or other purposes and motivations, and without any expectation
|
||||
of additional consideration or compensation, the person associating CC0 with a
|
||||
Work (the "Affirmer"), to the extent that he or she is an owner of Copyright
|
||||
and Related Rights in the Work, voluntarily elects to apply CC0 to the Work
|
||||
and publicly distribute the Work under its terms, with knowledge of his or her
|
||||
Copyright and Related Rights in the Work and the meaning and intended legal
|
||||
effect of CC0 on those rights.
|
||||
|
||||
1. Copyright and Related Rights. A Work made available under CC0 may be
|
||||
protected by copyright and related or neighboring rights ("Copyright and
|
||||
Related Rights"). Copyright and Related Rights include, but are not limited
|
||||
to, the following:
|
||||
|
||||
i. the right to reproduce, adapt, distribute, perform, display, communicate,
|
||||
and translate a Work;
|
||||
|
||||
ii. moral rights retained by the original author(s) and/or performer(s);
|
||||
|
||||
iii. publicity and privacy rights pertaining to a person's image or likeness
|
||||
depicted in a Work;
|
||||
|
||||
iv. rights protecting against unfair competition in regards to a Work,
|
||||
subject to the limitations in paragraph 4(a), below;
|
||||
|
||||
v. rights protecting the extraction, dissemination, use and reuse of data in
|
||||
a Work;
|
||||
|
||||
vi. database rights (such as those arising under Directive 96/9/EC of the
|
||||
European Parliament and of the Council of 11 March 1996 on the legal
|
||||
protection of databases, and under any national implementation thereof,
|
||||
including any amended or successor version of such directive); and
|
||||
|
||||
vii. other similar, equivalent or corresponding rights throughout the world
|
||||
based on applicable law or treaty, and any national implementations thereof.
|
||||
|
||||
2. Waiver. To the greatest extent permitted by, but not in contravention of,
|
||||
applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
|
||||
unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
|
||||
and Related Rights and associated claims and causes of action, whether now
|
||||
known or unknown (including existing as well as future claims and causes of
|
||||
action), in the Work (i) in all territories worldwide, (ii) for the maximum
|
||||
duration provided by applicable law or treaty (including future time
|
||||
extensions), (iii) in any current or future medium and for any number of
|
||||
copies, and (iv) for any purpose whatsoever, including without limitation
|
||||
commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes
|
||||
the Waiver for the benefit of each member of the public at large and to the
|
||||
detriment of Affirmer's heirs and successors, fully intending that such Waiver
|
||||
shall not be subject to revocation, rescission, cancellation, termination, or
|
||||
any other legal or equitable action to disrupt the quiet enjoyment of the Work
|
||||
by the public as contemplated by Affirmer's express Statement of Purpose.
|
||||
|
||||
3. Public License Fallback. Should any part of the Waiver for any reason be
|
||||
judged legally invalid or ineffective under applicable law, then the Waiver
|
||||
shall be preserved to the maximum extent permitted taking into account
|
||||
Affirmer's express Statement of Purpose. In addition, to the extent the Waiver
|
||||
is so judged Affirmer hereby grants to each affected person a royalty-free,
|
||||
non transferable, non sublicensable, non exclusive, irrevocable and
|
||||
unconditional license to exercise Affirmer's Copyright and Related Rights in
|
||||
the Work (i) in all territories worldwide, (ii) for the maximum duration
|
||||
provided by applicable law or treaty (including future time extensions), (iii)
|
||||
in any current or future medium and for any number of copies, and (iv) for any
|
||||
purpose whatsoever, including without limitation commercial, advertising or
|
||||
promotional purposes (the "License"). The License shall be deemed effective as
|
||||
of the date CC0 was applied by Affirmer to the Work. Should any part of the
|
||||
License for any reason be judged legally invalid or ineffective under
|
||||
applicable law, such partial invalidity or ineffectiveness shall not
|
||||
invalidate the remainder of the License, and in such case Affirmer hereby
|
||||
affirms that he or she will not (i) exercise any of his or her remaining
|
||||
Copyright and Related Rights in the Work or (ii) assert any associated claims
|
||||
and causes of action with respect to the Work, in either case contrary to
|
||||
Affirmer's express Statement of Purpose.
|
||||
|
||||
4. Limitations and Disclaimers.
|
||||
|
||||
a. No trademark or patent rights held by Affirmer are waived, abandoned,
|
||||
surrendered, licensed or otherwise affected by this document.
|
||||
|
||||
b. Affirmer offers the Work as-is and makes no representations or warranties
|
||||
of any kind concerning the Work, express, implied, statutory or otherwise,
|
||||
including without limitation warranties of title, merchantability, fitness
|
||||
for a particular purpose, non infringement, or the absence of latent or
|
||||
other defects, accuracy, or the present or absence of errors, whether or not
|
||||
discoverable, all to the greatest extent permissible under applicable law.
|
||||
|
||||
c. Affirmer disclaims responsibility for clearing rights of other persons
|
||||
that may apply to the Work or any use thereof, including without limitation
|
||||
any person's Copyright and Related Rights in the Work. Further, Affirmer
|
||||
disclaims responsibility for obtaining any necessary consents, permissions
|
||||
or other rights required for any use of the Work.
|
||||
|
||||
d. Affirmer understands and acknowledges that Creative Commons is not a
|
||||
party to this document and has no duty or obligation with respect to this
|
||||
CC0 or use of the Work.
|
||||
|
||||
For more information, please see
|
||||
<http://creativecommons.org/publicdomain/zero/1.0/>
|
||||
7
3rd/SipHash/LICENSE_MIT
Normal file
7
3rd/SipHash/LICENSE_MIT
Normal file
@@ -0,0 +1,7 @@
|
||||
Copyright 2012-2024 JP Aumasson
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
166
3rd/SipHash/halfsiphash.c
Normal file
166
3rd/SipHash/halfsiphash.c
Normal file
@@ -0,0 +1,166 @@
|
||||
|
||||
/*
|
||||
SipHash reference C implementation
|
||||
|
||||
Copyright (c) 2016 Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
|
||||
|
||||
To the extent possible under law, the author(s) have dedicated all copyright
|
||||
and related and neighboring rights to this software to the public domain
|
||||
worldwide. This software is distributed without any warranty.
|
||||
|
||||
You should have received a copy of the CC0 Public Domain Dedication along
|
||||
with
|
||||
this software. If not, see
|
||||
<http://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
*/
|
||||
#include "halfsiphash.h"
|
||||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* Default round counts: cROUNDS mixing rounds per absorbed word and dROUNDS
   mixing rounds per finalization pass. Both can be overridden at build time. */
#ifndef cROUNDS
#define cROUNDS 2
#endif
#ifndef dROUNDS
#define dROUNDS 4
#endif

/* Rotate the 32-bit value x left by b bits. b must be in 1..31: a count of 0
   would make the right shift a shift by the full width, which is undefined. */
#define ROTL(x, b) (uint32_t)(((x) << (b)) | ((x) >> (32 - (b))))

/* Store the 32-bit value v into p[0..3], least significant byte first.
   Wrapped in do/while (0) so that it expands to exactly one statement and
   stays correct under an unbraced if/else. */
#define U32TO8_LE(p, v) \
    do { \
        (p)[0] = (uint8_t)((v)); \
        (p)[1] = (uint8_t)((v) >> 8); \
        (p)[2] = (uint8_t)((v) >> 16); \
        (p)[3] = (uint8_t)((v) >> 24); \
    } while (0)

/* Load p[0..3] as a 32-bit value, least significant byte first. */
#define U8TO32_LE(p) \
    (((uint32_t)((p)[0])) | ((uint32_t)((p)[1]) << 8) | \
     ((uint32_t)((p)[2]) << 16) | ((uint32_t)((p)[3]) << 24))

/* One mixing round. Operates on the locals v0..v3 of the enclosing function. */
#define SIPROUND \
    do { \
        v0 += v1; \
        v1 = ROTL(v1, 5); \
        v1 ^= v0; \
        v0 = ROTL(v0, 16); \
        v2 += v3; \
        v3 = ROTL(v3, 8); \
        v3 ^= v2; \
        v0 += v3; \
        v3 = ROTL(v3, 7); \
        v3 ^= v0; \
        v2 += v1; \
        v1 = ROTL(v1, 13); \
        v1 ^= v2; \
        v2 = ROTL(v2, 16); \
    } while (0)

/* TRACE prints the four state words when DEBUG_SIPHASH is defined and expands
   to nothing otherwise. It reads inlen and v0..v3 from the enclosing function. */
#ifdef DEBUG_SIPHASH
#include <inttypes.h> /* PRIx32 is declared here, not in <stdint.h> */
#include <stdio.h>

#define TRACE \
    do { \
        printf("(%3zu) v0 %08" PRIx32 "\n", inlen, v0); \
        printf("(%3zu) v1 %08" PRIx32 "\n", inlen, v1); \
        printf("(%3zu) v2 %08" PRIx32 "\n", inlen, v2); \
        printf("(%3zu) v3 %08" PRIx32 "\n", inlen, v3); \
    } while (0)
#else
#define TRACE
#endif

/*
    Computes a keyed hash of a byte string using 32-bit state words.

    in:     pointer to the input data (read-only)
    inlen:  input length in bytes (any size_t value; only its low 8 bits are
            folded into the final word)
    k:      pointer to the key (read-only), 8 bytes are read
    out:    pointer to the output buffer (write-only), outlen bytes are written
    outlen: output length in bytes, must be 4 or 8

    Returns 0 on success. An unsupported outlen trips the assertion; when
    assertions are compiled out (NDEBUG) the function returns -1 and leaves
    out untouched instead of writing 8 bytes into a buffer of unknown size.
*/
int halfsiphash(const void *in, const size_t inlen, const void *k, uint8_t *out,
                const size_t outlen) {
    const unsigned char *ni = (const unsigned char *)in;
    const unsigned char *kk = (const unsigned char *)k;

    assert((outlen == 4) || (outlen == 8));
    if ((outlen != 4) && (outlen != 8)) {
        return -1;
    }

    uint32_t v0 = 0;
    uint32_t v1 = 0;
    uint32_t v2 = UINT32_C(0x6c796765);
    uint32_t v3 = UINT32_C(0x74656462);
    const uint32_t k0 = U8TO32_LE(kk);
    const uint32_t k1 = U8TO32_LE(kk + 4);
    uint32_t m;
    int i;
    /* First byte after the last complete 4-byte word of the input. */
    const unsigned char *end = ni + inlen - (inlen % sizeof(uint32_t));
    /* Number of trailing bytes (0..3) that do not fill a whole word. */
    const size_t left = inlen & 3;
    /* Final word: input length in the top byte, trailing bytes below it. */
    uint32_t b = ((uint32_t)inlen) << 24;

    v3 ^= k1;
    v2 ^= k0;
    v1 ^= k1;
    v0 ^= k0;

    /* The 8-byte variant is domain-separated from the 4-byte one. */
    if (outlen == 8) {
        v1 ^= 0xee;
    }

    /* Absorb every complete 4-byte little-endian word. */
    for (; ni != end; ni += 4) {
        m = U8TO32_LE(ni);
        v3 ^= m;

        TRACE;
        for (i = 0; i < cROUNDS; ++i) {
            SIPROUND;
        }

        v0 ^= m;
    }

    /* Fold the 0..3 trailing bytes into the final word. */
    switch (left) {
    case 3:
        b |= ((uint32_t)ni[2]) << 16;
        /* FALLTHRU */
    case 2:
        b |= ((uint32_t)ni[1]) << 8;
        /* FALLTHRU */
    case 1:
        b |= ((uint32_t)ni[0]);
        break;
    case 0:
    default:
        break;
    }

    v3 ^= b;

    TRACE;
    for (i = 0; i < cROUNDS; ++i) {
        SIPROUND;
    }

    v0 ^= b;

    if (outlen == 8) {
        v2 ^= 0xee;
    } else {
        v2 ^= 0xff;
    }

    TRACE;
    for (i = 0; i < dROUNDS; ++i) {
        SIPROUND;
    }

    /* First (or only) 4 output bytes. */
    b = v1 ^ v3;
    U32TO8_LE(out, b);

    if (outlen == 4) {
        return 0;
    }

    /* Second finalization pass produces output bytes 4..7. */
    v1 ^= 0xdd;

    TRACE;
    for (i = 0; i < dROUNDS; ++i) {
        SIPROUND;
    }

    b = v1 ^ v3;
    U32TO8_LE(out + 4, b);

    return 0;
}
|
||||
34
3rd/SipHash/halfsiphash.h
Normal file
34
3rd/SipHash/halfsiphash.h
Normal file
@@ -0,0 +1,34 @@
|
||||
/*
|
||||
SipHash reference C implementation
|
||||
|
||||
Copyright (c) 2012-2021 Jean-Philippe Aumasson
|
||||
<jeanphilippe.aumasson@gmail.com>
|
||||
Copyright (c) 2012-2014 Daniel J. Bernstein <djb@cr.yp.to>
|
||||
|
||||
To the extent possible under law, the author(s) have dedicated all copyright
|
||||
and related and neighboring rights to this software to the public domain
|
||||
worldwide. This software is distributed without any warranty.
|
||||
|
||||
You should have received a copy of the CC0 Public Domain Dedication along
|
||||
with
|
||||
this software. If not, see
|
||||
<http://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
*/
|
||||
|
||||
#ifndef HALFSIPHASH_H
|
||||
#define HALFSIPHASH_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
    Computes a keyed hash of inlen bytes at in and writes it to out.

    in:     input data (read-only)
    inlen:  input length in bytes
    k:      key (read-only), must be 8 bytes
    out:    output buffer, outlen bytes must be allocated
    outlen: output length in bytes, must be 4 or 8 (the implementation
            asserts this)

    Returns 0 on success.
*/
int halfsiphash(const void * in, const size_t inlen, const void * k, uint8_t * out, const size_t outlen);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
185
3rd/SipHash/siphash.c
Normal file
185
3rd/SipHash/siphash.c
Normal file
@@ -0,0 +1,185 @@
|
||||
/*
|
||||
SipHash reference C implementation
|
||||
|
||||
Copyright (c) 2012-2022 Jean-Philippe Aumasson
|
||||
<jeanphilippe.aumasson@gmail.com>
|
||||
Copyright (c) 2012-2014 Daniel J. Bernstein <djb@cr.yp.to>
|
||||
|
||||
To the extent possible under law, the author(s) have dedicated all copyright
|
||||
and related and neighboring rights to this software to the public domain
|
||||
worldwide. This software is distributed without any warranty.
|
||||
|
||||
You should have received a copy of the CC0 Public Domain Dedication along
|
||||
with
|
||||
this software. If not, see
|
||||
<http://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
*/
|
||||
|
||||
#include "siphash.h"
|
||||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* Default round counts (SipHash-2-4): cROUNDS mixing rounds per absorbed word
   and dROUNDS mixing rounds per finalization pass. Both can be overridden at
   build time. */
#ifndef cROUNDS
#define cROUNDS 2
#endif
#ifndef dROUNDS
#define dROUNDS 4
#endif

/* Rotate the 64-bit value x left by b bits. b must be in 1..63: a count of 0
   would make the right shift a shift by the full width, which is undefined. */
#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))

/* Store the 32-bit value v into p[0..3], least significant byte first.
   Wrapped in do/while (0) so that it expands to exactly one statement and
   stays correct under an unbraced if/else. */
#define U32TO8_LE(p, v) \
    do { \
        (p)[0] = (uint8_t)((v)); \
        (p)[1] = (uint8_t)((v) >> 8); \
        (p)[2] = (uint8_t)((v) >> 16); \
        (p)[3] = (uint8_t)((v) >> 24); \
    } while (0)

/* Store the 64-bit value v into p[0..7], least significant byte first. */
#define U64TO8_LE(p, v) \
    do { \
        U32TO8_LE((p), (uint32_t)((v))); \
        U32TO8_LE((p) + 4, (uint32_t)((v) >> 32)); \
    } while (0)

/* Load p[0..7] as a 64-bit value, least significant byte first. */
#define U8TO64_LE(p) \
    (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) | \
     ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) | \
     ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) | \
     ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56))

/* One mixing round. Operates on the locals v0..v3 of the enclosing function. */
#define SIPROUND \
    do { \
        v0 += v1; \
        v1 = ROTL(v1, 13); \
        v1 ^= v0; \
        v0 = ROTL(v0, 32); \
        v2 += v3; \
        v3 = ROTL(v3, 16); \
        v3 ^= v2; \
        v0 += v3; \
        v3 = ROTL(v3, 21); \
        v3 ^= v0; \
        v2 += v1; \
        v1 = ROTL(v1, 17); \
        v1 ^= v2; \
        v2 = ROTL(v2, 32); \
    } while (0)

/* TRACE prints the four state words when DEBUG_SIPHASH is defined and expands
   to nothing otherwise. It reads inlen and v0..v3 from the enclosing function. */
#ifdef DEBUG_SIPHASH
#include <inttypes.h> /* PRIx64 is declared here, not in <stdint.h> */
#include <stdio.h>

#define TRACE \
    do { \
        printf("(%3zu) v0 %016" PRIx64 "\n", inlen, v0); \
        printf("(%3zu) v1 %016" PRIx64 "\n", inlen, v1); \
        printf("(%3zu) v2 %016" PRIx64 "\n", inlen, v2); \
        printf("(%3zu) v3 %016" PRIx64 "\n", inlen, v3); \
    } while (0)
#else
#define TRACE
#endif

/*
    Computes a SipHash value.

    in:     pointer to the input data (read-only)
    inlen:  input length in bytes (any size_t value; only its low 8 bits are
            folded into the final word)
    k:      pointer to the key (read-only), 16 bytes are read
    out:    pointer to the output buffer (write-only), outlen bytes are written
    outlen: output length in bytes, must be 8 or 16

    Returns 0 on success. An unsupported outlen trips the assertion; when
    assertions are compiled out (NDEBUG) the function returns -1 and leaves
    out untouched instead of writing 16 bytes into a buffer of unknown size.
*/
int siphash(const void *in, const size_t inlen, const void *k, uint8_t *out,
            const size_t outlen) {
    const unsigned char *ni = (const unsigned char *)in;
    const unsigned char *kk = (const unsigned char *)k;

    assert((outlen == 8) || (outlen == 16));
    if ((outlen != 8) && (outlen != 16)) {
        return -1;
    }

    uint64_t v0 = UINT64_C(0x736f6d6570736575);
    uint64_t v1 = UINT64_C(0x646f72616e646f6d);
    uint64_t v2 = UINT64_C(0x6c7967656e657261);
    uint64_t v3 = UINT64_C(0x7465646279746573);
    const uint64_t k0 = U8TO64_LE(kk);
    const uint64_t k1 = U8TO64_LE(kk + 8);
    uint64_t m;
    int i;
    /* First byte after the last complete 8-byte word of the input. */
    const unsigned char *end = ni + inlen - (inlen % sizeof(uint64_t));
    /* Number of trailing bytes (0..7) that do not fill a whole word. */
    const size_t left = inlen & 7;
    /* Final word: input length in the top byte, trailing bytes below it. */
    uint64_t b = ((uint64_t)inlen) << 56;

    v3 ^= k1;
    v2 ^= k0;
    v1 ^= k1;
    v0 ^= k0;

    /* The 16-byte variant is domain-separated from the 8-byte one. */
    if (outlen == 16) {
        v1 ^= 0xee;
    }

    /* Absorb every complete 8-byte little-endian word. */
    for (; ni != end; ni += 8) {
        m = U8TO64_LE(ni);
        v3 ^= m;

        TRACE;
        for (i = 0; i < cROUNDS; ++i) {
            SIPROUND;
        }

        v0 ^= m;
    }

    /* Fold the 0..7 trailing bytes into the final word. */
    switch (left) {
    case 7:
        b |= ((uint64_t)ni[6]) << 48;
        /* FALLTHRU */
    case 6:
        b |= ((uint64_t)ni[5]) << 40;
        /* FALLTHRU */
    case 5:
        b |= ((uint64_t)ni[4]) << 32;
        /* FALLTHRU */
    case 4:
        b |= ((uint64_t)ni[3]) << 24;
        /* FALLTHRU */
    case 3:
        b |= ((uint64_t)ni[2]) << 16;
        /* FALLTHRU */
    case 2:
        b |= ((uint64_t)ni[1]) << 8;
        /* FALLTHRU */
    case 1:
        b |= ((uint64_t)ni[0]);
        break;
    case 0:
    default:
        break;
    }

    v3 ^= b;

    TRACE;
    for (i = 0; i < cROUNDS; ++i) {
        SIPROUND;
    }

    v0 ^= b;

    if (outlen == 16) {
        v2 ^= 0xee;
    } else {
        v2 ^= 0xff;
    }

    TRACE;
    for (i = 0; i < dROUNDS; ++i) {
        SIPROUND;
    }

    /* First (or only) 8 output bytes. */
    b = v0 ^ v1 ^ v2 ^ v3;
    U64TO8_LE(out, b);

    if (outlen == 8) {
        return 0;
    }

    /* Second finalization pass produces output bytes 8..15. */
    v1 ^= 0xdd;

    TRACE;
    for (i = 0; i < dROUNDS; ++i) {
        SIPROUND;
    }

    b = v0 ^ v1 ^ v2 ^ v3;
    U64TO8_LE(out + 8, b);

    return 0;
}
|
||||
34
3rd/SipHash/siphash.h
Normal file
34
3rd/SipHash/siphash.h
Normal file
@@ -0,0 +1,34 @@
|
||||
/*
|
||||
SipHash reference C implementation
|
||||
|
||||
Copyright (c) 2012-2021 Jean-Philippe Aumasson
|
||||
<jeanphilippe.aumasson@gmail.com>
|
||||
Copyright (c) 2012-2014 Daniel J. Bernstein <djb@cr.yp.to>
|
||||
|
||||
To the extent possible under law, the author(s) have dedicated all copyright
|
||||
and related and neighboring rights to this software to the public domain
|
||||
worldwide. This software is distributed without any warranty.
|
||||
|
||||
You should have received a copy of the CC0 Public Domain Dedication along
|
||||
with
|
||||
this software. If not, see
|
||||
<http://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
*/
|
||||
|
||||
#ifndef SIPHASH_H
|
||||
#define SIPHASH_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
    Computes a SipHash value of inlen bytes at in and writes it to out.

    in:     input data (read-only)
    inlen:  input length in bytes
    k:      key (read-only), must be 16 bytes
    out:    output buffer, outlen bytes must be allocated
    outlen: output length in bytes, must be 8 or 16 (the implementation
            asserts this)

    Returns 0 on success.
*/
int siphash(const void * in, const size_t inlen, const void * k, uint8_t * out, const size_t outlen);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
2826
3rd/SipHash/vectors.h
Normal file
2826
3rd/SipHash/vectors.h
Normal file
@@ -0,0 +1,2826 @@
|
||||
#include <stdint.h>
|
||||
|
||||
/*
 * Table of 64 reference values, 8 bytes per row.
 * NOTE(review): presumably row i is the expected 8-byte siphash() output for
 * an i-byte message under the test key -- confirm against the test driver.
 */
const uint8_t vectors_sip64[64][8] = {
    {0x31, 0x0e, 0x0e, 0xdd, 0x47, 0xdb, 0x6f, 0x72},
    {0xfd, 0x67, 0xdc, 0x93, 0xc5, 0x39, 0xf8, 0x74},
    {0x5a, 0x4f, 0xa9, 0xd9, 0x09, 0x80, 0x6c, 0x0d},
    {0x2d, 0x7e, 0xfb, 0xd7, 0x96, 0x66, 0x67, 0x85},
    {0xb7, 0x87, 0x71, 0x27, 0xe0, 0x94, 0x27, 0xcf},
    {0x8d, 0xa6, 0x99, 0xcd, 0x64, 0x55, 0x76, 0x18},
    {0xce, 0xe3, 0xfe, 0x58, 0x6e, 0x46, 0xc9, 0xcb},
    {0x37, 0xd1, 0x01, 0x8b, 0xf5, 0x00, 0x02, 0xab},
    {0x62, 0x24, 0x93, 0x9a, 0x79, 0xf5, 0xf5, 0x93},
    {0xb0, 0xe4, 0xa9, 0x0b, 0xdf, 0x82, 0x00, 0x9e},
    {0xf3, 0xb9, 0xdd, 0x94, 0xc5, 0xbb, 0x5d, 0x7a},
    {0xa7, 0xad, 0x6b, 0x22, 0x46, 0x2f, 0xb3, 0xf4},
    {0xfb, 0xe5, 0x0e, 0x86, 0xbc, 0x8f, 0x1e, 0x75},
    {0x90, 0x3d, 0x84, 0xc0, 0x27, 0x56, 0xea, 0x14},
    {0xee, 0xf2, 0x7a, 0x8e, 0x90, 0xca, 0x23, 0xf7},
    {0xe5, 0x45, 0xbe, 0x49, 0x61, 0xca, 0x29, 0xa1},
    {0xdb, 0x9b, 0xc2, 0x57, 0x7f, 0xcc, 0x2a, 0x3f},
    {0x94, 0x47, 0xbe, 0x2c, 0xf5, 0xe9, 0x9a, 0x69},
    {0x9c, 0xd3, 0x8d, 0x96, 0xf0, 0xb3, 0xc1, 0x4b},
    {0xbd, 0x61, 0x79, 0xa7, 0x1d, 0xc9, 0x6d, 0xbb},
    {0x98, 0xee, 0xa2, 0x1a, 0xf2, 0x5c, 0xd6, 0xbe},
    {0xc7, 0x67, 0x3b, 0x2e, 0xb0, 0xcb, 0xf2, 0xd0},
    {0x88, 0x3e, 0xa3, 0xe3, 0x95, 0x67, 0x53, 0x93},
    {0xc8, 0xce, 0x5c, 0xcd, 0x8c, 0x03, 0x0c, 0xa8},
    {0x94, 0xaf, 0x49, 0xf6, 0xc6, 0x50, 0xad, 0xb8},
    {0xea, 0xb8, 0x85, 0x8a, 0xde, 0x92, 0xe1, 0xbc},
    {0xf3, 0x15, 0xbb, 0x5b, 0xb8, 0x35, 0xd8, 0x17},
    {0xad, 0xcf, 0x6b, 0x07, 0x63, 0x61, 0x2e, 0x2f},
    {0xa5, 0xc9, 0x1d, 0xa7, 0xac, 0xaa, 0x4d, 0xde},
    {0x71, 0x65, 0x95, 0x87, 0x66, 0x50, 0xa2, 0xa6},
    {0x28, 0xef, 0x49, 0x5c, 0x53, 0xa3, 0x87, 0xad},
    {0x42, 0xc3, 0x41, 0xd8, 0xfa, 0x92, 0xd8, 0x32},
    {0xce, 0x7c, 0xf2, 0x72, 0x2f, 0x51, 0x27, 0x71},
    {0xe3, 0x78, 0x59, 0xf9, 0x46, 0x23, 0xf3, 0xa7},
    {0x38, 0x12, 0x05, 0xbb, 0x1a, 0xb0, 0xe0, 0x12},
    {0xae, 0x97, 0xa1, 0x0f, 0xd4, 0x34, 0xe0, 0x15},
    {0xb4, 0xa3, 0x15, 0x08, 0xbe, 0xff, 0x4d, 0x31},
    {0x81, 0x39, 0x62, 0x29, 0xf0, 0x90, 0x79, 0x02},
    {0x4d, 0x0c, 0xf4, 0x9e, 0xe5, 0xd4, 0xdc, 0xca},
    {0x5c, 0x73, 0x33, 0x6a, 0x76, 0xd8, 0xbf, 0x9a},
    {0xd0, 0xa7, 0x04, 0x53, 0x6b, 0xa9, 0x3e, 0x0e},
    {0x92, 0x59, 0x58, 0xfc, 0xd6, 0x42, 0x0c, 0xad},
    {0xa9, 0x15, 0xc2, 0x9b, 0xc8, 0x06, 0x73, 0x18},
    {0x95, 0x2b, 0x79, 0xf3, 0xbc, 0x0a, 0xa6, 0xd4},
    {0xf2, 0x1d, 0xf2, 0xe4, 0x1d, 0x45, 0x35, 0xf9},
    {0x87, 0x57, 0x75, 0x19, 0x04, 0x8f, 0x53, 0xa9},
    {0x10, 0xa5, 0x6c, 0xf5, 0xdf, 0xcd, 0x9a, 0xdb},
    {0xeb, 0x75, 0x09, 0x5c, 0xcd, 0x98, 0x6c, 0xd0},
    {0x51, 0xa9, 0xcb, 0x9e, 0xcb, 0xa3, 0x12, 0xe6},
    {0x96, 0xaf, 0xad, 0xfc, 0x2c, 0xe6, 0x66, 0xc7},
    {0x72, 0xfe, 0x52, 0x97, 0x5a, 0x43, 0x64, 0xee},
    {0x5a, 0x16, 0x45, 0xb2, 0x76, 0xd5, 0x92, 0xa1},
    {0xb2, 0x74, 0xcb, 0x8e, 0xbf, 0x87, 0x87, 0x0a},
    {0x6f, 0x9b, 0xb4, 0x20, 0x3d, 0xe7, 0xb3, 0x81},
    {0xea, 0xec, 0xb2, 0xa3, 0x0b, 0x22, 0xa8, 0x7f},
    {0x99, 0x24, 0xa4, 0x3c, 0xc1, 0x31, 0x57, 0x24},
    {0xbd, 0x83, 0x8d, 0x3a, 0xaf, 0xbf, 0x8d, 0xb7},
    {0x0b, 0x1a, 0x2a, 0x32, 0x65, 0xd5, 0x1a, 0xea},
    {0x13, 0x50, 0x79, 0xa3, 0x23, 0x1c, 0xe6, 0x60},
    {0x93, 0x2b, 0x28, 0x46, 0xe4, 0xd7, 0x06, 0x66},
    {0xe1, 0x91, 0x5f, 0x5c, 0xb1, 0xec, 0xa4, 0x6c},
    {0xf3, 0x25, 0x96, 0x5c, 0xa1, 0x6d, 0x62, 0x9f},
    {0x57, 0x5f, 0xf2, 0x8e, 0x60, 0x38, 0x1b, 0xe5},
    {0x72, 0x45, 0x06, 0xeb, 0x4c, 0x32, 0x8a, 0x95},
};
|
||||
const uint8_t vectors_sip128[64][16] = {
|
||||
{
|
||||
0xa3,
|
||||
0x81,
|
||||
0x7f,
|
||||
0x04,
|
||||
0xba,
|
||||
0x25,
|
||||
0xa8,
|
||||
0xe6,
|
||||
0x6d,
|
||||
0xf6,
|
||||
0x72,
|
||||
0x14,
|
||||
0xc7,
|
||||
0x55,
|
||||
0x02,
|
||||
0x93,
|
||||
},
|
||||
{
|
||||
0xda,
|
||||
0x87,
|
||||
0xc1,
|
||||
0xd8,
|
||||
0x6b,
|
||||
0x99,
|
||||
0xaf,
|
||||
0x44,
|
||||
0x34,
|
||||
0x76,
|
||||
0x59,
|
||||
0x11,
|
||||
0x9b,
|
||||
0x22,
|
||||
0xfc,
|
||||
0x45,
|
||||
},
|
||||
{
|
||||
0x81,
|
||||
0x77,
|
||||
0x22,
|
||||
0x8d,
|
||||
0xa4,
|
||||
0xa4,
|
||||
0x5d,
|
||||
0xc7,
|
||||
0xfc,
|
||||
0xa3,
|
||||
0x8b,
|
||||
0xde,
|
||||
0xf6,
|
||||
0x0a,
|
||||
0xff,
|
||||
0xe4,
|
||||
},
|
||||
{
|
||||
0x9c,
|
||||
0x70,
|
||||
0xb6,
|
||||
0x0c,
|
||||
0x52,
|
||||
0x67,
|
||||
0xa9,
|
||||
0x4e,
|
||||
0x5f,
|
||||
0x33,
|
||||
0xb6,
|
||||
0xb0,
|
||||
0x29,
|
||||
0x85,
|
||||
0xed,
|
||||
0x51,
|
||||
},
|
||||
{
|
||||
0xf8,
|
||||
0x81,
|
||||
0x64,
|
||||
0xc1,
|
||||
0x2d,
|
||||
0x9c,
|
||||
0x8f,
|
||||
0xaf,
|
||||
0x7d,
|
||||
0x0f,
|
||||
0x6e,
|
||||
0x7c,
|
||||
0x7b,
|
||||
0xcd,
|
||||
0x55,
|
||||
0x79,
|
||||
},
|
||||
{
|
||||
0x13,
|
||||
0x68,
|
||||
0x87,
|
||||
0x59,
|
||||
0x80,
|
||||
0x77,
|
||||
0x6f,
|
||||
0x88,
|
||||
0x54,
|
||||
0x52,
|
||||
0x7a,
|
||||
0x07,
|
||||
0x69,
|
||||
0x0e,
|
||||
0x96,
|
||||
0x27,
|
||||
},
|
||||
{
|
||||
0x14,
|
||||
0xee,
|
||||
0xca,
|
||||
0x33,
|
||||
0x8b,
|
||||
0x20,
|
||||
0x86,
|
||||
0x13,
|
||||
0x48,
|
||||
0x5e,
|
||||
0xa0,
|
||||
0x30,
|
||||
0x8f,
|
||||
0xd7,
|
||||
0xa1,
|
||||
0x5e,
|
||||
},
|
||||
{
|
||||
0xa1,
|
||||
0xf1,
|
||||
0xeb,
|
||||
0xbe,
|
||||
0xd8,
|
||||
0xdb,
|
||||
0xc1,
|
||||
0x53,
|
||||
0xc0,
|
||||
0xb8,
|
||||
0x4a,
|
||||
0xa6,
|
||||
0x1f,
|
||||
0xf0,
|
||||
0x82,
|
||||
0x39,
|
||||
},
|
||||
{
|
||||
0x3b,
|
||||
0x62,
|
||||
0xa9,
|
||||
0xba,
|
||||
0x62,
|
||||
0x58,
|
||||
0xf5,
|
||||
0x61,
|
||||
0x0f,
|
||||
0x83,
|
||||
0xe2,
|
||||
0x64,
|
||||
0xf3,
|
||||
0x14,
|
||||
0x97,
|
||||
0xb4,
|
||||
},
|
||||
{
|
||||
0x26,
|
||||
0x44,
|
||||
0x99,
|
||||
0x06,
|
||||
0x0a,
|
||||
0xd9,
|
||||
0xba,
|
||||
0xab,
|
||||
0xc4,
|
||||
0x7f,
|
||||
0x8b,
|
||||
0x02,
|
||||
0xbb,
|
||||
0x6d,
|
||||
0x71,
|
||||
0xed,
|
||||
},
|
||||
{
|
||||
0x00,
|
||||
0x11,
|
||||
0x0d,
|
||||
0xc3,
|
||||
0x78,
|
||||
0x14,
|
||||
0x69,
|
||||
0x56,
|
||||
0xc9,
|
||||
0x54,
|
||||
0x47,
|
||||
0xd3,
|
||||
0xf3,
|
||||
0xd0,
|
||||
0xfb,
|
||||
0xba,
|
||||
},
|
||||
{
|
||||
0x01,
|
||||
0x51,
|
||||
0xc5,
|
||||
0x68,
|
||||
0x38,
|
||||
0x6b,
|
||||
0x66,
|
||||
0x77,
|
||||
0xa2,
|
||||
0xb4,
|
||||
0xdc,
|
||||
0x6f,
|
||||
0x81,
|
||||
0xe5,
|
||||
0xdc,
|
||||
0x18,
|
||||
},
|
||||
{
|
||||
0xd6,
|
||||
0x26,
|
||||
0xb2,
|
||||
0x66,
|
||||
0x90,
|
||||
0x5e,
|
||||
0xf3,
|
||||
0x58,
|
||||
0x82,
|
||||
0x63,
|
||||
0x4d,
|
||||
0xf6,
|
||||
0x85,
|
||||
0x32,
|
||||
0xc1,
|
||||
0x25,
|
||||
},
|
||||
{
|
||||
0x98,
|
||||
0x69,
|
||||
0xe2,
|
||||
0x47,
|
||||
0xe9,
|
||||
0xc0,
|
||||
0x8b,
|
||||
0x10,
|
||||
0xd0,
|
||||
0x29,
|
||||
0x93,
|
||||
0x4f,
|
||||
0xc4,
|
||||
0xb9,
|
||||
0x52,
|
||||
0xf7,
|
||||
},
|
||||
{
|
||||
0x31,
|
||||
0xfc,
|
||||
0xef,
|
||||
0xac,
|
||||
0x66,
|
||||
0xd7,
|
||||
0xde,
|
||||
0x9c,
|
||||
0x7e,
|
||||
0xc7,
|
||||
0x48,
|
||||
0x5f,
|
||||
0xe4,
|
||||
0x49,
|
||||
0x49,
|
||||
0x02,
|
||||
},
|
||||
{
|
||||
0x54,
|
||||
0x93,
|
||||
0xe9,
|
||||
0x99,
|
||||
0x33,
|
||||
0xb0,
|
||||
0xa8,
|
||||
0x11,
|
||||
0x7e,
|
||||
0x08,
|
||||
0xec,
|
||||
0x0f,
|
||||
0x97,
|
||||
0xcf,
|
||||
0xc3,
|
||||
0xd9,
|
||||
},
|
||||
{
|
||||
0x6e,
|
||||
0xe2,
|
||||
0xa4,
|
||||
0xca,
|
||||
0x67,
|
||||
0xb0,
|
||||
0x54,
|
||||
0xbb,
|
||||
0xfd,
|
||||
0x33,
|
||||
0x15,
|
||||
0xbf,
|
||||
0x85,
|
||||
0x23,
|
||||
0x05,
|
||||
0x77,
|
||||
},
|
||||
{
|
||||
0x47,
|
||||
0x3d,
|
||||
0x06,
|
||||
0xe8,
|
||||
0x73,
|
||||
0x8d,
|
||||
0xb8,
|
||||
0x98,
|
||||
0x54,
|
||||
0xc0,
|
||||
0x66,
|
||||
0xc4,
|
||||
0x7a,
|
||||
0xe4,
|
||||
0x77,
|
||||
0x40,
|
||||
},
|
||||
{
|
||||
0xa4,
|
||||
0x26,
|
||||
0xe5,
|
||||
0xe4,
|
||||
0x23,
|
||||
0xbf,
|
||||
0x48,
|
||||
0x85,
|
||||
0x29,
|
||||
0x4d,
|
||||
0xa4,
|
||||
0x81,
|
||||
0xfe,
|
||||
0xae,
|
||||
0xf7,
|
||||
0x23,
|
||||
},
|
||||
{
|
||||
0x78,
|
||||
0x01,
|
||||
0x77,
|
||||
0x31,
|
||||
0xcf,
|
||||
0x65,
|
||||
0xfa,
|
||||
0xb0,
|
||||
0x74,
|
||||
0xd5,
|
||||
0x20,
|
||||
0x89,
|
||||
0x52,
|
||||
0x51,
|
||||
0x2e,
|
||||
0xb1,
|
||||
},
|
||||
{
|
||||
0x9e,
|
||||
0x25,
|
||||
0xfc,
|
||||
0x83,
|
||||
0x3f,
|
||||
0x22,
|
||||
0x90,
|
||||
0x73,
|
||||
0x3e,
|
||||
0x93,
|
||||
0x44,
|
||||
0xa5,
|
||||
0xe8,
|
||||
0x38,
|
||||
0x39,
|
||||
0xeb,
|
||||
},
|
||||
{
|
||||
0x56,
|
||||
0x8e,
|
||||
0x49,
|
||||
0x5a,
|
||||
0xbe,
|
||||
0x52,
|
||||
0x5a,
|
||||
0x21,
|
||||
0x8a,
|
||||
0x22,
|
||||
0x14,
|
||||
0xcd,
|
||||
0x3e,
|
||||
0x07,
|
||||
0x1d,
|
||||
0x12,
|
||||
},
|
||||
{
|
||||
0x4a,
|
||||
0x29,
|
||||
0xb5,
|
||||
0x45,
|
||||
0x52,
|
||||
0xd1,
|
||||
0x6b,
|
||||
0x9a,
|
||||
0x46,
|
||||
0x9c,
|
||||
0x10,
|
||||
0x52,
|
||||
0x8e,
|
||||
0xff,
|
||||
0x0a,
|
||||
0xae,
|
||||
},
|
||||
{
|
||||
0xc9,
|
||||
0xd1,
|
||||
0x84,
|
||||
0xdd,
|
||||
0xd5,
|
||||
0xa9,
|
||||
0xf5,
|
||||
0xe0,
|
||||
0xcf,
|
||||
0x8c,
|
||||
0xe2,
|
||||
0x9a,
|
||||
0x9a,
|
||||
0xbf,
|
||||
0x69,
|
||||
0x1c,
|
||||
},
|
||||
{
|
||||
0x2d,
|
||||
0xb4,
|
||||
0x79,
|
||||
0xae,
|
||||
0x78,
|
||||
0xbd,
|
||||
0x50,
|
||||
0xd8,
|
||||
0x88,
|
||||
0x2a,
|
||||
0x8a,
|
||||
0x17,
|
||||
0x8a,
|
||||
0x61,
|
||||
0x32,
|
||||
0xad,
|
||||
},
|
||||
{
|
||||
0x8e,
|
||||
0xce,
|
||||
0x5f,
|
||||
0x04,
|
||||
0x2d,
|
||||
0x5e,
|
||||
0x44,
|
||||
0x7b,
|
||||
0x50,
|
||||
0x51,
|
||||
0xb9,
|
||||
0xea,
|
||||
0xcb,
|
||||
0x8d,
|
||||
0x8f,
|
||||
0x6f,
|
||||
},
|
||||
{
|
||||
0x9c,
|
||||
0x0b,
|
||||
0x53,
|
||||
0xb4,
|
||||
0xb3,
|
||||
0xc3,
|
||||
0x07,
|
||||
0xe8,
|
||||
0x7e,
|
||||
0xae,
|
||||
0xe0,
|
||||
0x86,
|
||||
0x78,
|
||||
0x14,
|
||||
0x1f,
|
||||
0x66,
|
||||
},
|
||||
{
|
||||
0xab,
|
||||
0xf2,
|
||||
0x48,
|
||||
0xaf,
|
||||
0x69,
|
||||
0xa6,
|
||||
0xea,
|
||||
0xe4,
|
||||
0xbf,
|
||||
0xd3,
|
||||
0xeb,
|
||||
0x2f,
|
||||
0x12,
|
||||
0x9e,
|
||||
0xeb,
|
||||
0x94,
|
||||
},
|
||||
{
|
||||
0x06,
|
||||
0x64,
|
||||
0xda,
|
||||
0x16,
|
||||
0x68,
|
||||
0x57,
|
||||
0x4b,
|
||||
0x88,
|
||||
0xb9,
|
||||
0x35,
|
||||
0xf3,
|
||||
0x02,
|
||||
0x73,
|
||||
0x58,
|
||||
0xae,
|
||||
0xf4,
|
||||
},
|
||||
{
|
||||
0xaa,
|
||||
0x4b,
|
||||
0x9d,
|
||||
0xc4,
|
||||
0xbf,
|
||||
0x33,
|
||||
0x7d,
|
||||
0xe9,
|
||||
0x0c,
|
||||
0xd4,
|
||||
0xfd,
|
||||
0x3c,
|
||||
0x46,
|
||||
0x7c,
|
||||
0x6a,
|
||||
0xb7,
|
||||
},
|
||||
{
|
||||
0xea,
|
||||
0x5c,
|
||||
0x7f,
|
||||
0x47,
|
||||
0x1f,
|
||||
0xaf,
|
||||
0x6b,
|
||||
0xde,
|
||||
0x2b,
|
||||
0x1a,
|
||||
0xd7,
|
||||
0xd4,
|
||||
0x68,
|
||||
0x6d,
|
||||
0x22,
|
||||
0x87,
|
||||
},
|
||||
{
|
||||
0x29,
|
||||
0x39,
|
||||
0xb0,
|
||||
0x18,
|
||||
0x32,
|
||||
0x23,
|
||||
0xfa,
|
||||
0xfc,
|
||||
0x17,
|
||||
0x23,
|
||||
0xde,
|
||||
0x4f,
|
||||
0x52,
|
||||
0xc4,
|
||||
0x3d,
|
||||
0x35,
|
||||
},
|
||||
{
|
||||
0x7c,
|
||||
0x39,
|
||||
0x56,
|
||||
0xca,
|
||||
0x5e,
|
||||
0xea,
|
||||
0xfc,
|
||||
0x3e,
|
||||
0x36,
|
||||
0x3e,
|
||||
0x9d,
|
||||
0x55,
|
||||
0x65,
|
||||
0x46,
|
||||
0xeb,
|
||||
0x68,
|
||||
},
|
||||
{
|
||||
0x77,
|
||||
0xc6,
|
||||
0x07,
|
||||
0x71,
|
||||
0x46,
|
||||
0xf0,
|
||||
0x1c,
|
||||
0x32,
|
||||
0xb6,
|
||||
0xb6,
|
||||
0x9d,
|
||||
0x5f,
|
||||
0x4e,
|
||||
0xa9,
|
||||
0xff,
|
||||
0xcf,
|
||||
},
|
||||
{
|
||||
0x37,
|
||||
0xa6,
|
||||
0x98,
|
||||
0x6c,
|
||||
0xb8,
|
||||
0x84,
|
||||
0x7e,
|
||||
0xdf,
|
||||
0x09,
|
||||
0x25,
|
||||
0xf0,
|
||||
0xf1,
|
||||
0x30,
|
||||
0x9b,
|
||||
0x54,
|
||||
0xde,
|
||||
},
|
||||
{
|
||||
0xa7,
|
||||
0x05,
|
||||
0xf0,
|
||||
0xe6,
|
||||
0x9d,
|
||||
0xa9,
|
||||
0xa8,
|
||||
0xf9,
|
||||
0x07,
|
||||
0x24,
|
||||
0x1a,
|
||||
0x2e,
|
||||
0x92,
|
||||
0x3c,
|
||||
0x8c,
|
||||
0xc8,
|
||||
},
|
||||
{
|
||||
0x3d,
|
||||
0xc4,
|
||||
0x7d,
|
||||
0x1f,
|
||||
0x29,
|
||||
0xc4,
|
||||
0x48,
|
||||
0x46,
|
||||
0x1e,
|
||||
0x9e,
|
||||
0x76,
|
||||
0xed,
|
||||
0x90,
|
||||
0x4f,
|
||||
0x67,
|
||||
0x11,
|
||||
},
|
||||
{
|
||||
0x0d,
|
||||
0x62,
|
||||
0xbf,
|
||||
0x01,
|
||||
0xe6,
|
||||
0xfc,
|
||||
0x0e,
|
||||
0x1a,
|
||||
0x0d,
|
||||
0x3c,
|
||||
0x47,
|
||||
0x51,
|
||||
0xc5,
|
||||
0xd3,
|
||||
0x69,
|
||||
0x2b,
|
||||
},
|
||||
{
|
||||
0x8c,
|
||||
0x03,
|
||||
0x46,
|
||||
0x8b,
|
||||
0xca,
|
||||
0x7c,
|
||||
0x66,
|
||||
0x9e,
|
||||
0xe4,
|
||||
0xfd,
|
||||
0x5e,
|
||||
0x08,
|
||||
0x4b,
|
||||
0xbe,
|
||||
0xe7,
|
||||
0xb5,
|
||||
},
|
||||
{
|
||||
0x52,
|
||||
0x8a,
|
||||
0x5b,
|
||||
0xb9,
|
||||
0x3b,
|
||||
0xaf,
|
||||
0x2c,
|
||||
0x9c,
|
||||
0x44,
|
||||
0x73,
|
||||
0xcc,
|
||||
0xe5,
|
||||
0xd0,
|
||||
0xd2,
|
||||
0x2b,
|
||||
0xd9,
|
||||
},
|
||||
{
|
||||
0xdf,
|
||||
0x6a,
|
||||
0x30,
|
||||
0x1e,
|
||||
0x95,
|
||||
0xc9,
|
||||
0x5d,
|
||||
0xad,
|
||||
0x97,
|
||||
0xae,
|
||||
0x0c,
|
||||
0xc8,
|
||||
0xc6,
|
||||
0x91,
|
||||
0x3b,
|
||||
0xd8,
|
||||
},
|
||||
{
|
||||
0x80,
|
||||
0x11,
|
||||
0x89,
|
||||
0x90,
|
||||
0x2c,
|
||||
0x85,
|
||||
0x7f,
|
||||
0x39,
|
||||
0xe7,
|
||||
0x35,
|
||||
0x91,
|
||||
0x28,
|
||||
0x5e,
|
||||
0x70,
|
||||
0xb6,
|
||||
0xdb,
|
||||
},
|
||||
{
|
||||
0xe6,
|
||||
0x17,
|
||||
0x34,
|
||||
0x6a,
|
||||
0xc9,
|
||||
0xc2,
|
||||
0x31,
|
||||
0xbb,
|
||||
0x36,
|
||||
0x50,
|
||||
0xae,
|
||||
0x34,
|
||||
0xcc,
|
||||
0xca,
|
||||
0x0c,
|
||||
0x5b,
|
||||
},
|
||||
{
|
||||
0x27,
|
||||
0xd9,
|
||||
0x34,
|
||||
0x37,
|
||||
0xef,
|
||||
0xb7,
|
||||
0x21,
|
||||
0xaa,
|
||||
0x40,
|
||||
0x18,
|
||||
0x21,
|
||||
0xdc,
|
||||
0xec,
|
||||
0x5a,
|
||||
0xdf,
|
||||
0x89,
|
||||
},
|
||||
{
|
||||
0x89,
|
||||
0x23,
|
||||
0x7d,
|
||||
0x9d,
|
||||
0xed,
|
||||
0x9c,
|
||||
0x5e,
|
||||
0x78,
|
||||
0xd8,
|
||||
0xb1,
|
||||
0xc9,
|
||||
0xb1,
|
||||
0x66,
|
||||
0xcc,
|
||||
0x73,
|
||||
0x42,
|
||||
},
|
||||
{
|
||||
0x4a,
|
||||
0x6d,
|
||||
0x80,
|
||||
0x91,
|
||||
0xbf,
|
||||
0x5e,
|
||||
0x7d,
|
||||
0x65,
|
||||
0x11,
|
||||
0x89,
|
||||
0xfa,
|
||||
0x94,
|
||||
0xa2,
|
||||
0x50,
|
||||
0xb1,
|
||||
0x4c,
|
||||
},
|
||||
{
|
||||
0x0e,
|
||||
0x33,
|
||||
0xf9,
|
||||
0x60,
|
||||
0x55,
|
||||
0xe7,
|
||||
0xae,
|
||||
0x89,
|
||||
0x3f,
|
||||
0xfc,
|
||||
0x0e,
|
||||
0x3d,
|
||||
0xcf,
|
||||
0x49,
|
||||
0x29,
|
||||
0x02,
|
||||
},
|
||||
{
|
||||
0xe6,
|
||||
0x1c,
|
||||
0x43,
|
||||
0x2b,
|
||||
0x72,
|
||||
0x0b,
|
||||
0x19,
|
||||
0xd1,
|
||||
0x8e,
|
||||
0xc8,
|
||||
0xd8,
|
||||
0x4b,
|
||||
0xdc,
|
||||
0x63,
|
||||
0x15,
|
||||
0x1b,
|
||||
},
|
||||
{
|
||||
0xf7,
|
||||
0xe5,
|
||||
0xae,
|
||||
0xf5,
|
||||
0x49,
|
||||
0xf7,
|
||||
0x82,
|
||||
0xcf,
|
||||
0x37,
|
||||
0x90,
|
||||
0x55,
|
||||
0xa6,
|
||||
0x08,
|
||||
0x26,
|
||||
0x9b,
|
||||
0x16,
|
||||
},
|
||||
{
|
||||
0x43,
|
||||
0x8d,
|
||||
0x03,
|
||||
0x0f,
|
||||
0xd0,
|
||||
0xb7,
|
||||
0xa5,
|
||||
0x4f,
|
||||
0xa8,
|
||||
0x37,
|
||||
0xf2,
|
||||
0xad,
|
||||
0x20,
|
||||
0x1a,
|
||||
0x64,
|
||||
0x03,
|
||||
},
|
||||
{
|
||||
0xa5,
|
||||
0x90,
|
||||
0xd3,
|
||||
0xee,
|
||||
0x4f,
|
||||
0xbf,
|
||||
0x04,
|
||||
0xe3,
|
||||
0x24,
|
||||
0x7e,
|
||||
0x0d,
|
||||
0x27,
|
||||
0xf2,
|
||||
0x86,
|
||||
0x42,
|
||||
0x3f,
|
||||
},
|
||||
{
|
||||
0x5f,
|
||||
0xe2,
|
||||
0xc1,
|
||||
0xa1,
|
||||
0x72,
|
||||
0xfe,
|
||||
0x93,
|
||||
0xc4,
|
||||
0xb1,
|
||||
0x5c,
|
||||
0xd3,
|
||||
0x7c,
|
||||
0xae,
|
||||
0xf9,
|
||||
0xf5,
|
||||
0x38,
|
||||
},
|
||||
{
|
||||
0x2c,
|
||||
0x97,
|
||||
0x32,
|
||||
0x5c,
|
||||
0xbd,
|
||||
0x06,
|
||||
0xb3,
|
||||
0x6e,
|
||||
0xb2,
|
||||
0x13,
|
||||
0x3d,
|
||||
0xd0,
|
||||
0x8b,
|
||||
0x3a,
|
||||
0x01,
|
||||
0x7c,
|
||||
},
|
||||
{
|
||||
0x92,
|
||||
0xc8,
|
||||
0x14,
|
||||
0x22,
|
||||
0x7a,
|
||||
0x6b,
|
||||
0xca,
|
||||
0x94,
|
||||
0x9f,
|
||||
0xf0,
|
||||
0x65,
|
||||
0x9f,
|
||||
0x00,
|
||||
0x2a,
|
||||
0xd3,
|
||||
0x9e,
|
||||
},
|
||||
{
|
||||
0xdc,
|
||||
0xe8,
|
||||
0x50,
|
||||
0x11,
|
||||
0x0b,
|
||||
0xd8,
|
||||
0x32,
|
||||
0x8c,
|
||||
0xfb,
|
||||
0xd5,
|
||||
0x08,
|
||||
0x41,
|
||||
0xd6,
|
||||
0x91,
|
||||
0x1d,
|
||||
0x87,
|
||||
},
|
||||
{
|
||||
0x67,
|
||||
0xf1,
|
||||
0x49,
|
||||
0x84,
|
||||
0xc7,
|
||||
0xda,
|
||||
0x79,
|
||||
0x12,
|
||||
0x48,
|
||||
0xe3,
|
||||
0x2b,
|
||||
0xb5,
|
||||
0x92,
|
||||
0x25,
|
||||
0x83,
|
||||
0xda,
|
||||
},
|
||||
{
|
||||
0x19,
|
||||
0x38,
|
||||
0xf2,
|
||||
0xcf,
|
||||
0x72,
|
||||
0xd5,
|
||||
0x4e,
|
||||
0xe9,
|
||||
0x7e,
|
||||
0x94,
|
||||
0x16,
|
||||
0x6f,
|
||||
0xa9,
|
||||
0x1d,
|
||||
0x2a,
|
||||
0x36,
|
||||
},
|
||||
{
|
||||
0x74,
|
||||
0x48,
|
||||
0x1e,
|
||||
0x96,
|
||||
0x46,
|
||||
0xed,
|
||||
0x49,
|
||||
0xfe,
|
||||
0x0f,
|
||||
0x62,
|
||||
0x24,
|
||||
0x30,
|
||||
0x16,
|
||||
0x04,
|
||||
0x69,
|
||||
0x8e,
|
||||
},
|
||||
{
|
||||
0x57,
|
||||
0xfc,
|
||||
0xa5,
|
||||
0xde,
|
||||
0x98,
|
||||
0xa9,
|
||||
0xd6,
|
||||
0xd8,
|
||||
0x00,
|
||||
0x64,
|
||||
0x38,
|
||||
0xd0,
|
||||
0x58,
|
||||
0x3d,
|
||||
0x8a,
|
||||
0x1d,
|
||||
},
|
||||
{
|
||||
0x9f,
|
||||
0xec,
|
||||
0xde,
|
||||
0x1c,
|
||||
0xef,
|
||||
0xdc,
|
||||
0x1c,
|
||||
0xbe,
|
||||
0xd4,
|
||||
0x76,
|
||||
0x36,
|
||||
0x74,
|
||||
0xd9,
|
||||
0x57,
|
||||
0x53,
|
||||
0x59,
|
||||
},
|
||||
{
|
||||
0xe3,
|
||||
0x04,
|
||||
0x0c,
|
||||
0x00,
|
||||
0xeb,
|
||||
0x28,
|
||||
0xf1,
|
||||
0x53,
|
||||
0x66,
|
||||
0xca,
|
||||
0x73,
|
||||
0xcb,
|
||||
0xd8,
|
||||
0x72,
|
||||
0xe7,
|
||||
0x40,
|
||||
},
|
||||
{
|
||||
0x76,
|
||||
0x97,
|
||||
0x00,
|
||||
0x9a,
|
||||
0x6a,
|
||||
0x83,
|
||||
0x1d,
|
||||
0xfe,
|
||||
0xcc,
|
||||
0xa9,
|
||||
0x1c,
|
||||
0x59,
|
||||
0x93,
|
||||
0x67,
|
||||
0x0f,
|
||||
0x7a,
|
||||
},
|
||||
{
|
||||
0x58,
|
||||
0x53,
|
||||
0x54,
|
||||
0x23,
|
||||
0x21,
|
||||
0xf5,
|
||||
0x67,
|
||||
0xa0,
|
||||
0x05,
|
||||
0xd5,
|
||||
0x47,
|
||||
0xa4,
|
||||
0xf0,
|
||||
0x47,
|
||||
0x59,
|
||||
0xbd,
|
||||
},
|
||||
{
|
||||
0x51,
|
||||
0x50,
|
||||
0xd1,
|
||||
0x77,
|
||||
0x2f,
|
||||
0x50,
|
||||
0x83,
|
||||
0x4a,
|
||||
0x50,
|
||||
0x3e,
|
||||
0x06,
|
||||
0x9a,
|
||||
0x97,
|
||||
0x3f,
|
||||
0xbd,
|
||||
0x7c,
|
||||
},
|
||||
};
|
||||
const uint8_t vectors_hsip32[64][4] = {
|
||||
{
|
||||
0xa9,
|
||||
0x35,
|
||||
0x9f,
|
||||
0x5b,
|
||||
},
|
||||
{
|
||||
0x27,
|
||||
0x47,
|
||||
0x5a,
|
||||
0xb8,
|
||||
},
|
||||
{
|
||||
0xfa,
|
||||
0x62,
|
||||
0xa6,
|
||||
0x03,
|
||||
},
|
||||
{
|
||||
0x8a,
|
||||
0xfe,
|
||||
0xe7,
|
||||
0x04,
|
||||
},
|
||||
{
|
||||
0x2a,
|
||||
0x6e,
|
||||
0x46,
|
||||
0x89,
|
||||
},
|
||||
{
|
||||
0xc5,
|
||||
0xfa,
|
||||
0xb6,
|
||||
0x69,
|
||||
},
|
||||
{
|
||||
0x58,
|
||||
0x63,
|
||||
0xfc,
|
||||
0x23,
|
||||
},
|
||||
{
|
||||
0x8b,
|
||||
0xcf,
|
||||
0x63,
|
||||
0xc5,
|
||||
},
|
||||
{
|
||||
0xd0,
|
||||
0xb8,
|
||||
0x84,
|
||||
0x8f,
|
||||
},
|
||||
{
|
||||
0xf8,
|
||||
0x06,
|
||||
0xe7,
|
||||
0x79,
|
||||
},
|
||||
{
|
||||
0x94,
|
||||
0xb0,
|
||||
0x79,
|
||||
0x34,
|
||||
},
|
||||
{
|
||||
0x08,
|
||||
0x08,
|
||||
0x30,
|
||||
0x50,
|
||||
},
|
||||
{
|
||||
0x57,
|
||||
0xf0,
|
||||
0x87,
|
||||
0x2f,
|
||||
},
|
||||
{
|
||||
0x77,
|
||||
0xe6,
|
||||
0x63,
|
||||
0xff,
|
||||
},
|
||||
{
|
||||
0xd6,
|
||||
0xff,
|
||||
0xf8,
|
||||
0x7c,
|
||||
},
|
||||
{
|
||||
0x74,
|
||||
0xfe,
|
||||
0x2b,
|
||||
0x97,
|
||||
},
|
||||
{
|
||||
0xd9,
|
||||
0xb5,
|
||||
0xac,
|
||||
0x84,
|
||||
},
|
||||
{
|
||||
0xc4,
|
||||
0x74,
|
||||
0x64,
|
||||
0x5b,
|
||||
},
|
||||
{
|
||||
0x46,
|
||||
0x5b,
|
||||
0x8d,
|
||||
0x9b,
|
||||
},
|
||||
{
|
||||
0x7b,
|
||||
0xef,
|
||||
0xe3,
|
||||
0x87,
|
||||
},
|
||||
{
|
||||
0xe3,
|
||||
0x4d,
|
||||
0x10,
|
||||
0x45,
|
||||
},
|
||||
{
|
||||
0x61,
|
||||
0x3f,
|
||||
0x62,
|
||||
0xb3,
|
||||
},
|
||||
{
|
||||
0x70,
|
||||
0xf3,
|
||||
0x67,
|
||||
0xfe,
|
||||
},
|
||||
{
|
||||
0xe6,
|
||||
0xad,
|
||||
0xb8,
|
||||
0xbd,
|
||||
},
|
||||
{
|
||||
0x27,
|
||||
0x40,
|
||||
0x0c,
|
||||
0x63,
|
||||
},
|
||||
{
|
||||
0x26,
|
||||
0x78,
|
||||
0x78,
|
||||
0x75,
|
||||
},
|
||||
{
|
||||
0x4f,
|
||||
0x56,
|
||||
0x7b,
|
||||
0x5f,
|
||||
},
|
||||
{
|
||||
0x3a,
|
||||
0xb0,
|
||||
0xe6,
|
||||
0x69,
|
||||
},
|
||||
{
|
||||
0xb0,
|
||||
0x64,
|
||||
0x40,
|
||||
0x00,
|
||||
},
|
||||
{
|
||||
0xff,
|
||||
0x67,
|
||||
0x0f,
|
||||
0xb4,
|
||||
},
|
||||
{
|
||||
0x50,
|
||||
0x9e,
|
||||
0x33,
|
||||
0x8b,
|
||||
},
|
||||
{
|
||||
0x5d,
|
||||
0x58,
|
||||
0x9f,
|
||||
0x1a,
|
||||
},
|
||||
{
|
||||
0xfe,
|
||||
0xe7,
|
||||
0x21,
|
||||
0x12,
|
||||
},
|
||||
{
|
||||
0x33,
|
||||
0x75,
|
||||
0x32,
|
||||
0x59,
|
||||
},
|
||||
{
|
||||
0x6a,
|
||||
0x43,
|
||||
0x4f,
|
||||
0x8c,
|
||||
},
|
||||
{
|
||||
0xfe,
|
||||
0x28,
|
||||
0xb7,
|
||||
0x29,
|
||||
},
|
||||
{
|
||||
0xe7,
|
||||
0x5c,
|
||||
0xc6,
|
||||
0xec,
|
||||
},
|
||||
{
|
||||
0x69,
|
||||
0x7e,
|
||||
0x8d,
|
||||
0x54,
|
||||
},
|
||||
{
|
||||
0x63,
|
||||
0x68,
|
||||
0x8b,
|
||||
0x0f,
|
||||
},
|
||||
{
|
||||
0x65,
|
||||
0x0b,
|
||||
0x62,
|
||||
0xb4,
|
||||
},
|
||||
{
|
||||
0xb6,
|
||||
0xbc,
|
||||
0x18,
|
||||
0x40,
|
||||
},
|
||||
{
|
||||
0x5d,
|
||||
0x07,
|
||||
0x45,
|
||||
0x05,
|
||||
},
|
||||
{
|
||||
0x24,
|
||||
0x42,
|
||||
0xfd,
|
||||
0x2e,
|
||||
},
|
||||
{
|
||||
0x7b,
|
||||
0xb7,
|
||||
0x86,
|
||||
0x3a,
|
||||
},
|
||||
{
|
||||
0x77,
|
||||
0x05,
|
||||
0xd5,
|
||||
0x48,
|
||||
},
|
||||
{
|
||||
0xd7,
|
||||
0x52,
|
||||
0x08,
|
||||
0xb1,
|
||||
},
|
||||
{
|
||||
0xb6,
|
||||
0xd4,
|
||||
0x99,
|
||||
0xc8,
|
||||
},
|
||||
{
|
||||
0x08,
|
||||
0x92,
|
||||
0x20,
|
||||
0x2e,
|
||||
},
|
||||
{
|
||||
0x69,
|
||||
0xe1,
|
||||
0x2c,
|
||||
0xe3,
|
||||
},
|
||||
{
|
||||
0x8d,
|
||||
0xb5,
|
||||
0x80,
|
||||
0xe5,
|
||||
},
|
||||
{
|
||||
0x36,
|
||||
0x97,
|
||||
0x64,
|
||||
0xc6,
|
||||
},
|
||||
{
|
||||
0x01,
|
||||
0x6e,
|
||||
0x02,
|
||||
0x04,
|
||||
},
|
||||
{
|
||||
0x3b,
|
||||
0x85,
|
||||
0xf3,
|
||||
0xd4,
|
||||
},
|
||||
{
|
||||
0xfe,
|
||||
0xdb,
|
||||
0x66,
|
||||
0xbe,
|
||||
},
|
||||
{
|
||||
0x1e,
|
||||
0x69,
|
||||
0x2a,
|
||||
0x3a,
|
||||
},
|
||||
{
|
||||
0xc6,
|
||||
0x89,
|
||||
0x84,
|
||||
0xc0,
|
||||
},
|
||||
{
|
||||
0xa5,
|
||||
0xc5,
|
||||
0xb9,
|
||||
0x40,
|
||||
},
|
||||
{
|
||||
0x9b,
|
||||
0xe9,
|
||||
0xe8,
|
||||
0x8c,
|
||||
},
|
||||
{
|
||||
0x7d,
|
||||
0xbc,
|
||||
0x81,
|
||||
0x40,
|
||||
},
|
||||
{
|
||||
0x7c,
|
||||
0x07,
|
||||
0x8e,
|
||||
0xc5,
|
||||
},
|
||||
{
|
||||
0xd4,
|
||||
0xe7,
|
||||
0x6c,
|
||||
0x73,
|
||||
},
|
||||
{
|
||||
0x42,
|
||||
0x8f,
|
||||
0xcb,
|
||||
0xb9,
|
||||
},
|
||||
{
|
||||
0xbd,
|
||||
0x83,
|
||||
0x99,
|
||||
0x7a,
|
||||
},
|
||||
{
|
||||
0x59,
|
||||
0xea,
|
||||
0x4a,
|
||||
0x74,
|
||||
},
|
||||
};
|
||||
const uint8_t vectors_hsip64[64][8] = {
|
||||
{
|
||||
0x21,
|
||||
0x8d,
|
||||
0x1f,
|
||||
0x59,
|
||||
0xb9,
|
||||
0xb8,
|
||||
0x3c,
|
||||
0xc8,
|
||||
},
|
||||
{
|
||||
0xbe,
|
||||
0x55,
|
||||
0x24,
|
||||
0x12,
|
||||
0xf8,
|
||||
0x38,
|
||||
0x73,
|
||||
0x15,
|
||||
},
|
||||
{
|
||||
0x06,
|
||||
0x4f,
|
||||
0x39,
|
||||
0xef,
|
||||
0x7c,
|
||||
0x50,
|
||||
0xeb,
|
||||
0x57,
|
||||
},
|
||||
{
|
||||
0xce,
|
||||
0x0f,
|
||||
0x1a,
|
||||
0x45,
|
||||
0xf7,
|
||||
0x06,
|
||||
0x06,
|
||||
0x79,
|
||||
},
|
||||
{
|
||||
0xd5,
|
||||
0xe7,
|
||||
0x8a,
|
||||
0x17,
|
||||
0x5b,
|
||||
0xe5,
|
||||
0x2e,
|
||||
0xa1,
|
||||
},
|
||||
{
|
||||
0xcb,
|
||||
0x9d,
|
||||
0x7c,
|
||||
0x3f,
|
||||
0x2f,
|
||||
0x3d,
|
||||
0xb5,
|
||||
0x80,
|
||||
},
|
||||
{
|
||||
0xce,
|
||||
0x3e,
|
||||
0x91,
|
||||
0x35,
|
||||
0x8a,
|
||||
0xa2,
|
||||
0xbc,
|
||||
0x25,
|
||||
},
|
||||
{
|
||||
0xff,
|
||||
0x20,
|
||||
0x27,
|
||||
0x28,
|
||||
0xb0,
|
||||
0x7b,
|
||||
0xc6,
|
||||
0x84,
|
||||
},
|
||||
{
|
||||
0xed,
|
||||
0xfe,
|
||||
0xe8,
|
||||
0x20,
|
||||
0xbc,
|
||||
0xe4,
|
||||
0x85,
|
||||
0x8c,
|
||||
},
|
||||
{
|
||||
0x5b,
|
||||
0x51,
|
||||
0xcc,
|
||||
0xcc,
|
||||
0x13,
|
||||
0x88,
|
||||
0x83,
|
||||
0x07,
|
||||
},
|
||||
{
|
||||
0x95,
|
||||
0xb0,
|
||||
0x46,
|
||||
0x9f,
|
||||
0x06,
|
||||
0xa6,
|
||||
0xf2,
|
||||
0xee,
|
||||
},
|
||||
{
|
||||
0xae,
|
||||
0x26,
|
||||
0x33,
|
||||
0x39,
|
||||
0x94,
|
||||
0xdd,
|
||||
0xcd,
|
||||
0x48,
|
||||
},
|
||||
{
|
||||
0x7b,
|
||||
0xc7,
|
||||
0x1f,
|
||||
0x9f,
|
||||
0xae,
|
||||
0xf5,
|
||||
0xc7,
|
||||
0x99,
|
||||
},
|
||||
{
|
||||
0x5a,
|
||||
0x23,
|
||||
0x52,
|
||||
0xd7,
|
||||
0x5a,
|
||||
0x0c,
|
||||
0x37,
|
||||
0x44,
|
||||
},
|
||||
{
|
||||
0x3b,
|
||||
0xb1,
|
||||
0xa8,
|
||||
0x70,
|
||||
0xea,
|
||||
0xe8,
|
||||
0xe6,
|
||||
0x58,
|
||||
},
|
||||
{
|
||||
0x21,
|
||||
0x7d,
|
||||
0x0b,
|
||||
0xcb,
|
||||
0x4e,
|
||||
0x81,
|
||||
0xc9,
|
||||
0x02,
|
||||
},
|
||||
{
|
||||
0x73,
|
||||
0x36,
|
||||
0xaa,
|
||||
0xd2,
|
||||
0x5f,
|
||||
0x7b,
|
||||
0xf3,
|
||||
0xb5,
|
||||
},
|
||||
{
|
||||
0x37,
|
||||
0xad,
|
||||
0xc0,
|
||||
0x64,
|
||||
0x1c,
|
||||
0x4c,
|
||||
0x4f,
|
||||
0x6a,
|
||||
},
|
||||
{
|
||||
0xc9,
|
||||
0xb2,
|
||||
0xdb,
|
||||
0x2b,
|
||||
0x9a,
|
||||
0x3e,
|
||||
0x42,
|
||||
0xf9,
|
||||
},
|
||||
{
|
||||
0xf9,
|
||||
0x10,
|
||||
0xe4,
|
||||
0x80,
|
||||
0x20,
|
||||
0xab,
|
||||
0x36,
|
||||
0x3c,
|
||||
},
|
||||
{
|
||||
0x1b,
|
||||
0xf5,
|
||||
0x2b,
|
||||
0x0a,
|
||||
0x6f,
|
||||
0xee,
|
||||
0xa7,
|
||||
0xdb,
|
||||
},
|
||||
{
|
||||
0x00,
|
||||
0x74,
|
||||
0x1d,
|
||||
0xc2,
|
||||
0x69,
|
||||
0xe8,
|
||||
0xb3,
|
||||
0xef,
|
||||
},
|
||||
{
|
||||
0xe2,
|
||||
0x01,
|
||||
0x03,
|
||||
0xfa,
|
||||
0x1b,
|
||||
0xa7,
|
||||
0x76,
|
||||
0xef,
|
||||
},
|
||||
{
|
||||
0x4c,
|
||||
0x22,
|
||||
0x10,
|
||||
0xe5,
|
||||
0x4b,
|
||||
0x68,
|
||||
0x1d,
|
||||
0x73,
|
||||
},
|
||||
{
|
||||
0x70,
|
||||
0x74,
|
||||
0x10,
|
||||
0x45,
|
||||
0xae,
|
||||
0x3f,
|
||||
0xa6,
|
||||
0xf1,
|
||||
},
|
||||
{
|
||||
0x0c,
|
||||
0x86,
|
||||
0x40,
|
||||
0x37,
|
||||
0x39,
|
||||
0x71,
|
||||
0x40,
|
||||
0x38,
|
||||
},
|
||||
{
|
||||
0x0d,
|
||||
0x89,
|
||||
0x9e,
|
||||
0xd8,
|
||||
0x11,
|
||||
0x29,
|
||||
0x23,
|
||||
0xf0,
|
||||
},
|
||||
{
|
||||
0x22,
|
||||
0x6b,
|
||||
0xf5,
|
||||
0xfa,
|
||||
0xb8,
|
||||
0x1e,
|
||||
0xe1,
|
||||
0xb8,
|
||||
},
|
||||
{
|
||||
0x2d,
|
||||
0x92,
|
||||
0x5f,
|
||||
0xfb,
|
||||
0x1e,
|
||||
0x00,
|
||||
0x16,
|
||||
0xb5,
|
||||
},
|
||||
{
|
||||
0x36,
|
||||
0x19,
|
||||
0x58,
|
||||
0xd5,
|
||||
0x2c,
|
||||
0xee,
|
||||
0x10,
|
||||
0xf1,
|
||||
},
|
||||
{
|
||||
0x29,
|
||||
0x1a,
|
||||
0xaf,
|
||||
0x86,
|
||||
0x48,
|
||||
0x98,
|
||||
0x17,
|
||||
0x9d,
|
||||
},
|
||||
{
|
||||
0x86,
|
||||
0x3c,
|
||||
0x7f,
|
||||
0x15,
|
||||
0x5c,
|
||||
0x34,
|
||||
0x11,
|
||||
0x7c,
|
||||
},
|
||||
{
|
||||
0x28,
|
||||
0x70,
|
||||
0x9d,
|
||||
0x46,
|
||||
0xd8,
|
||||
0x11,
|
||||
0x62,
|
||||
0x6c,
|
||||
},
|
||||
{
|
||||
0x24,
|
||||
0x84,
|
||||
0x77,
|
||||
0x68,
|
||||
0x1d,
|
||||
0x28,
|
||||
0xf8,
|
||||
0x9c,
|
||||
},
|
||||
{
|
||||
0x83,
|
||||
0x24,
|
||||
0xe4,
|
||||
0xd7,
|
||||
0x52,
|
||||
0x8f,
|
||||
0x98,
|
||||
0x30,
|
||||
},
|
||||
{
|
||||
0xf9,
|
||||
0xef,
|
||||
0xd4,
|
||||
0xe1,
|
||||
0x3a,
|
||||
0xea,
|
||||
0x6b,
|
||||
0xd8,
|
||||
},
|
||||
{
|
||||
0x86,
|
||||
0xd6,
|
||||
0x7a,
|
||||
0x40,
|
||||
0xec,
|
||||
0x42,
|
||||
0x76,
|
||||
0xdc,
|
||||
},
|
||||
{
|
||||
0x3f,
|
||||
0x62,
|
||||
0x92,
|
||||
0xec,
|
||||
0xcc,
|
||||
0xa9,
|
||||
0x7e,
|
||||
0x35,
|
||||
},
|
||||
{
|
||||
0xcb,
|
||||
0xd9,
|
||||
0x2e,
|
||||
0xe7,
|
||||
0x24,
|
||||
0xd4,
|
||||
0x21,
|
||||
0x09,
|
||||
},
|
||||
{
|
||||
0x36,
|
||||
0x8d,
|
||||
0xf6,
|
||||
0x80,
|
||||
0x8d,
|
||||
0x40,
|
||||
0x3d,
|
||||
0x79,
|
||||
},
|
||||
{
|
||||
0x5b,
|
||||
0x38,
|
||||
0xc8,
|
||||
0x1c,
|
||||
0x67,
|
||||
0xc8,
|
||||
0xae,
|
||||
0x4c,
|
||||
},
|
||||
{
|
||||
0x95,
|
||||
0xab,
|
||||
0x71,
|
||||
0x89,
|
||||
0xd4,
|
||||
0x39,
|
||||
0xac,
|
||||
0xb3,
|
||||
},
|
||||
{
|
||||
0xa9,
|
||||
0x1a,
|
||||
0x52,
|
||||
0xc0,
|
||||
0x25,
|
||||
0x32,
|
||||
0x70,
|
||||
0x24,
|
||||
},
|
||||
{
|
||||
0x5b,
|
||||
0x00,
|
||||
0x87,
|
||||
0xc6,
|
||||
0x95,
|
||||
0x28,
|
||||
0xac,
|
||||
0xea,
|
||||
},
|
||||
{
|
||||
0x1e,
|
||||
0x30,
|
||||
0xf3,
|
||||
0xad,
|
||||
0x27,
|
||||
0xdc,
|
||||
0xb1,
|
||||
0x5a,
|
||||
},
|
||||
{
|
||||
0x69,
|
||||
0x7f,
|
||||
0x5c,
|
||||
0x9a,
|
||||
0x90,
|
||||
0x32,
|
||||
0x4e,
|
||||
0xd4,
|
||||
},
|
||||
{
|
||||
0x49,
|
||||
0x5c,
|
||||
0x0f,
|
||||
0x99,
|
||||
0x55,
|
||||
0x57,
|
||||
0xdc,
|
||||
0x38,
|
||||
},
|
||||
{
|
||||
0x94,
|
||||
0x27,
|
||||
0x20,
|
||||
0x2a,
|
||||
0x3c,
|
||||
0x29,
|
||||
0xf9,
|
||||
0x4d,
|
||||
},
|
||||
{
|
||||
0xa9,
|
||||
0xea,
|
||||
0xa8,
|
||||
0xc0,
|
||||
0x4b,
|
||||
0xa9,
|
||||
0x3e,
|
||||
0x3e,
|
||||
},
|
||||
{
|
||||
0xee,
|
||||
0xa4,
|
||||
0xc1,
|
||||
0x73,
|
||||
0x7d,
|
||||
0x01,
|
||||
0x12,
|
||||
0x18,
|
||||
},
|
||||
{
|
||||
0x91,
|
||||
0x2d,
|
||||
0x56,
|
||||
0x8f,
|
||||
0xd8,
|
||||
0xf6,
|
||||
0x5a,
|
||||
0x49,
|
||||
},
|
||||
{
|
||||
0x56,
|
||||
0x91,
|
||||
0x95,
|
||||
0x96,
|
||||
0xb0,
|
||||
0xff,
|
||||
0x5c,
|
||||
0x97,
|
||||
},
|
||||
{
|
||||
0x02,
|
||||
0x44,
|
||||
0x5a,
|
||||
0x79,
|
||||
0x98,
|
||||
0xf5,
|
||||
0x50,
|
||||
0xe1,
|
||||
},
|
||||
{
|
||||
0x86,
|
||||
0xec,
|
||||
0x46,
|
||||
0x6c,
|
||||
0xe7,
|
||||
0x1d,
|
||||
0x1f,
|
||||
0xb2,
|
||||
},
|
||||
{
|
||||
0x35,
|
||||
0x95,
|
||||
0x69,
|
||||
0xe7,
|
||||
0xd2,
|
||||
0x89,
|
||||
0xe3,
|
||||
0xbc,
|
||||
},
|
||||
{
|
||||
0x87,
|
||||
0x1b,
|
||||
0x05,
|
||||
0xca,
|
||||
0x62,
|
||||
0xbb,
|
||||
0x7c,
|
||||
0x96,
|
||||
},
|
||||
{
|
||||
0xa1,
|
||||
0xa4,
|
||||
0x92,
|
||||
0xf9,
|
||||
0x42,
|
||||
0xf1,
|
||||
0x5f,
|
||||
0x1d,
|
||||
},
|
||||
{
|
||||
0x12,
|
||||
0xec,
|
||||
0x26,
|
||||
0x7f,
|
||||
0xf6,
|
||||
0x09,
|
||||
0x5b,
|
||||
0x6e,
|
||||
},
|
||||
{
|
||||
0x5d,
|
||||
0x1b,
|
||||
0x5e,
|
||||
0xa1,
|
||||
0xb2,
|
||||
0x31,
|
||||
0xd8,
|
||||
0x9d,
|
||||
},
|
||||
{
|
||||
0xd8,
|
||||
0xcf,
|
||||
0xb4,
|
||||
0x45,
|
||||
0x3f,
|
||||
0x92,
|
||||
0xee,
|
||||
0x54,
|
||||
},
|
||||
{
|
||||
0xd6,
|
||||
0x76,
|
||||
0x28,
|
||||
0x90,
|
||||
0xbf,
|
||||
0x26,
|
||||
0xe4,
|
||||
0x60,
|
||||
},
|
||||
{
|
||||
0x31,
|
||||
0x35,
|
||||
0x63,
|
||||
0xa4,
|
||||
0xb7,
|
||||
0xed,
|
||||
0x5c,
|
||||
0xf3,
|
||||
},
|
||||
{
|
||||
0xf9,
|
||||
0x0b,
|
||||
0x3a,
|
||||
0xb5,
|
||||
0x72,
|
||||
0xd4,
|
||||
0x66,
|
||||
0x93,
|
||||
},
|
||||
{
|
||||
0x2e,
|
||||
0xa6,
|
||||
0x3c,
|
||||
0x71,
|
||||
0xbf,
|
||||
0x32,
|
||||
0x60,
|
||||
0x87,
|
||||
},
|
||||
};
|
||||
1403
3rd/pcre2/CMakeLists.txt
Normal file
1403
3rd/pcre2/CMakeLists.txt
Normal file
@@ -0,0 +1,1403 @@
|
||||
# CMakeLists.txt
|
||||
#
|
||||
# This file enables PCRE2 to be built with the CMake configuration and build
|
||||
# tool. Download CMake in source or binary form from http://www.cmake.org/
|
||||
# Converted to support PCRE2 from the original PCRE file, August 2014.
|
||||
#
|
||||
# Original listfile by Christian Ehrlicher <Ch.Ehrlicher@gmx.de>
|
||||
# Refined and expanded by Daniel Richard G. <skunk@iSKUNK.ORG>
|
||||
# 2007-09-14 mod by Sheri so 7.4 supported configuration options can be entered
|
||||
# 2007-09-19 Adjusted by PH to retain previous default settings
|
||||
# 2007-12-26 (a) On UNIX, use names libpcre instead of just pcre
|
||||
# (b) Ensure pcretest and pcregrep link with the local library,
|
||||
# not a previously-installed one.
|
||||
# (c) Add PCRE_SUPPORT_LIBREADLINE, PCRE_SUPPORT_LIBZ, and
|
||||
# PCRE_SUPPORT_LIBBZ2.
|
||||
# 2008-01-20 Brought up to date to include several new features by Christian
|
||||
# Ehrlicher.
|
||||
# 2008-01-22 Sheri added options for backward compatibility of library names
|
||||
# when building with minGW:
|
||||
# if "ON", NON_STANDARD_LIB_PREFIX causes shared libraries to
|
||||
# be built without "lib" as prefix. (The libraries will be named
|
||||
# pcre.dll, pcreposix.dll and pcrecpp.dll).
|
||||
# if "ON", NON_STANDARD_LIB_SUFFIX causes shared libraries to
|
||||
# be built with suffix of "-0.dll". (The libraries will be named
|
||||
# libpcre-0.dll, libpcreposix-0.dll and libpcrecpp-0.dll - same names
|
||||
# built by default with Configure and Make.)
|
||||
# 2008-01-23 PH removed the automatic build of pcredemo.
|
||||
# 2008-04-22 PH modified READLINE support so it finds NCURSES when needed.
|
||||
# 2008-07-03 PH updated for revised UCP property support (change of files)
|
||||
# 2009-03-23 PH applied Steven Van Ingelgem's patch to change the name
|
||||
# CMAKE_BINARY_DIR to PROJECT_BINARY_DIR so that it works when PCRE
|
||||
# is included within another project.
|
||||
# 2009-03-23 PH applied a modified version of Steven Van Ingelgem's patches to
|
||||
# add options to stop the building of pcregrep and the tests, and
|
||||
# to disable the final configuration report.
|
||||
# 2009-04-11 PH applied Christian Ehrlicher's patch to show compiler flags that
|
||||
# are set by specifying a release type.
|
||||
# 2010-01-02 PH added test for stdint.h
|
||||
# 2010-03-02 PH added test for inttypes.h
|
||||
# 2011-08-01 PH added PCREGREP_BUFSIZE
|
||||
# 2011-08-22 PH added PCRE_SUPPORT_JIT
|
||||
# 2011-09-06 PH modified WIN32 ADD_TEST line as suggested by Sergey Cherepanov
|
||||
# 2011-09-06 PH added PCRE_SUPPORT_PCREGREP_JIT
|
||||
# 2011-10-04 Sheri added support for including coff data in windows shared libraries
|
||||
# compiled with MINGW if pcre.rc and/or pcreposix.rc are placed in
|
||||
# the source dir by the user prior to building
|
||||
# 2011-10-04 Sheri changed various add_test's to use exes' location built instead
|
||||
# of DEBUG location only (likely only matters in MSVC)
|
||||
# 2011-10-04 Sheri added scripts to provide needed variables to RunTest and
|
||||
# RunGrepTest (used for UNIX and Msys)
|
||||
# 2011-10-04 Sheri added scripts to provide needed variables and to execute
|
||||
# RunTest.bat in Win32 (for effortless testing with "make test")
|
||||
# 2011-10-04 Sheri Increased minimum required cmake version
|
||||
# 2012-01-06 PH removed pcre_info.c and added pcre_string_utils.c
|
||||
# 2012-01-10 Zoltan Herczeg added libpcre16 support
|
||||
# 2012-01-13 Stephen Kelly added out of source build support
|
||||
# 2012-01-17 PH applied Stephen Kelly's patch to parse the version data out
|
||||
# of the configure.ac file
|
||||
# 2012-02-26 PH added support for libedit
|
||||
# 2012-09-06 PH added support for PCRE_EBCDIC_NL25
|
||||
# 2012-09-08 ChPe added PCRE32 support
|
||||
# 2012-10-23 PH added support for VALGRIND and GCOV
|
||||
# 2012-12-08 PH added patch from Daniel Richard G to quash some MSVC warnings
|
||||
# 2013-07-01 PH realized that the "support" for GCOV was a total nonsense and
|
||||
# so it has been removed.
|
||||
# 2013-10-08 PH got rid of the "source" command, which is a bash-ism (use ".")
|
||||
# 2013-11-05 PH added support for PARENS_NEST_LIMIT
|
||||
# 2014-08-29 PH converted the file for PCRE2 (which has no C++).
|
||||
# 2015-04-24 PH added support for PCRE2_DEBUG
|
||||
# 2015-07-16 PH updated for new pcre2_find_bracket source module
|
||||
# 2015-08-24 PH correct C_FLAGS setting (patch from Roy Ivy III)
|
||||
# 2015-10-16 PH added support for never-backslash-C
|
||||
# 2016-03-01 PH applied Chris Wilson's patch for MSVC static
|
||||
# 2016-06-24 PH applied Chris Wilson's second patch, putting the first under
|
||||
# a new option instead of being unconditional.
|
||||
# 2016-10-05 PH fixed a typo (PCRE should be PCRE2) in above patch
|
||||
# fix by David Gaussmann
|
||||
# 2016-10-07 PH added PCREGREP_MAX_BUFSIZE
|
||||
# 2017-03-11 PH turned HEAP_MATCH_RECURSE into a NO-OP for 10.30
|
||||
# 2017-04-08 PH added HEAP_LIMIT
|
||||
# 2017-06-15 ZH added SUPPORT_JIT_SEALLOC support
|
||||
# 2018-06-19 PH added checks for stdint.h and inttypes.h (later removed)
|
||||
# 2018-06-27 PH added Daniel's patch to increase the stack for MSVC
|
||||
# 2018-11-14 PH removed unnecessary checks for stdint.h and inttypes.h
|
||||
# 2018-11-16 PH added PCRE2GREP_SUPPORT_CALLOUT_FORK support and tidied
|
||||
# 2019-02-16 PH hacked to avoid CMP0026 policy issue (see comments below)
|
||||
# 2020-03-16 PH renamed dftables as pcre2_dftables (as elsewhere)
|
||||
# 2020-03-24 PH changed CMAKE_MODULE_PATH definition to add, not replace
|
||||
# 2020-04-08 Carlo added function check for secure_getenv, fixed strerror
|
||||
# 2020-04-16 enh added check for __attribute__((uninitialized))
|
||||
# 2020-04-25 PH applied patches from Uwe Korn to support pkg-config and
|
||||
# library versioning.
|
||||
# 2020-04-25 Carlo added function check for mkostemp used in ProtExecAllocator
|
||||
# 2020-04-28 PH added function check for memfd_create based on Carlo's patch
|
||||
# 2020-05-25 PH added a check for Intel CET
|
||||
# 2020-12-03 PH altered the definition of pcre2test as suggested by Daniel
|
||||
# 2021-06-29 JWSB added the option to build static library with PIC.
|
||||
# 2021-07-05 JWSB modified such that both the static and shared library can be
|
||||
# built in one go.
|
||||
# 2021-08-28 PH increased minimum version
|
||||
# 2021-08-28 PH added test for realpath()
|
||||
# 2022-12-10 PH added support for pcre2posix_test
|
||||
# 2023-01-15 Carlo added C99 as the minimum required
|
||||
# 2023-08-06 PH added support for setting variable length lookbehind maximum
|
||||
|
||||
################################################################################
|
||||
# We have used `gersemi` for auto-formatting our CMake files.
|
||||
# Applied to all CMake files using:
|
||||
# > pip3 install gersemi
|
||||
# > gersemi --in-place --line-length 120 --indent 2 \
|
||||
# ./CMakeLists.txt ./cmake/*.cmake ./cmake/*.cmake.in
|
||||
################################################################################
|
||||
|
||||
# Increased minimum to 3.15 to allow use of string(REPEAT).
|
||||
cmake_minimum_required(VERSION 3.15 FATAL_ERROR)
|
||||
# Declare the project; C is the only language enabled.
project(PCRE2 C)
|
||||
# Request C99 and make it mandatory: with CMAKE_C_STANDARD_REQUIRED set, CMake
# does not fall back to an older standard when the compiler lacks C99 support.
set(CMAKE_C_STANDARD 99)
|
||||
set(CMAKE_C_STANDARD_REQUIRED TRUE)
|
||||
|
||||
# Default C symbols to hidden visibility for targets created after this point.
# Policy CMP0063 set to NEW makes the visibility preset apply to all target
# types (the OLD behaviour honours it only for some of them).
set(CMAKE_C_VISIBILITY_PRESET hidden)
|
||||
cmake_policy(SET CMP0063 NEW)
|
||||
|
||||
# Set policy CMP0026 to avoid warnings for the use of LOCATION in
|
||||
# GET_TARGET_PROPERTY. This should no longer be required.
|
||||
# CMAKE_POLICY(SET CMP0026 OLD)
|
||||
|
||||
# With a recent cmake, you can provide a rootdir to look for non
|
||||
# standard installed library dependencies, but to do so, the policy
|
||||
# needs to be set to new (by uncommenting the following)
|
||||
# CMAKE_POLICY(SET CMP0074 NEW)
|
||||
|
||||
# For FindReadline.cmake. This was changed to allow setting CMAKE_MODULE_PATH
|
||||
# on the command line.
|
||||
# SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
||||
|
||||
# Append instead of overwrite, so any CMAKE_MODULE_PATH value that is already
# set (e.g. passed with -D) is kept and the project's cmake/ directory is added
# after it.
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
||||
|
||||
# Put the project's src/ directory on the header search path.
include_directories(${PROJECT_SOURCE_DIR}/src)
|
||||
|
||||
# external packages
# None of these calls uses REQUIRED, so configuration continues when a package
# is not found.
# NOTE(review): Readline and Editline are not find modules shipped with CMake;
# presumably they are resolved through CMAKE_MODULE_PATH - confirm that the
# corresponding Find*.cmake files exist in the project's cmake/ directory.
|
||||
find_package(BZip2)
|
||||
find_package(ZLIB)
|
||||
find_package(Readline)
|
||||
find_package(Editline)
|
||||
|
||||
# Configuration checks
|
||||
|
||||
# Load the CMake modules that provide check_c_source_compiles(),
# check_function_exists(), check_symbol_exists(), check_include_file() and
# check_type_size().
include(CheckCSourceCompiles)
|
||||
include(CheckFunctionExists)
|
||||
include(CheckSymbolExists)
|
||||
include(CheckIncludeFile)
|
||||
include(CheckTypeSize)
|
||||
include(GNUInstallDirs) # for CMAKE_INSTALL_LIBDIR
|
||||
|
||||
# Header probes: each call stores its result in the named HAVE_*_H cache
# variable, which is true when the header can be included by the C compiler.
check_include_file(assert.h HAVE_ASSERT_H)
|
||||
check_include_file(dirent.h HAVE_DIRENT_H)
|
||||
check_include_file(sys/stat.h HAVE_SYS_STAT_H)
|
||||
check_include_file(sys/types.h HAVE_SYS_TYPES_H)
|
||||
check_include_file(unistd.h HAVE_UNISTD_H)
|
||||
check_include_file(windows.h HAVE_WINDOWS_H)
|
||||
|
||||
# Symbol probes: each call checks that the symbol (function, variable or macro)
# is available after including the quoted header, and stores the result in the
# named HAVE_* cache variable.
check_symbol_exists(bcopy "strings.h" HAVE_BCOPY)
|
||||
check_symbol_exists(memfd_create "sys/mman.h" HAVE_MEMFD_CREATE)
|
||||
check_symbol_exists(memmove "string.h" HAVE_MEMMOVE)
|
||||
check_symbol_exists(secure_getenv "stdlib.h" HAVE_SECURE_GETENV)
|
||||
check_symbol_exists(strerror "string.h" HAVE_STRERROR)
|
||||
|
||||
# Detect realpath() by building a small program that calls it with a
# PATH_MAX-sized buffer, instead of using a plain symbol lookup. The probe
# therefore also fails when PATH_MAX is not defined by <limits.h>.
check_c_source_compiles(
|
||||
[=[
|
||||
#include <stdlib.h>
|
||||
#include <limits.h>
|
||||
int main(int c, char *v[]) { char buf[PATH_MAX]; realpath(v[c], buf); return 0; }
|
||||
]=]
|
||||
HAVE_REALPATH
|
||||
)
|
||||
|
||||
# Save the current CMAKE_REQUIRED_FLAGS, then add -Werror for the two attribute
# probes below - presumably so that a compiler which merely warns about an
# attribute it does not recognise fails the probe instead of passing it.
# -Werror is not added for MSVC or when the compiler ID is "XL".
set(ORIG_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
|
||||
if(NOT MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "XL")
|
||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror")
|
||||
endif()
|
||||
|
||||
# Probe: does the compiler accept __attribute__((uninitialized)) on a local
# array?
check_c_source_compiles(
|
||||
"int main(void) { char buf[128] __attribute__((uninitialized)); (void)buf; return 0; }"
|
||||
HAVE_ATTRIBUTE_UNINITIALIZED
|
||||
)
|
||||
|
||||
# Probe: does the compiler accept __attribute__((visibility("default")))? The
# test program declares, calls and defines a function carrying the attribute.
check_c_source_compiles(
|
||||
[=[
|
||||
extern __attribute__ ((visibility ("default"))) int f(void);
|
||||
int main(void) { return f(); }
|
||||
int f(void) { return 42; }
|
||||
]=]
|
||||
HAVE_VISIBILITY
|
||||
)
|
||||
|
||||
# Restore the saved flags so that later probes are not built with -Werror.
set(CMAKE_REQUIRED_FLAGS ${ORIG_CMAKE_REQUIRED_FLAGS})
|
||||
|
||||
# Compiler-intrinsic probes. Each one builds a tiny program and records the
# outcome in the named HAVE_BUILTIN_* cache variable.

# Note: this probe tests __assume(), even though the result variable is named
# HAVE_BUILTIN_ASSUME.
check_c_source_compiles("int main(void) { __assume(1); return 0; }" HAVE_BUILTIN_ASSUME)
|
||||
|
||||
# __builtin_mul_overflow() with two int operands and a size_t result pointer.
# The operands are deliberately left uninitialised; the program only needs to
# build, it is never run by this check.
check_c_source_compiles(
|
||||
[=[
|
||||
#include <stddef.h>
|
||||
int main(void) { int a,b; size_t m; __builtin_mul_overflow(a,b,&m); return 0; }
|
||||
]=]
|
||||
HAVE_BUILTIN_MUL_OVERFLOW
|
||||
)
|
||||
|
||||
# __builtin_unreachable().
check_c_source_compiles(
|
||||
"int main(int c, char *v[]) { if (c) __builtin_unreachable(); return (int)(*v[0]); }"
|
||||
HAVE_BUILTIN_UNREACHABLE
|
||||
)
|
||||
|
||||
# Define PCRE2_EXPORT from the HAVE_VISIBILITY probe result: when the compiler
# accepts the visibility attribute, PCRE2_EXPORT holds the "default" visibility
# attribute text; otherwise set() without a value leaves the variable unset, so
# it expands to nothing.
# NOTE(review): where PCRE2_EXPORT is consumed is not visible in this part of
# the file - presumably it is substituted into generated headers; confirm.
if(HAVE_VISIBILITY)
|
||||
set(PCRE2_EXPORT [=[__attribute__ ((visibility ("default")))]=])
|
||||
else()
|
||||
set(PCRE2_EXPORT)
|
||||
endif()
|
||||
|
||||
# Check whether Intel CET is enabled, and if so, adjust compiler flags. This
|
||||
# code was written by PH, trying to imitate the logic from the autotools
|
||||
# configuration.
|
||||
|
||||
# The probe builds only when the compiler predefines __CET__ under the current
# flags; otherwise the #error directive makes it fail and INTEL_CET_ENABLED is
# false.
check_c_source_compiles(
|
||||
[=[
|
||||
#ifndef __CET__
|
||||
#error CET is not enabled
|
||||
#endif
|
||||
int main() { return 0; }
|
||||
]=]
|
||||
INTEL_CET_ENABLED
|
||||
)
|
||||
|
||||
# When CET is on, append -mshstk to the global C flags, which affects every
# target compiled after this point.
if(INTEL_CET_ENABLED)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mshstk")
|
||||
endif()
|
||||
|
||||
# User-configurable options
|
||||
#
|
||||
# Note: CMakeSetup displays these in alphabetical order, regardless of
|
||||
# the order we use here.
|
||||
|
||||
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries.")
|
||||
|
||||
option(BUILD_STATIC_LIBS "Build static libraries." ON)
|
||||
|
||||
option(PCRE2_BUILD_PCRE2_8 "Build 8 bit PCRE2 library" ON)
|
||||
|
||||
option(PCRE2_BUILD_PCRE2_16 "Build 16 bit PCRE2 library" OFF)
|
||||
|
||||
option(PCRE2_BUILD_PCRE2_32 "Build 32 bit PCRE2 library" OFF)
|
||||
|
||||
option(PCRE2_STATIC_PIC "Build the static library with the option position independent code enabled." OFF)
|
||||
|
||||
set(PCRE2_DEBUG "IfDebugBuild" CACHE STRING "Include debugging code")
|
||||
set_property(CACHE PCRE2_DEBUG PROPERTY STRINGS "IfDebugBuild" "ON" "OFF")
|
||||
|
||||
option(PCRE2_DISABLE_PERCENT_ZT "Disable the use of %zu and %td (rarely needed)" OFF)
|
||||
|
||||
set(
|
||||
PCRE2_EBCDIC
|
||||
OFF
|
||||
CACHE BOOL
|
||||
"Use EBCDIC coding instead of ASCII. (This is rarely used outside of mainframe systems.)"
|
||||
)
|
||||
|
||||
set(PCRE2_EBCDIC_NL25 OFF CACHE BOOL "Use 0x25 as EBCDIC NL character instead of 0x15; implies EBCDIC.")
|
||||
|
||||
set(
|
||||
PCRE2_LINK_SIZE
|
||||
"2"
|
||||
CACHE STRING
|
||||
"Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details."
|
||||
)
|
||||
|
||||
set(
|
||||
PCRE2_PARENS_NEST_LIMIT
|
||||
"250"
|
||||
CACHE STRING
|
||||
"Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details."
|
||||
)
|
||||
|
||||
set(
|
||||
PCRE2_HEAP_LIMIT
|
||||
"20000000"
|
||||
CACHE STRING
|
||||
"Default limit on heap memory (kibibytes). See HEAP_LIMIT in config.h.in for details."
|
||||
)
|
||||
|
||||
set(PCRE2_MAX_VARLOOKBEHIND "255" CACHE STRING "Default limit on variable lookbehinds.")
|
||||
|
||||
set(
|
||||
PCRE2_MATCH_LIMIT
|
||||
"10000000"
|
||||
CACHE STRING
|
||||
"Default limit on internal looping. See MATCH_LIMIT in config.h.in for details."
|
||||
)
|
||||
|
||||
set(
|
||||
PCRE2_MATCH_LIMIT_DEPTH
|
||||
"MATCH_LIMIT"
|
||||
CACHE STRING
|
||||
"Default limit on internal depth of search. See MATCH_LIMIT_DEPTH in config.h.in for details."
|
||||
)
|
||||
|
||||
set(
|
||||
PCRE2GREP_BUFSIZE
|
||||
"20480"
|
||||
CACHE STRING
|
||||
"Buffer starting size parameter for pcre2grep. See PCRE2GREP_BUFSIZE in config.h.in for details."
|
||||
)
|
||||
|
||||
set(
|
||||
PCRE2GREP_MAX_BUFSIZE
|
||||
"1048576"
|
||||
CACHE STRING
|
||||
"Buffer maximum size parameter for pcre2grep. See PCRE2GREP_MAX_BUFSIZE in config.h.in for details."
|
||||
)
|
||||
|
||||
set(PCRE2_NEWLINE "LF" CACHE STRING "What to recognize as a newline (one of CR, LF, CRLF, ANY, ANYCRLF, NUL).")
|
||||
|
||||
set(PCRE2_HEAP_MATCH_RECURSE OFF CACHE BOOL "Obsolete option: do not use")
|
||||
|
||||
set(PCRE2_SUPPORT_JIT OFF CACHE BOOL "Enable support for Just-in-time compiling.")
|
||||
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
|
||||
set(PCRE2_SUPPORT_JIT_SEALLOC OFF CACHE BOOL "Enable SELinux compatible execmem allocator in JIT (experimental).")
|
||||
else()
|
||||
set(PCRE2_SUPPORT_JIT_SEALLOC IGNORE)
|
||||
endif()
|
||||
|
||||
set(PCRE2GREP_SUPPORT_JIT ON CACHE BOOL "Enable use of Just-in-time compiling in pcre2grep.")
|
||||
|
||||
set(PCRE2GREP_SUPPORT_CALLOUT ON CACHE BOOL "Enable callout string support in pcre2grep.")
|
||||
|
||||
set(PCRE2GREP_SUPPORT_CALLOUT_FORK ON CACHE BOOL "Enable callout string fork support in pcre2grep.")
|
||||
|
||||
set(PCRE2_SUPPORT_UNICODE ON CACHE BOOL "Enable support for Unicode and UTF-8/UTF-16/UTF-32 encoding.")
|
||||
|
||||
set(
|
||||
PCRE2_SUPPORT_BSR_ANYCRLF
|
||||
OFF
|
||||
CACHE BOOL
|
||||
"ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks"
|
||||
)
|
||||
|
||||
set(PCRE2_NEVER_BACKSLASH_C OFF CACHE BOOL "If ON, backslash-C (upper case C) is locked out.")
|
||||
|
||||
set(PCRE2_SUPPORT_VALGRIND OFF CACHE BOOL "Enable Valgrind support.")
|
||||
|
||||
option(PCRE2_SHOW_REPORT "Show the final configuration report" ON)
|
||||
option(PCRE2_BUILD_PCRE2GREP "Build pcre2grep" ON)
|
||||
option(PCRE2_BUILD_TESTS "Build the tests" ON)
|
||||
|
||||
set(
|
||||
PCRE2_INSTALL_CMAKEDIR
|
||||
"${CMAKE_INSTALL_LIBDIR}/cmake/pcre2"
|
||||
CACHE STRING
|
||||
"Path used during CMake install for placing PCRE2's CMake config files, relative to the installation root (prefix)"
|
||||
)
|
||||
|
||||
if(MINGW)
|
||||
option(
|
||||
NON_STANDARD_LIB_PREFIX
|
||||
"ON=Shared libraries built in mingw will be named pcre2.dll, etc., instead of libpcre2.dll, etc."
|
||||
OFF
|
||||
)
|
||||
|
||||
option(
|
||||
NON_STANDARD_LIB_SUFFIX
|
||||
"ON=Shared libraries built in mingw will be named libpcre2-0.dll, etc., instead of libpcre2.dll, etc."
|
||||
OFF
|
||||
)
|
||||
endif()
|
||||
|
||||
if(MSVC)
|
||||
option(PCRE2_STATIC_RUNTIME "ON=Compile against the static runtime (/MT)." OFF)
|
||||
option(INSTALL_MSVC_PDB "ON=Install .pdb files built by MSVC, if generated" OFF)
|
||||
endif()
|
||||
|
||||
# bzip2 lib
|
||||
if(BZIP2_FOUND)
|
||||
option(PCRE2_SUPPORT_LIBBZ2 "Enable support for linking pcre2grep with libbz2." ON)
|
||||
endif()
|
||||
if(PCRE2_SUPPORT_LIBBZ2)
|
||||
include_directories(${BZIP2_INCLUDE_DIR})
|
||||
endif()
|
||||
|
||||
# zlib
|
||||
if(ZLIB_FOUND)
|
||||
option(PCRE2_SUPPORT_LIBZ "Enable support for linking pcre2grep with libz." ON)
|
||||
endif()
|
||||
if(PCRE2_SUPPORT_LIBZ)
|
||||
include_directories(${ZLIB_INCLUDE_DIR})
|
||||
endif()
|
||||
|
||||
# editline lib
|
||||
# libedit support for pcre2test. The option is only offered when libedit was
# located; asking for PCRE2_SUPPORT_LIBEDIT without it is a fatal error.
if(EDITLINE_FOUND)
  option(PCRE2_SUPPORT_LIBEDIT "Enable support for linking pcre2test with libedit." OFF)
  if(PCRE2_SUPPORT_LIBEDIT)
    include_directories(${EDITLINE_INCLUDE_DIR})
  endif()
elseif(PCRE2_SUPPORT_LIBEDIT)
  # Requested (e.g. on the command line) but libedit was not found.
  message(
    FATAL_ERROR
    " libedit not found, set EDITLINE_INCLUDE_DIR to a compatible header\n"
    " or set Editline_ROOT to a full libedit installed tree, as needed\n"
    " Might need to enable policy CMP0074 in CMakeLists.txt"
  )
endif()
|
||||
|
||||
# readline lib
|
||||
if(READLINE_FOUND)
|
||||
option(PCRE2_SUPPORT_LIBREADLINE "Enable support for linking pcre2test with libreadline." ON)
|
||||
endif()
|
||||
if(PCRE2_SUPPORT_LIBREADLINE)
|
||||
include_directories(${READLINE_INCLUDE_DIR})
|
||||
endif()
|
||||
|
||||
# Prepare build configuration
|
||||
|
||||
if(NOT BUILD_SHARED_LIBS AND NOT BUILD_STATIC_LIBS)
|
||||
message(FATAL_ERROR "At least one of BUILD_SHARED_LIBS or BUILD_STATIC_LIBS must be enabled.")
|
||||
endif()
|
||||
|
||||
if(NOT PCRE2_BUILD_PCRE2_8 AND NOT PCRE2_BUILD_PCRE2_16 AND NOT PCRE2_BUILD_PCRE2_32)
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"At least one of PCRE2_BUILD_PCRE2_8, PCRE2_BUILD_PCRE2_16 or PCRE2_BUILD_PCRE2_32 must be enabled"
|
||||
)
|
||||
endif()
|
||||
|
||||
# Set SUPPORT_PCRE2_<width> to 1 for every code unit width whose
# PCRE2_BUILD_PCRE2_<width> option is enabled.
foreach(_pcre2_width 8 16 32)
  if(PCRE2_BUILD_PCRE2_${_pcre2_width})
    set(SUPPORT_PCRE2_${_pcre2_width} 1)
  endif()
endforeach()
|
||||
|
||||
if(PCRE2_BUILD_PCRE2GREP AND NOT PCRE2_BUILD_PCRE2_8)
|
||||
message(STATUS "** PCRE2_BUILD_PCRE2_8 must be enabled for the pcre2grep program")
|
||||
set(PCRE2_BUILD_PCRE2GREP OFF)
|
||||
endif()
|
||||
|
||||
# libreadline and libedit are mutually exclusive: stop if both were requested
# and libreadline is actually available.
if(PCRE2_SUPPORT_LIBREADLINE AND PCRE2_SUPPORT_LIBEDIT AND READLINE_FOUND)
  message(
    FATAL_ERROR
    " Only one of the readline compatible libraries can be enabled.\n"
    " Disable libreadline with -DPCRE2_SUPPORT_LIBREADLINE=OFF"
  )
endif()
|
||||
|
||||
if(PCRE2_SUPPORT_BSR_ANYCRLF)
|
||||
set(BSR_ANYCRLF 1)
|
||||
endif()
|
||||
|
||||
if(PCRE2_NEVER_BACKSLASH_C)
|
||||
set(NEVER_BACKSLASH_C 1)
|
||||
endif()
|
||||
|
||||
if(PCRE2_SUPPORT_UNICODE)
|
||||
set(SUPPORT_UNICODE 1)
|
||||
endif()
|
||||
|
||||
if(PCRE2_SUPPORT_JIT)
|
||||
set(SUPPORT_JIT 1)
|
||||
if(UNIX)
|
||||
find_package(Threads REQUIRED)
|
||||
if(CMAKE_USE_PTHREADS_INIT)
|
||||
set(REQUIRE_PTHREAD 1)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(PCRE2_SUPPORT_JIT_SEALLOC)
|
||||
set(CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE)
|
||||
check_symbol_exists(mkostemp stdlib.h REQUIRED)
|
||||
unset(CMAKE_REQUIRED_DEFINITIONS)
|
||||
if(${REQUIRED})
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
|
||||
add_compile_definitions(_GNU_SOURCE)
|
||||
set(SLJIT_PROT_EXECUTABLE_ALLOCATOR 1)
|
||||
else()
|
||||
message(FATAL_ERROR "Your configuration is not supported")
|
||||
endif()
|
||||
else()
|
||||
set(PCRE2_SUPPORT_JIT_SEALLOC OFF)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(PCRE2GREP_SUPPORT_JIT)
|
||||
set(SUPPORT_PCRE2GREP_JIT 1)
|
||||
endif()
|
||||
|
||||
# pcre2grep callout support; the fork variant is only enabled when callout
# support itself is enabled.
if(PCRE2GREP_SUPPORT_CALLOUT)
  set(SUPPORT_PCRE2GREP_CALLOUT 1)
endif()

if(PCRE2GREP_SUPPORT_CALLOUT AND PCRE2GREP_SUPPORT_CALLOUT_FORK)
  set(SUPPORT_PCRE2GREP_CALLOUT_FORK 1)
endif()
|
||||
|
||||
if(PCRE2_SUPPORT_VALGRIND)
|
||||
set(SUPPORT_VALGRIND 1)
|
||||
endif()
|
||||
|
||||
if(PCRE2_DISABLE_PERCENT_ZT)
|
||||
set(DISABLE_PERCENT_ZT 1)
|
||||
endif()
|
||||
|
||||
# This next one used to reference ${READLINE_LIBRARY})
|
||||
# but I was advised to add the NCURSES test as well, along with
|
||||
# some modifications to cmake/FindReadline.cmake which should
|
||||
# make it possible to override the default if necessary. PH
|
||||
|
||||
if(PCRE2_SUPPORT_LIBREADLINE)
|
||||
set(SUPPORT_LIBREADLINE 1)
|
||||
set(PCRE2TEST_LIBS ${READLINE_LIBRARY} ${NCURSES_LIBRARY})
|
||||
endif()
|
||||
|
||||
# libedit is a plug-compatible alternative to libreadline
|
||||
|
||||
if(PCRE2_SUPPORT_LIBEDIT)
|
||||
set(SUPPORT_LIBEDIT 1)
|
||||
set(PCRE2TEST_LIBS ${EDITLINE_LIBRARY})
|
||||
endif()
|
||||
|
||||
if(PCRE2_SUPPORT_LIBZ)
|
||||
set(SUPPORT_LIBZ 1)
|
||||
set(PCRE2GREP_LIBS ${PCRE2GREP_LIBS} ${ZLIB_LIBRARIES})
|
||||
endif()
|
||||
|
||||
if(PCRE2_SUPPORT_LIBBZ2)
|
||||
set(SUPPORT_LIBBZ2 1)
|
||||
set(PCRE2GREP_LIBS ${PCRE2GREP_LIBS} ${BZIP2_LIBRARIES})
|
||||
endif()
|
||||
|
||||
# Map the PCRE2_NEWLINE setting onto its numeric NEWLINE_DEFAULT code:
#   CR=1, LF=2, CRLF=3, ANY=4, ANYCRLF=5, NUL=6
# Any other value leaves NEWLINE_DEFAULT empty and aborts configuration.
set(NEWLINE_DEFAULT "")

if(PCRE2_NEWLINE STREQUAL "CR")
  set(NEWLINE_DEFAULT "1")
elseif(PCRE2_NEWLINE STREQUAL "LF")
  set(NEWLINE_DEFAULT "2")
elseif(PCRE2_NEWLINE STREQUAL "CRLF")
  set(NEWLINE_DEFAULT "3")
elseif(PCRE2_NEWLINE STREQUAL "ANY")
  set(NEWLINE_DEFAULT "4")
elseif(PCRE2_NEWLINE STREQUAL "ANYCRLF")
  set(NEWLINE_DEFAULT "5")
elseif(PCRE2_NEWLINE STREQUAL "NUL")
  set(NEWLINE_DEFAULT "6")
endif()

if(NEWLINE_DEFAULT STREQUAL "")
  # The list of accepted values includes "NUL", which is handled above.
  message(
    FATAL_ERROR
    "The PCRE2_NEWLINE variable must be set to one of the following values: \"LF\", \"CR\", \"CRLF\", \"ANY\", \"ANYCRLF\", \"NUL\"."
  )
endif()
|
||||
|
||||
# EBCDIC is enabled either directly or implicitly by requesting the 0x25 NL
# variant; EBCDIC_NL25 is only set for the latter.
if(PCRE2_EBCDIC OR PCRE2_EBCDIC_NL25)
  set(EBCDIC 1)
endif()

if(PCRE2_EBCDIC_NL25)
  set(EBCDIC_NL25 1)
endif()
|
||||
|
||||
# Output files
|
||||
|
||||
configure_file(config-cmake.h.in ${PROJECT_BINARY_DIR}/config.h @ONLY)
|
||||
|
||||
# Parse version numbers and date out of configure.ac
|
||||
|
||||
file(
|
||||
STRINGS
|
||||
${PROJECT_SOURCE_DIR}/configure.ac
|
||||
configure_lines
|
||||
LIMIT_COUNT
|
||||
50 # Read only the first 50 lines of the file
|
||||
)
|
||||
|
||||
set(
|
||||
SEARCHED_VARIABLES
|
||||
"pcre2_major"
|
||||
"pcre2_minor"
|
||||
"pcre2_prerelease"
|
||||
"pcre2_date"
|
||||
"libpcre2_posix_version"
|
||||
"libpcre2_8_version"
|
||||
"libpcre2_16_version"
|
||||
"libpcre2_32_version"
|
||||
)
|
||||
foreach(configure_line ${configure_lines})
|
||||
foreach(substitution_variable ${SEARCHED_VARIABLES})
|
||||
string(TOUPPER ${substitution_variable} substitution_variable_upper)
|
||||
if(NOT ${substitution_variable_upper})
|
||||
string(REGEX MATCH "m4_define\\(${substitution_variable}, *\\[(.*)\\]" MATCHED_STRING ${configure_line})
|
||||
if(CMAKE_MATCH_1)
|
||||
set(${substitution_variable_upper} ${CMAKE_MATCH_1})
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
endforeach()
|
||||
|
||||
# parse_lib_version(<prefix>)
#
# Splits the colon-separated "current:revision:age" string stored in
# <prefix>_VERSION and derives the following variables from it:
#   <prefix>_VERSION_CURRENT, <prefix>_VERSION_REVISION, <prefix>_VERSION_AGE
#   <prefix>_SOVERSION                   = current - age
#   <prefix>_MACHO_COMPATIBILITY_VERSION = current + 1
#   <prefix>_MACHO_CURRENT_VERSION       = "<current + 1>.<revision>"
#   <prefix>_VERSION                     = "<soversion>.<age>.<revision>"
# Note that <prefix>_VERSION is overwritten with the dotted form. As this is
# a macro, every variable is set in the caller's scope.
macro(PARSE_LIB_VERSION variable_prefix)
  string(REPLACE ":" ";" ${variable_prefix}_VERSION_LIST ${${variable_prefix}_VERSION})
  list(GET ${variable_prefix}_VERSION_LIST 0 ${variable_prefix}_VERSION_CURRENT)
  list(GET ${variable_prefix}_VERSION_LIST 1 ${variable_prefix}_VERSION_REVISION)
  list(GET ${variable_prefix}_VERSION_LIST 2 ${variable_prefix}_VERSION_AGE)

  math(EXPR ${variable_prefix}_SOVERSION "${${variable_prefix}_VERSION_CURRENT} - ${${variable_prefix}_VERSION_AGE}")
  math(EXPR ${variable_prefix}_MACHO_COMPATIBILITY_VERSION "${${variable_prefix}_VERSION_CURRENT} + 1")
  math(EXPR ${variable_prefix}_MACHO_CURRENT_VERSION "${${variable_prefix}_VERSION_CURRENT} + 1")
  # Append the revision as the minor component. (A stray trailing "}" used to
  # be appended here, producing values such as "15.0}".)
  set(
    ${variable_prefix}_MACHO_CURRENT_VERSION
    "${${variable_prefix}_MACHO_CURRENT_VERSION}.${${variable_prefix}_VERSION_REVISION}"
  )
  set(
    ${variable_prefix}_VERSION
    "${${variable_prefix}_SOVERSION}.${${variable_prefix}_VERSION_AGE}.${${variable_prefix}_VERSION_REVISION}"
  )
endmacro()
|
||||
|
||||
parse_lib_version(LIBPCRE2_POSIX)
|
||||
parse_lib_version(LIBPCRE2_8)
|
||||
parse_lib_version(LIBPCRE2_16)
|
||||
parse_lib_version(LIBPCRE2_32)
|
||||
|
||||
configure_file(src/pcre2.h.in ${PROJECT_BINARY_DIR}/pcre2.h @ONLY)
|
||||
|
||||
# Make sure to not link debug libs
|
||||
# against release libs and vice versa
|
||||
if(WIN32)
|
||||
set(CMAKE_DEBUG_POSTFIX "d")
|
||||
endif()
|
||||
|
||||
# Character table generation
|
||||
|
||||
option(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF)
|
||||
if(PCRE2_REBUILD_CHARTABLES)
|
||||
add_executable(pcre2_dftables src/pcre2_dftables.c)
|
||||
add_custom_command(
|
||||
OUTPUT ${PROJECT_BINARY_DIR}/pcre2_chartables.c
|
||||
COMMAND pcre2_dftables
|
||||
ARGS ${PROJECT_BINARY_DIR}/pcre2_chartables.c
|
||||
DEPENDS pcre2_dftables
|
||||
COMMENT "Generating character tables (pcre2_chartables.c) for current locale"
|
||||
VERBATIM
|
||||
)
|
||||
else()
|
||||
configure_file(${PROJECT_SOURCE_DIR}/src/pcre2_chartables.c.dist ${PROJECT_BINARY_DIR}/pcre2_chartables.c COPYONLY)
|
||||
endif()
|
||||
|
||||
# Source code
|
||||
|
||||
set(PCRE2_HEADERS ${PROJECT_BINARY_DIR}/pcre2.h)
|
||||
|
||||
set(
|
||||
PCRE2_SOURCES
|
||||
src/pcre2_auto_possess.c
|
||||
${PROJECT_BINARY_DIR}/pcre2_chartables.c
|
||||
src/pcre2_chkdint.c
|
||||
src/pcre2_compile.c
|
||||
src/pcre2_compile_class.c
|
||||
src/pcre2_config.c
|
||||
src/pcre2_context.c
|
||||
src/pcre2_convert.c
|
||||
src/pcre2_dfa_match.c
|
||||
src/pcre2_error.c
|
||||
src/pcre2_extuni.c
|
||||
src/pcre2_find_bracket.c
|
||||
src/pcre2_jit_compile.c
|
||||
src/pcre2_maketables.c
|
||||
src/pcre2_match.c
|
||||
src/pcre2_match_data.c
|
||||
src/pcre2_newline.c
|
||||
src/pcre2_ord2utf.c
|
||||
src/pcre2_pattern_info.c
|
||||
src/pcre2_script_run.c
|
||||
src/pcre2_serialize.c
|
||||
src/pcre2_string_utils.c
|
||||
src/pcre2_study.c
|
||||
src/pcre2_substitute.c
|
||||
src/pcre2_substring.c
|
||||
src/pcre2_tables.c
|
||||
src/pcre2_ucd.c
|
||||
src/pcre2_valid_utf.c
|
||||
src/pcre2_xclass.c
|
||||
)
|
||||
|
||||
set(PCRE2POSIX_HEADERS src/pcre2posix.h)
|
||||
set(PCRE2POSIX_SOURCES src/pcre2posix.c)
|
||||
|
||||
if(MINGW AND BUILD_SHARED_LIBS)
|
||||
if(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
|
||||
add_custom_command(
|
||||
OUTPUT ${PROJECT_SOURCE_DIR}/pcre2.o PRE-LINK
|
||||
COMMAND windres
|
||||
ARGS pcre2.rc pcre2.o
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
COMMENT "Using pcre2 coff info in mingw build"
|
||||
)
|
||||
set(PCRE2_SOURCES ${PCRE2_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2.o)
|
||||
endif()
|
||||
|
||||
if(EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
|
||||
add_custom_command(
|
||||
OUTPUT ${PROJECT_SOURCE_DIR}/pcre2posix.o PRE-LINK
|
||||
COMMAND windres
|
||||
ARGS pcre2posix.rc pcre2posix.o
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
COMMENT "Using pcre2posix coff info in mingw build"
|
||||
)
|
||||
set(PCRE2POSIX_SOURCES ${PCRE2POSIX_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2posix.o)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(MSVC AND BUILD_SHARED_LIBS)
|
||||
if(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
|
||||
set(PCRE2_SOURCES ${PCRE2_SOURCES} pcre2.rc)
|
||||
endif()
|
||||
|
||||
if(EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
|
||||
set(PCRE2POSIX_SOURCES ${PCRE2POSIX_SOURCES} pcre2posix.rc)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Fix static compilation with MSVC: https://bugs.exim.org/show_bug.cgi?id=1681
|
||||
# This code was taken from the CMake wiki, not from WebM.
|
||||
|
||||
if(MSVC AND PCRE2_STATIC_RUNTIME)
|
||||
message(STATUS "** MSVC and PCRE2_STATIC_RUNTIME: modifying compiler flags to use static runtime library")
|
||||
foreach(
|
||||
flag_var
|
||||
CMAKE_C_FLAGS
|
||||
CMAKE_C_FLAGS_DEBUG
|
||||
CMAKE_C_FLAGS_RELEASE
|
||||
CMAKE_C_FLAGS_MINSIZEREL
|
||||
CMAKE_C_FLAGS_RELWITHDEBINFO
|
||||
)
|
||||
string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
# Build setup
|
||||
|
||||
add_compile_definitions(HAVE_CONFIG_H)
|
||||
|
||||
if(PCRE2_DEBUG STREQUAL "IfDebugBuild")
|
||||
add_compile_definitions("$<$<CONFIG:Debug>:PCRE2_DEBUG>")
|
||||
elseif(PCRE2_DEBUG)
|
||||
add_compile_definitions("PCRE2_DEBUG")
|
||||
endif()
|
||||
|
||||
if(MSVC)
|
||||
add_compile_definitions(_CRT_SECURE_NO_DEPRECATE _CRT_SECURE_NO_WARNINGS)
|
||||
endif()
|
||||
|
||||
set(CMAKE_INCLUDE_CURRENT_DIR 1)
|
||||
|
||||
set(TARGETS)
|
||||
|
||||
# 8-bit library
|
||||
|
||||
if(PCRE2_BUILD_PCRE2_8)
|
||||
if(BUILD_STATIC_LIBS)
|
||||
add_library(pcre2-8-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
|
||||
set_target_properties(
|
||||
pcre2-8-static
|
||||
PROPERTIES
|
||||
COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
|
||||
MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_8_MACHO_COMPATIBILITY_VERSION}"
|
||||
MACHO_CURRENT_VERSION "${LIBPCRE2_8_MACHO_CURRENT_VERSION}"
|
||||
VERSION ${LIBPCRE2_8_VERSION}
|
||||
SOVERSION ${LIBPCRE2_8_SOVERSION}
|
||||
)
|
||||
target_compile_definitions(pcre2-8-static PUBLIC PCRE2_STATIC)
|
||||
target_include_directories(pcre2-8-static PUBLIC ${PROJECT_BINARY_DIR})
|
||||
if(REQUIRE_PTHREAD)
|
||||
target_link_libraries(pcre2-8-static Threads::Threads)
|
||||
endif()
|
||||
set(TARGETS ${TARGETS} pcre2-8-static)
|
||||
add_library(pcre2-posix-static STATIC ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES})
|
||||
set_target_properties(
|
||||
pcre2-posix-static
|
||||
PROPERTIES
|
||||
COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
|
||||
MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_POSIX_MACHO_COMPATIBILITY_VERSION}"
|
||||
MACHO_CURRENT_VERSION "${LIBPCRE2_POSIX_MACHO_CURRENT_VERSION}"
|
||||
VERSION ${LIBPCRE2_POSIX_VERSION}
|
||||
SOVERSION ${LIBPCRE2_POSIX_SOVERSION}
|
||||
)
|
||||
target_link_libraries(pcre2-posix-static pcre2-8-static)
|
||||
target_include_directories(pcre2-posix-static PUBLIC ${PROJECT_SOURCE_DIR}/src)
|
||||
set(TARGETS ${TARGETS} pcre2-posix-static)
|
||||
|
||||
if(MSVC)
|
||||
set_target_properties(pcre2-8-static PROPERTIES OUTPUT_NAME pcre2-8-static)
|
||||
set_target_properties(pcre2-posix-static PROPERTIES OUTPUT_NAME pcre2-posix-static)
|
||||
else()
|
||||
set_target_properties(pcre2-8-static PROPERTIES OUTPUT_NAME pcre2-8)
|
||||
set_target_properties(pcre2-posix-static PROPERTIES OUTPUT_NAME pcre2-posix)
|
||||
endif()
|
||||
if(PCRE2_STATIC_PIC)
|
||||
set_target_properties(pcre2-8-static pcre2-posix-static PROPERTIES POSITION_INDEPENDENT_CODE 1)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(BUILD_SHARED_LIBS)
|
||||
add_library(pcre2-8-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
|
||||
target_include_directories(pcre2-8-shared PUBLIC ${PROJECT_BINARY_DIR})
|
||||
set_target_properties(
|
||||
pcre2-8-shared
|
||||
PROPERTIES
|
||||
COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
|
||||
MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_8_MACHO_COMPATIBILITY_VERSION}"
|
||||
MACHO_CURRENT_VERSION "${LIBPCRE2_8_MACHO_CURRENT_VERSION}"
|
||||
VERSION ${LIBPCRE2_8_VERSION}
|
||||
SOVERSION ${LIBPCRE2_8_SOVERSION}
|
||||
OUTPUT_NAME pcre2-8
|
||||
)
|
||||
if(REQUIRE_PTHREAD)
|
||||
target_link_libraries(pcre2-8-shared Threads::Threads)
|
||||
endif()
|
||||
set(TARGETS ${TARGETS} pcre2-8-shared)
|
||||
set(DLL_PDB_FILES $<TARGET_PDB_FILE_DIR:pcre2-8-shared>/pcre2-8.pdb ${DLL_PDB_FILES})
|
||||
set(DLL_PDB_DEBUG_FILES $<TARGET_PDB_FILE_DIR:pcre2-8-shared>/pcre2-8d.pdb ${DLL_PDB_DEBUG_FILES})
|
||||
|
||||
add_library(pcre2-posix-shared SHARED ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES})
|
||||
target_include_directories(pcre2-posix-shared PUBLIC ${PROJECT_SOURCE_DIR}/src)
|
||||
set_target_properties(
|
||||
pcre2-posix-shared
|
||||
PROPERTIES
|
||||
COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
|
||||
MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_POSIX_MACHO_COMPATIBILITY_VERSION}"
|
||||
MACHO_CURRENT_VERSION "${LIBPCRE2_POSIX_MACHO_CURRENT_VERSION}"
|
||||
VERSION ${LIBPCRE2_POSIX_VERSION}
|
||||
SOVERSION ${LIBPCRE2_POSIX_SOVERSION}
|
||||
OUTPUT_NAME pcre2-posix
|
||||
)
|
||||
set(PCRE2POSIX_CFLAG "-DPCRE2POSIX_SHARED")
|
||||
target_compile_definitions(pcre2-posix-shared PUBLIC ${PCRE2POSIX_CFLAG})
|
||||
target_link_libraries(pcre2-posix-shared pcre2-8-shared)
|
||||
set(TARGETS ${TARGETS} pcre2-posix-shared)
|
||||
set(DLL_PDB_FILES $<TARGET_PDB_FILE_DIR:pcre2-posix-shared>/pcre2-posix.pdb ${DLL_PDB_FILES})
|
||||
set(DLL_PDB_DEBUG_FILES $<TARGET_PDB_FILE_DIR:pcre2-posix-shared>/pcre2-posixd.pdb ${DLL_PDB_DEBUG_FILES})
|
||||
|
||||
if(MINGW)
|
||||
if(NON_STANDARD_LIB_PREFIX)
|
||||
set_target_properties(pcre2-8-shared pcre2-posix-shared PROPERTIES PREFIX "")
|
||||
endif()
|
||||
if(NON_STANDARD_LIB_SUFFIX)
|
||||
set_target_properties(pcre2-8-shared pcre2-posix-shared PROPERTIES SUFFIX "-0.dll")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(BUILD_STATIC_LIBS)
|
||||
add_library(pcre2-8 ALIAS pcre2-8-static)
|
||||
add_library(pcre2-posix ALIAS pcre2-posix-static)
|
||||
else()
|
||||
add_library(pcre2-8 ALIAS pcre2-8-shared)
|
||||
add_library(pcre2-posix ALIAS pcre2-posix-shared)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# 16-bit library
|
||||
|
||||
# Targets for the 16-bit code unit library: an optional static target, an
# optional shared target, and a "pcre2-16" alias that points at the static
# target when it is built and at the shared target otherwise.
if(PCRE2_BUILD_PCRE2_16)
  if(BUILD_STATIC_LIBS)
    add_library(pcre2-16-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
    target_include_directories(pcre2-16-static PUBLIC ${PROJECT_BINARY_DIR})
    # The Mach-O versions must come from the 16-bit library's own version
    # variables (they were previously taken from the 32-bit library).
    set_target_properties(
      pcre2-16-static
      PROPERTIES
        UNITY_BUILD OFF
        COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16
        MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_16_MACHO_COMPATIBILITY_VERSION}"
        MACHO_CURRENT_VERSION "${LIBPCRE2_16_MACHO_CURRENT_VERSION}"
        VERSION ${LIBPCRE2_16_VERSION}
        SOVERSION ${LIBPCRE2_16_SOVERSION}
    )
    target_compile_definitions(pcre2-16-static PUBLIC PCRE2_STATIC)
    if(REQUIRE_PTHREAD)
      target_link_libraries(pcre2-16-static Threads::Threads)
    endif()
    set(TARGETS ${TARGETS} pcre2-16-static)

    # With MSVC the static library keeps the "-static" suffix in its file
    # name; elsewhere it is named like the shared library.
    if(MSVC)
      set_target_properties(pcre2-16-static PROPERTIES OUTPUT_NAME pcre2-16-static)
    else()
      set_target_properties(pcre2-16-static PROPERTIES OUTPUT_NAME pcre2-16)
    endif()
    if(PCRE2_STATIC_PIC)
      set_target_properties(pcre2-16-static PROPERTIES POSITION_INDEPENDENT_CODE 1)
    endif()
  endif()

  if(BUILD_SHARED_LIBS)
    add_library(pcre2-16-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
    target_include_directories(pcre2-16-shared PUBLIC ${PROJECT_BINARY_DIR})
    # As for the static target, use the 16-bit library's Mach-O versions.
    set_target_properties(
      pcre2-16-shared
      PROPERTIES
        UNITY_BUILD OFF
        COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16
        MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_16_MACHO_COMPATIBILITY_VERSION}"
        MACHO_CURRENT_VERSION "${LIBPCRE2_16_MACHO_CURRENT_VERSION}"
        VERSION ${LIBPCRE2_16_VERSION}
        SOVERSION ${LIBPCRE2_16_SOVERSION}
        OUTPUT_NAME pcre2-16
    )
    if(REQUIRE_PTHREAD)
      target_link_libraries(pcre2-16-shared Threads::Threads)
    endif()
    set(TARGETS ${TARGETS} pcre2-16-shared)
    # Record the release and debug ("d" suffix) .pdb file names.
    set(DLL_PDB_FILES $<TARGET_PDB_FILE_DIR:pcre2-16-shared>/pcre2-16.pdb ${DLL_PDB_FILES})
    set(DLL_PDB_DEBUG_FILES $<TARGET_PDB_FILE_DIR:pcre2-16-shared>/pcre2-16d.pdb ${DLL_PDB_DEBUG_FILES})

    if(MINGW)
      if(NON_STANDARD_LIB_PREFIX)
        set_target_properties(pcre2-16-shared PROPERTIES PREFIX "")
      endif()
      if(NON_STANDARD_LIB_SUFFIX)
        set_target_properties(pcre2-16-shared PROPERTIES SUFFIX "-0.dll")
      endif()
    endif()
  endif()

  if(BUILD_STATIC_LIBS)
    add_library(pcre2-16 ALIAS pcre2-16-static)
  else()
    add_library(pcre2-16 ALIAS pcre2-16-shared)
  endif()
endif()
|
||||
|
||||
# 32-bit library
|
||||
|
||||
if(PCRE2_BUILD_PCRE2_32)
|
||||
if(BUILD_STATIC_LIBS)
|
||||
add_library(pcre2-32-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
|
||||
target_include_directories(pcre2-32-static PUBLIC ${PROJECT_BINARY_DIR})
|
||||
set_target_properties(
|
||||
pcre2-32-static
|
||||
PROPERTIES
|
||||
UNITY_BUILD OFF
|
||||
COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32
|
||||
MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}"
|
||||
MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}"
|
||||
VERSION ${LIBPCRE2_32_VERSION}
|
||||
SOVERSION ${LIBPCRE2_32_SOVERSION}
|
||||
)
|
||||
target_compile_definitions(pcre2-32-static PUBLIC PCRE2_STATIC)
|
||||
if(REQUIRE_PTHREAD)
|
||||
target_link_libraries(pcre2-32-static Threads::Threads)
|
||||
endif()
|
||||
set(TARGETS ${TARGETS} pcre2-32-static)
|
||||
|
||||
if(MSVC)
|
||||
set_target_properties(pcre2-32-static PROPERTIES OUTPUT_NAME pcre2-32-static)
|
||||
else()
|
||||
set_target_properties(pcre2-32-static PROPERTIES OUTPUT_NAME pcre2-32)
|
||||
endif()
|
||||
if(PCRE2_STATIC_PIC)
|
||||
set_target_properties(pcre2-32-static PROPERTIES POSITION_INDEPENDENT_CODE 1)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(BUILD_SHARED_LIBS)
|
||||
add_library(pcre2-32-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
|
||||
target_include_directories(pcre2-32-shared PUBLIC ${PROJECT_BINARY_DIR})
|
||||
set_target_properties(
|
||||
pcre2-32-shared
|
||||
PROPERTIES
|
||||
UNITY_BUILD OFF
|
||||
COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32
|
||||
MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}"
|
||||
MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}"
|
||||
VERSION ${LIBPCRE2_32_VERSION}
|
||||
SOVERSION ${LIBPCRE2_32_SOVERSION}
|
||||
OUTPUT_NAME pcre2-32
|
||||
)
|
||||
if(REQUIRE_PTHREAD)
|
||||
target_link_libraries(pcre2-32-shared Threads::Threads)
|
||||
endif()
|
||||
set(TARGETS ${TARGETS} pcre2-32-shared)
|
||||
set(DLL_PDB_FILES $<TARGET_PDB_FILE_DIR:pcre2-32-shared>/pcre2-32.pdb ${DLL_PDB_FILES})
|
||||
set(DLL_PDB_DEBUG_FILES $<TARGET_PDB_FILE_DIR:pcre2-32-shared>/pcre2-32d.pdb ${DLL_PDB_DEBUG_FILES})
|
||||
|
||||
if(MINGW)
|
||||
if(NON_STANDARD_LIB_PREFIX)
|
||||
set_target_properties(pcre2-32-shared PROPERTIES PREFIX "")
|
||||
endif()
|
||||
if(NON_STANDARD_LIB_SUFFIX)
|
||||
set_target_properties(pcre2-32-shared PROPERTIES SUFFIX "-0.dll")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(BUILD_STATIC_LIBS)
|
||||
add_library(pcre2-32 ALIAS pcre2-32-static)
|
||||
else()
|
||||
add_library(pcre2-32 ALIAS pcre2-32-shared)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Generate pkg-config files
|
||||
|
||||
set(PACKAGE_VERSION "${PCRE2_MAJOR}.${PCRE2_MINOR}")
|
||||
set(prefix ${CMAKE_INSTALL_PREFIX})
|
||||
set(exec_prefix "\${prefix}")
|
||||
set(libdir "\${exec_prefix}/${CMAKE_INSTALL_LIBDIR}")
|
||||
set(includedir "\${prefix}/include")
|
||||
if(WIN32 AND (CMAKE_BUILD_TYPE MATCHES Debug))
|
||||
set(LIB_POSTFIX ${CMAKE_DEBUG_POSTFIX})
|
||||
endif()
|
||||
|
||||
if(PCRE2_BUILD_PCRE2_8)
|
||||
configure_file(libpcre2-posix.pc.in libpcre2-posix.pc @ONLY)
|
||||
list(APPEND pkg_config_files "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-posix.pc")
|
||||
configure_file(libpcre2-8.pc.in libpcre2-8.pc @ONLY)
|
||||
list(APPEND pkg_config_files "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-8.pc")
|
||||
set(enable_pcre2_8 "yes")
|
||||
else()
|
||||
set(enable_pcre2_8 "no")
|
||||
endif()
|
||||
|
||||
if(PCRE2_BUILD_PCRE2_16)
|
||||
configure_file(libpcre2-16.pc.in libpcre2-16.pc @ONLY)
|
||||
list(APPEND pkg_config_files "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-16.pc")
|
||||
set(enable_pcre2_16 "yes")
|
||||
else()
|
||||
set(enable_pcre2_16 "no")
|
||||
endif()
|
||||
|
||||
if(PCRE2_BUILD_PCRE2_32)
|
||||
configure_file(libpcre2-32.pc.in libpcre2-32.pc @ONLY)
|
||||
list(APPEND pkg_config_files "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-32.pc")
|
||||
set(enable_pcre2_32 "yes")
|
||||
else()
|
||||
set(enable_pcre2_32 "no")
|
||||
endif()
|
||||
|
||||
configure_file(pcre2-config.in pcre2-config @ONLY NEWLINE_STYLE LF)
|
||||
|
||||
# Executables
|
||||
|
||||
if(PCRE2_BUILD_PCRE2GREP)
|
||||
add_executable(pcre2grep src/pcre2grep.c)
|
||||
set_property(TARGET pcre2grep PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
|
||||
set(TARGETS ${TARGETS} pcre2grep)
|
||||
target_link_libraries(pcre2grep pcre2-posix ${PCRE2GREP_LIBS})
|
||||
endif()
|
||||
|
||||
# Testing
|
||||
|
||||
if(PCRE2_BUILD_TESTS)
|
||||
enable_testing()
|
||||
|
||||
set(PCRE2TEST_SOURCES src/pcre2test.c)
|
||||
|
||||
if(MSVC)
|
||||
# This is needed to avoid a stack overflow error in the standard tests. The
|
||||
# flag should be indicated with a forward-slash instead of a hyphen, but
|
||||
# then CMake treats it as a file path.
|
||||
set(PCRE2TEST_LINKER_FLAGS -STACK:2500000)
|
||||
endif()
|
||||
|
||||
add_executable(pcre2test ${PCRE2TEST_SOURCES})
|
||||
set(TARGETS ${TARGETS} pcre2test)
|
||||
if(PCRE2_BUILD_PCRE2_8)
|
||||
list(APPEND PCRE2TEST_LIBS pcre2-posix pcre2-8)
|
||||
endif()
|
||||
if(PCRE2_BUILD_PCRE2_16)
|
||||
list(APPEND PCRE2TEST_LIBS pcre2-16)
|
||||
endif()
|
||||
if(PCRE2_BUILD_PCRE2_32)
|
||||
list(APPEND PCRE2TEST_LIBS pcre2-32)
|
||||
endif()
|
||||
target_link_libraries(pcre2test ${PCRE2TEST_LIBS} ${PCRE2TEST_LINKER_FLAGS})
|
||||
|
||||
if(PCRE2_BUILD_PCRE2_8)
|
||||
add_executable(pcre2posix_test src/pcre2posix_test.c)
|
||||
target_link_libraries(pcre2posix_test pcre2-posix pcre2-8)
|
||||
endif()
|
||||
|
||||
if(PCRE2_SUPPORT_JIT)
|
||||
add_executable(pcre2_jit_test src/pcre2_jit_test.c)
|
||||
set(PCRE2_JIT_TEST_LIBS)
|
||||
if(PCRE2_BUILD_PCRE2_8)
|
||||
list(APPEND PCRE2_JIT_TEST_LIBS pcre2-8)
|
||||
endif()
|
||||
if(PCRE2_BUILD_PCRE2_16)
|
||||
list(APPEND PCRE2_JIT_TEST_LIBS pcre2-16)
|
||||
endif()
|
||||
if(PCRE2_BUILD_PCRE2_32)
|
||||
list(APPEND PCRE2_JIT_TEST_LIBS pcre2-32)
|
||||
endif()
|
||||
target_link_libraries(pcre2_jit_test ${PCRE2_JIT_TEST_LIBS})
|
||||
endif()
|
||||
|
||||
# =================================================
|
||||
# Write out a CTest configuration file
|
||||
#
|
||||
file(
|
||||
WRITE
|
||||
${PROJECT_BINARY_DIR}/CTestCustom.ctest
|
||||
"# This is a generated file.
|
||||
MESSAGE(\"When testing is complete, review test output in the
|
||||
\\\"${PROJECT_BINARY_DIR}/Testing/Temporary\\\" folder.\")
|
||||
MESSAGE(\" \")
|
||||
"
|
||||
)
|
||||
|
||||
file(
|
||||
WRITE
|
||||
${PROJECT_BINARY_DIR}/pcre2_test.sh
|
||||
"#! /bin/sh
|
||||
# This is a generated file.
|
||||
srcdir=${PROJECT_SOURCE_DIR}
|
||||
pcre2test=${PROJECT_BINARY_DIR}/pcre2test
|
||||
test -z \"$CMAKE_CONFIG_TYPE\" || pcre2test=${PROJECT_BINARY_DIR}/$CMAKE_CONFIG_TYPE/pcre2test
|
||||
. ${PROJECT_SOURCE_DIR}/RunTest
|
||||
if test \"$?\" != \"0\"; then exit 1; fi
|
||||
# End
|
||||
"
|
||||
)
|
||||
|
||||
if(UNIX)
|
||||
add_test(pcre2_test sh ${PROJECT_BINARY_DIR}/pcre2_test.sh)
|
||||
endif()
|
||||
|
||||
if(PCRE2_BUILD_PCRE2GREP)
|
||||
file(
|
||||
WRITE
|
||||
${PROJECT_BINARY_DIR}/pcre2_grep_test.sh
|
||||
"#! /bin/sh
|
||||
# This is a generated file.
|
||||
srcdir=${PROJECT_SOURCE_DIR}
|
||||
pcre2grep=${PROJECT_BINARY_DIR}/pcre2grep
|
||||
test -z \"$CMAKE_CONFIG_TYPE\" || pcre2grep=${PROJECT_BINARY_DIR}/$CMAKE_CONFIG_TYPE/pcre2grep
|
||||
pcre2test=${PROJECT_BINARY_DIR}/pcre2test
|
||||
test -z \"$CMAKE_CONFIG_TYPE\" || pcre2test=${PROJECT_BINARY_DIR}/$CMAKE_CONFIG_TYPE/pcre2test
|
||||
. ${PROJECT_SOURCE_DIR}/RunGrepTest
|
||||
if test \"$?\" != \"0\"; then exit 1; fi
|
||||
# End
|
||||
"
|
||||
)
|
||||
|
||||
if(UNIX)
|
||||
add_test(pcre2_grep_test sh ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(WIN32)
|
||||
# Provide environment for executing the bat file version of RunTest
|
||||
file(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} winsrc)
|
||||
file(TO_NATIVE_PATH ${PROJECT_BINARY_DIR} winbin)
|
||||
|
||||
file(
|
||||
WRITE
|
||||
${PROJECT_BINARY_DIR}/pcre2_test.bat
|
||||
"\@REM This is a generated file.
|
||||
\@echo off
|
||||
setlocal
|
||||
SET srcdir=\"${winsrc}\"
|
||||
SET pcre2test=\"${winbin}\\pcre2test.exe\"
|
||||
if not [%CMAKE_CONFIG_TYPE%]==[] SET pcre2test=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcre2test.exe\"
|
||||
call %srcdir%\\RunTest.bat
|
||||
if errorlevel 1 exit /b 1
|
||||
echo RunTest.bat tests successfully completed
|
||||
"
|
||||
)
|
||||
|
||||
add_test(NAME pcre2_test_bat COMMAND pcre2_test.bat)
|
||||
set_tests_properties(pcre2_test_bat PROPERTIES PASS_REGULAR_EXPRESSION "RunTest\\.bat tests successfully completed")
|
||||
|
||||
if(PCRE2_BUILD_PCRE2GREP)
|
||||
file(
|
||||
WRITE
|
||||
${PROJECT_BINARY_DIR}/pcre2_grep_test.bat
|
||||
"\@REM This is a generated file.
|
||||
\@echo off
|
||||
setlocal
|
||||
SET srcdir=\"${winsrc}\"
|
||||
SET pcre2test=\"${winbin}\\pcre2test.exe\"
|
||||
if not [%CMAKE_CONFIG_TYPE%]==[] SET pcre2test=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcre2test.exe\"
|
||||
SET pcre2grep=\"${winbin}\\pcre2grep.exe\"
|
||||
if not [%CMAKE_CONFIG_TYPE%]==[] SET pcre2grep=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcre2grep.exe\"
|
||||
call %srcdir%\\RunGrepTest.bat
|
||||
if errorlevel 1 exit /b 1
|
||||
echo RunGrepTest.bat tests successfully completed
|
||||
"
|
||||
)
|
||||
|
||||
add_test(NAME pcre2_grep_test_bat COMMAND pcre2_grep_test.bat)
|
||||
set_tests_properties(
|
||||
pcre2_grep_test_bat
|
||||
PROPERTIES PASS_REGULAR_EXPRESSION "RunGrepTest\\.bat tests successfully completed"
|
||||
)
|
||||
endif()
|
||||
|
||||
if("$ENV{OSTYPE}" STREQUAL "msys")
|
||||
# Both the sh and bat file versions of RunTest are run if make test is used
|
||||
# in msys
|
||||
add_test(pcre2_test_sh sh.exe ${PROJECT_BINARY_DIR}/pcre2_test.sh)
|
||||
if(PCRE2_BUILD_PCRE2GREP)
|
||||
add_test(pcre2_grep_test sh.exe ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Changed to accommodate testing whichever location was just built
|
||||
|
||||
if(PCRE2_SUPPORT_JIT)
|
||||
add_test(pcre2_jit_test pcre2_jit_test)
|
||||
endif()
|
||||
|
||||
if(PCRE2_BUILD_PCRE2_8)
|
||||
add_test(pcre2posix_test pcre2posix_test)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Installation
|
||||
|
||||
set(CMAKE_INSTALL_ALWAYS 1)
|
||||
|
||||
install(
|
||||
TARGETS ${TARGETS}
|
||||
RUNTIME DESTINATION bin
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
)
|
||||
install(FILES ${pkg_config_files} DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
|
||||
install(
|
||||
FILES "${CMAKE_CURRENT_BINARY_DIR}/pcre2-config"
|
||||
DESTINATION bin
|
||||
# Set 0755 permissions
|
||||
PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE
|
||||
)
|
||||
|
||||
install(FILES ${PCRE2_HEADERS} ${PCRE2POSIX_HEADERS} DESTINATION include)
|
||||
|
||||
# CMake config files.
|
||||
set(PCRE2_CONFIG_IN ${CMAKE_CURRENT_SOURCE_DIR}/cmake/pcre2-config.cmake.in)
|
||||
set(PCRE2_CONFIG_OUT ${CMAKE_CURRENT_BINARY_DIR}/cmake/pcre2-config.cmake)
|
||||
configure_file(${PCRE2_CONFIG_IN} ${PCRE2_CONFIG_OUT} @ONLY)
|
||||
set(PCRE2_CONFIG_VERSION_IN ${CMAKE_CURRENT_SOURCE_DIR}/cmake/pcre2-config-version.cmake.in)
|
||||
set(PCRE2_CONFIG_VERSION_OUT ${CMAKE_CURRENT_BINARY_DIR}/cmake/pcre2-config-version.cmake)
|
||||
configure_file(${PCRE2_CONFIG_VERSION_IN} ${PCRE2_CONFIG_VERSION_OUT} @ONLY)
|
||||
install(FILES ${PCRE2_CONFIG_OUT} ${PCRE2_CONFIG_VERSION_OUT} DESTINATION "${PCRE2_INSTALL_CMAKEDIR}")
|
||||
|
||||
file(GLOB html ${PROJECT_SOURCE_DIR}/doc/html/*.html ${PROJECT_SOURCE_DIR}/doc/html/*.txt)
|
||||
file(
|
||||
GLOB txts
|
||||
${PROJECT_SOURCE_DIR}/doc/*.txt
|
||||
AUTHORS.md
|
||||
COPYING
|
||||
ChangeLog
|
||||
LICENCE.md
|
||||
NEWS
|
||||
README
|
||||
SECURITY.md
|
||||
)
|
||||
file(GLOB man1 ${PROJECT_SOURCE_DIR}/doc/*.1)
|
||||
file(GLOB man3 ${PROJECT_SOURCE_DIR}/doc/*.3)
|
||||
|
||||
install(FILES ${man1} DESTINATION ${CMAKE_INSTALL_MANDIR}/man1)
|
||||
install(FILES ${man3} DESTINATION ${CMAKE_INSTALL_MANDIR}/man3)
|
||||
install(FILES ${txts} DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/doc/pcre2)
|
||||
install(FILES ${html} DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/doc/pcre2/html)
|
||||
|
||||
if(MSVC AND INSTALL_MSVC_PDB)
|
||||
install(FILES ${DLL_PDB_FILES} DESTINATION bin CONFIGURATIONS RelWithDebInfo)
|
||||
install(FILES ${DLL_PDB_DEBUG_FILES} DESTINATION bin CONFIGURATIONS Debug)
|
||||
endif()
|
||||
|
||||
# Normalize BUILD_STATIC_LIBS to ON/OFF, only for nice output in the configuration summary
|
||||
if(BUILD_STATIC_LIBS)
|
||||
set(BUILD_STATIC_LIBS ON)
|
||||
else()
|
||||
set(BUILD_STATIC_LIBS OFF)
|
||||
endif()
|
||||
|
||||
if(PCRE2_HEAP_MATCH_RECURSE)
|
||||
message(WARNING "HEAP_MATCH_RECURSE is obsolete and does nothing.")
|
||||
endif()
|
||||
|
||||
if(PCRE2_SHOW_REPORT)
|
||||
message(STATUS "")
|
||||
message(STATUS "")
|
||||
message(STATUS "PCRE2-${PCRE2_MAJOR}.${PCRE2_MINOR} configuration summary:")
|
||||
message(STATUS "")
|
||||
message(STATUS " Install prefix .................... : ${CMAKE_INSTALL_PREFIX}")
|
||||
message(STATUS " C compiler ........................ : ${CMAKE_C_COMPILER}")
|
||||
|
||||
if(CMAKE_C_FLAGS)
|
||||
set(CFSP " ")
|
||||
endif()
|
||||
if(CMAKE_CONFIGURATION_TYPES)
|
||||
foreach(config IN LISTS CMAKE_CONFIGURATION_TYPES)
|
||||
string(TOUPPER "${config}" buildtype)
|
||||
string(LENGTH " (${config})" buildtypelen)
|
||||
math(EXPR dotslen "18 - ${buildtypelen}")
|
||||
string(REPEAT "." ${dotslen} dots)
|
||||
message(STATUS " C compiler flags (${config}) ${dots} : ${CMAKE_C_FLAGS}${CFSP}${CMAKE_C_FLAGS_${buildtype}}")
|
||||
endforeach()
|
||||
else()
|
||||
string(TOUPPER "${CMAKE_BUILD_TYPE}" buildtype)
|
||||
message(STATUS " C compiler flags .................. : ${CMAKE_C_FLAGS}${CFSP}${CMAKE_C_FLAGS_${buildtype}}")
|
||||
endif()
|
||||
|
||||
message(STATUS "")
|
||||
if(CMAKE_CONFIGURATION_TYPES)
|
||||
message(STATUS " Build configurations .............. : ${CMAKE_CONFIGURATION_TYPES}")
|
||||
else()
|
||||
message(STATUS " Build type ........................ : ${CMAKE_BUILD_TYPE}")
|
||||
endif()
|
||||
message(STATUS " Build 8 bit PCRE2 library ......... : ${PCRE2_BUILD_PCRE2_8}")
|
||||
message(STATUS " Build 16 bit PCRE2 library ........ : ${PCRE2_BUILD_PCRE2_16}")
|
||||
message(STATUS " Build 32 bit PCRE2 library ........ : ${PCRE2_BUILD_PCRE2_32}")
|
||||
message(STATUS " Include debugging code ............ : ${PCRE2_DEBUG}")
|
||||
message(STATUS " Enable JIT compiling support ...... : ${PCRE2_SUPPORT_JIT}")
|
||||
message(STATUS " Use SELinux allocator in JIT ...... : ${PCRE2_SUPPORT_JIT_SEALLOC}")
|
||||
message(STATUS " Enable Unicode support ............ : ${PCRE2_SUPPORT_UNICODE}")
|
||||
message(STATUS " Newline char/sequence ............. : ${PCRE2_NEWLINE}")
|
||||
message(STATUS " \\R matches only ANYCRLF ........... : ${PCRE2_SUPPORT_BSR_ANYCRLF}")
|
||||
message(STATUS " \\C is disabled .................... : ${PCRE2_NEVER_BACKSLASH_C}")
|
||||
message(STATUS " EBCDIC coding ..................... : ${PCRE2_EBCDIC}")
|
||||
message(STATUS " EBCDIC coding with NL=0x25 ........ : ${PCRE2_EBCDIC_NL25}")
|
||||
message(STATUS " Rebuild char tables ............... : ${PCRE2_REBUILD_CHARTABLES}")
|
||||
message(STATUS " Internal link size ................ : ${PCRE2_LINK_SIZE}")
|
||||
message(STATUS " Maximum variable lookbehind ....... : ${PCRE2_MAX_VARLOOKBEHIND}")
|
||||
message(STATUS " Parentheses nest limit ............ : ${PCRE2_PARENS_NEST_LIMIT}")
|
||||
message(STATUS " Heap limit ........................ : ${PCRE2_HEAP_LIMIT}")
|
||||
message(STATUS " Match limit ....................... : ${PCRE2_MATCH_LIMIT}")
|
||||
message(STATUS " Match depth limit ................. : ${PCRE2_MATCH_LIMIT_DEPTH}")
|
||||
message(STATUS " Build shared libs ................. : ${BUILD_SHARED_LIBS}")
|
||||
message(STATUS " Build static libs ................. : ${BUILD_STATIC_LIBS}")
|
||||
message(STATUS " with PIC enabled ............... : ${PCRE2_STATIC_PIC}")
|
||||
message(STATUS " Build pcre2grep ................... : ${PCRE2_BUILD_PCRE2GREP}")
|
||||
message(STATUS " Enable JIT in pcre2grep ........... : ${PCRE2GREP_SUPPORT_JIT}")
|
||||
message(STATUS " Enable callouts in pcre2grep ...... : ${PCRE2GREP_SUPPORT_CALLOUT}")
|
||||
message(STATUS " Enable callout fork in pcre2grep .. : ${PCRE2GREP_SUPPORT_CALLOUT_FORK}")
|
||||
message(STATUS " Buffer size for pcre2grep ......... : ${PCRE2GREP_BUFSIZE}")
|
||||
message(STATUS " Build tests (implies pcre2test .... : ${PCRE2_BUILD_TESTS}")
|
||||
message(STATUS " and pcre2grep)")
|
||||
if(ZLIB_FOUND)
|
||||
message(STATUS " Link pcre2grep with libz .......... : ${PCRE2_SUPPORT_LIBZ}")
|
||||
else()
|
||||
message(STATUS " Link pcre2grep with libz .......... : Library not found")
|
||||
endif()
|
||||
if(BZIP2_FOUND)
|
||||
message(STATUS " Link pcre2grep with libbz2 ........ : ${PCRE2_SUPPORT_LIBBZ2}")
|
||||
else()
|
||||
message(STATUS " Link pcre2grep with libbz2 ........ : Library not found")
|
||||
endif()
|
||||
if(EDITLINE_FOUND)
|
||||
message(STATUS " Link pcre2test with libeditline ... : ${PCRE2_SUPPORT_LIBEDIT}")
|
||||
else()
|
||||
message(STATUS " Link pcre2test with libeditline ... : Library not found")
|
||||
endif()
|
||||
if(READLINE_FOUND)
|
||||
message(STATUS " Link pcre2test with libreadline ... : ${PCRE2_SUPPORT_LIBREADLINE}")
|
||||
else()
|
||||
message(STATUS " Link pcre2test with libreadline ... : Library not found")
|
||||
endif()
|
||||
message(STATUS " Support Valgrind .................. : ${PCRE2_SUPPORT_VALGRIND}")
|
||||
if(PCRE2_DISABLE_PERCENT_ZT)
|
||||
message(STATUS " Use %zu and %td ................... : OFF")
|
||||
else()
|
||||
message(STATUS " Use %zu and %td ................... : AUTO")
|
||||
endif()
|
||||
|
||||
if(MINGW AND BUILD_SHARED_LIBS)
|
||||
message(STATUS " Non-standard dll names (prefix) ... : ${NON_STANDARD_LIB_PREFIX}")
|
||||
message(STATUS " Non-standard dll names (suffix) ... : ${NON_STANDARD_LIB_SUFFIX}")
|
||||
endif()
|
||||
|
||||
if(MSVC)
|
||||
message(STATUS " Install MSVC .pdb files ........... : ${INSTALL_MSVC_PDB}")
|
||||
endif()
|
||||
|
||||
message(STATUS "")
|
||||
endif()
|
||||
|
||||
# end CMakeLists.txt
|
||||
103
3rd/pcre2/LICENCE.md
Normal file
103
3rd/pcre2/LICENCE.md
Normal file
@@ -0,0 +1,103 @@
|
||||
PCRE2 License
|
||||
=============
|
||||
|
||||
| SPDX-License-Identifier: | BSD-3-Clause WITH PCRE2-exception |
|
||||
|---------|-------|
|
||||
|
||||
PCRE2 is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Releases 10.00 and above of PCRE2 are distributed under the terms of the "BSD"
|
||||
licence, as specified below, with one exemption for certain binary
|
||||
redistributions. The documentation for PCRE2, supplied in the "doc" directory,
|
||||
is distributed under the same terms as the software itself. The data in the
|
||||
testdata directory is not copyrighted and is in the public domain.
|
||||
|
||||
The basic library functions are written in C and are freestanding. Also
|
||||
included in the distribution is a just-in-time compiler that can be used to
|
||||
optimize pattern matching. This is an optional feature that can be omitted when
|
||||
the library is built.
|
||||
|
||||
|
||||
COPYRIGHT
|
||||
---------
|
||||
|
||||
### The basic library functions
|
||||
|
||||
Written by: Philip Hazel
|
||||
Email local part: Philip.Hazel
|
||||
Email domain: gmail.com
|
||||
|
||||
Retired from University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
Copyright (c) 2007-2024 Philip Hazel
|
||||
All rights reserved.
|
||||
|
||||
### PCRE2 Just-In-Time compilation support
|
||||
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright (c) 2010-2024 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
### Stack-less Just-In-Time compiler
|
||||
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright (c) 2009-2024 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
### All other contributions
|
||||
|
||||
Many other contributors have participated in the authorship of PCRE2. As PCRE2
|
||||
has never required a Contributor Licensing Agreement, or other copyright
|
||||
assignment agreement, all contributions have copyright retained by each
|
||||
original contributor or their employer.
|
||||
|
||||
|
||||
THE "BSD" LICENCE
|
||||
-----------------
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notices,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notices, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of any
|
||||
contributors may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
EXEMPTION FOR BINARY LIBRARY-LIKE PACKAGES
|
||||
------------------------------------------
|
||||
|
||||
The second condition in the BSD licence (covering binary redistributions) does
|
||||
not apply all the way down a chain of software. If binary package A includes
|
||||
PCRE2, it must respect the condition, but if package B is software that
|
||||
includes package A, the condition is not imposed on package B unless it uses
|
||||
PCRE2 independently.
|
||||
|
||||
End
|
||||
22
3rd/pcre2/cmake/COPYING-CMAKE-SCRIPTS
Normal file
22
3rd/pcre2/cmake/COPYING-CMAKE-SCRIPTS
Normal file
@@ -0,0 +1,22 @@
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
3. The name of the author may not be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
13
3rd/pcre2/cmake/FindEditline.cmake
Normal file
13
3rd/pcre2/cmake/FindEditline.cmake
Normal file
@@ -0,0 +1,13 @@
|
||||
# Modified from FindReadline.cmake (PH Feb 2012)
|
||||
|
||||
if(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY)
|
||||
set(EDITLINE_FOUND TRUE)
|
||||
else()
|
||||
find_path(EDITLINE_INCLUDE_DIR readline.h PATH_SUFFIXES editline edit/readline)
|
||||
|
||||
find_library(EDITLINE_LIBRARY NAMES edit)
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)
|
||||
|
||||
mark_as_advanced(EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)
|
||||
endif()
|
||||
27
3rd/pcre2/cmake/FindReadline.cmake
Normal file
27
3rd/pcre2/cmake/FindReadline.cmake
Normal file
@@ -0,0 +1,27 @@
|
||||
# from http://websvn.kde.org/trunk/KDE/kdeedu/cmake/modules/FindReadline.cmake
|
||||
# http://websvn.kde.org/trunk/KDE/kdeedu/cmake/modules/COPYING-CMAKE-SCRIPTS
|
||||
# --> BSD licensed
|
||||
#
|
||||
# GNU Readline library finder
|
||||
if(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY)
|
||||
set(READLINE_FOUND TRUE)
|
||||
else()
|
||||
find_path(READLINE_INCLUDE_DIR readline/readline.h /usr/include/readline)
|
||||
|
||||
# 2008-04-22 The next clause used to read like this:
|
||||
#
|
||||
# FIND_LIBRARY(READLINE_LIBRARY NAMES readline)
|
||||
# FIND_LIBRARY(NCURSES_LIBRARY NAMES ncurses )
|
||||
# include(FindPackageHandleStandardArgs)
|
||||
# FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG NCURSES_LIBRARY READLINE_INCLUDE_DIR READLINE_LIBRARY )
|
||||
#
|
||||
# I was advised to modify it such that it will find an ncurses library if
|
||||
# required, but not if one was explicitly given, that is, it allows the
|
||||
# default to be overridden. PH
|
||||
|
||||
find_library(READLINE_LIBRARY NAMES readline)
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(Readline DEFAULT_MSG READLINE_INCLUDE_DIR READLINE_LIBRARY)
|
||||
|
||||
mark_as_advanced(READLINE_INCLUDE_DIR READLINE_LIBRARY)
|
||||
endif()
|
||||
14
3rd/pcre2/cmake/pcre2-config-version.cmake.in
Normal file
14
3rd/pcre2/cmake/pcre2-config-version.cmake.in
Normal file
@@ -0,0 +1,14 @@
|
||||
set(PACKAGE_VERSION_MAJOR @PCRE2_MAJOR@)
|
||||
set(PACKAGE_VERSION_MINOR @PCRE2_MINOR@)
|
||||
set(PACKAGE_VERSION_PATCH 0)
|
||||
set(PACKAGE_VERSION @PCRE2_MAJOR@.@PCRE2_MINOR@.0)
|
||||
|
||||
# Check whether the requested PACKAGE_FIND_VERSION is compatible
|
||||
if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION OR PACKAGE_VERSION_MAJOR GREATER PACKAGE_FIND_VERSION_MAJOR)
|
||||
set(PACKAGE_VERSION_COMPATIBLE FALSE)
|
||||
else()
|
||||
set(PACKAGE_VERSION_COMPATIBLE TRUE)
|
||||
if(PACKAGE_VERSION VERSION_EQUAL PACKAGE_FIND_VERSION)
|
||||
set(PACKAGE_VERSION_EXACT TRUE)
|
||||
endif()
|
||||
endif()
|
||||
168
3rd/pcre2/cmake/pcre2-config.cmake.in
Normal file
168
3rd/pcre2/cmake/pcre2-config.cmake.in
Normal file
@@ -0,0 +1,168 @@
|
||||
# pcre2-config.cmake
|
||||
# ----------------
|
||||
#
|
||||
# Finds the PCRE2 library. Specify the starting search path in PCRE2_ROOT.
|
||||
#
|
||||
# Static vs. shared
|
||||
# -----------------
|
||||
# To make use of the static library instead of the shared one, one needs
|
||||
# to set the variable PCRE2_USE_STATIC_LIBS to ON before calling find_package.
|
||||
# Example:
|
||||
# set(PCRE2_USE_STATIC_LIBS ON)
|
||||
# find_package(PCRE2 CONFIG COMPONENTS 8BIT)
|
||||
#
|
||||
# This will define the following variables:
|
||||
#
|
||||
# PCRE2_FOUND - True if the system has the PCRE2 library.
|
||||
# PCRE2_VERSION - The version of the PCRE2 library which was found.
|
||||
#
|
||||
# and the following imported targets:
|
||||
#
|
||||
# PCRE2::8BIT - The 8 bit PCRE2 library.
|
||||
# PCRE2::16BIT - The 16 bit PCRE2 library.
|
||||
# PCRE2::32BIT - The 32 bit PCRE2 library.
|
||||
# PCRE2::POSIX - The POSIX PCRE2 library.
|
||||
|
||||
set(PCRE2_NON_STANDARD_LIB_PREFIX @NON_STANDARD_LIB_PREFIX@)
|
||||
set(PCRE2_NON_STANDARD_LIB_SUFFIX @NON_STANDARD_LIB_SUFFIX@)
|
||||
set(PCRE2_8BIT_NAME pcre2-8)
|
||||
set(PCRE2_16BIT_NAME pcre2-16)
|
||||
set(PCRE2_32BIT_NAME pcre2-32)
|
||||
set(PCRE2_POSIX_NAME pcre2-posix)
|
||||
find_path(PCRE2_INCLUDE_DIR NAMES pcre2.h DOC "PCRE2 include directory")
|
||||
if(PCRE2_USE_STATIC_LIBS)
|
||||
if(MSVC)
|
||||
set(PCRE2_8BIT_NAME pcre2-8-static)
|
||||
set(PCRE2_16BIT_NAME pcre2-16-static)
|
||||
set(PCRE2_32BIT_NAME pcre2-32-static)
|
||||
set(PCRE2_POSIX_NAME pcre2-posix-static)
|
||||
endif()
|
||||
|
||||
set(PCRE2_PREFIX ${CMAKE_STATIC_LIBRARY_PREFIX})
|
||||
set(PCRE2_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
|
||||
else()
|
||||
set(PCRE2_PREFIX ${CMAKE_SHARED_LIBRARY_PREFIX})
|
||||
if(MINGW AND PCRE2_NON_STANDARD_LIB_PREFIX)
|
||||
set(PCRE2_PREFIX "")
|
||||
endif()
|
||||
|
||||
set(PCRE2_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX})
|
||||
if(MINGW AND PCRE2_NON_STANDARD_LIB_SUFFIX)
|
||||
set(PCRE2_SUFFIX "-0.dll")
|
||||
elseif(MSVC)
|
||||
set(PCRE2_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
|
||||
endif()
|
||||
endif()
|
||||
find_library(
|
||||
PCRE2_8BIT_LIBRARY
|
||||
NAMES ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}d${PCRE2_SUFFIX}
|
||||
DOC "8 bit PCRE2 library"
|
||||
)
|
||||
find_library(
|
||||
PCRE2_16BIT_LIBRARY
|
||||
NAMES ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}d${PCRE2_SUFFIX}
|
||||
DOC "16 bit PCRE2 library"
|
||||
)
|
||||
find_library(
|
||||
PCRE2_32BIT_LIBRARY
|
||||
NAMES ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}d${PCRE2_SUFFIX}
|
||||
DOC "32 bit PCRE2 library"
|
||||
)
|
||||
find_library(
|
||||
PCRE2_POSIX_LIBRARY
|
||||
NAMES ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}d${PCRE2_SUFFIX}
|
||||
DOC "8 bit POSIX PCRE2 library"
|
||||
)
|
||||
unset(PCRE2_NON_STANDARD_LIB_PREFIX)
|
||||
unset(PCRE2_NON_STANDARD_LIB_SUFFIX)
|
||||
unset(PCRE2_8BIT_NAME)
|
||||
unset(PCRE2_16BIT_NAME)
|
||||
unset(PCRE2_32BIT_NAME)
|
||||
unset(PCRE2_POSIX_NAME)
|
||||
|
||||
# Set version
|
||||
if(PCRE2_INCLUDE_DIR)
|
||||
set(PCRE2_VERSION "@PCRE2_MAJOR@.@PCRE2_MINOR@.0")
|
||||
endif()
|
||||
|
||||
# Which components have been found.
|
||||
if(PCRE2_8BIT_LIBRARY)
|
||||
set(PCRE2_8BIT_FOUND TRUE)
|
||||
endif()
|
||||
if(PCRE2_16BIT_LIBRARY)
|
||||
set(PCRE2_16BIT_FOUND TRUE)
|
||||
endif()
|
||||
if(PCRE2_32BIT_LIBRARY)
|
||||
set(PCRE2_32BIT_FOUND TRUE)
|
||||
endif()
|
||||
if(PCRE2_POSIX_LIBRARY)
|
||||
set(PCRE2_POSIX_FOUND TRUE)
|
||||
endif()
|
||||
|
||||
# Check if at least one component has been specified.
|
||||
list(LENGTH PCRE2_FIND_COMPONENTS PCRE2_NCOMPONENTS)
|
||||
if(PCRE2_NCOMPONENTS LESS 1)
|
||||
message(FATAL_ERROR "No components have been specified. This is not allowed. Please, specify at least one component.")
|
||||
endif()
|
||||
unset(PCRE2_NCOMPONENTS)
|
||||
|
||||
# When the POSIX component has been specified, make sure that the 8BIT component is also specified.
|
||||
set(PCRE2_8BIT_COMPONENT FALSE)
|
||||
set(PCRE2_POSIX_COMPONENT FALSE)
|
||||
foreach(component ${PCRE2_FIND_COMPONENTS})
|
||||
if(component STREQUAL "8BIT")
|
||||
set(PCRE2_8BIT_COMPONENT TRUE)
|
||||
elseif(component STREQUAL "POSIX")
|
||||
set(PCRE2_POSIX_COMPONENT TRUE)
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
if(PCRE2_POSIX_COMPONENT AND NOT PCRE2_8BIT_COMPONENT)
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"The component POSIX is specified while the 8BIT one is not. This is not allowed. Please, also specify the 8BIT component."
|
||||
)
|
||||
endif()
|
||||
unset(PCRE2_8BIT_COMPONENT)
|
||||
unset(PCRE2_POSIX_COMPONENT)
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
set(${CMAKE_FIND_PACKAGE_NAME}_CONFIG "${CMAKE_CURRENT_LIST_FILE}")
|
||||
find_package_handle_standard_args(
|
||||
PCRE2
|
||||
FOUND_VAR PCRE2_FOUND
|
||||
REQUIRED_VARS PCRE2_INCLUDE_DIR
|
||||
HANDLE_COMPONENTS
|
||||
VERSION_VAR PCRE2_VERSION
|
||||
CONFIG_MODE
|
||||
)
|
||||
|
||||
set(PCRE2_LIBRARIES)
|
||||
if(PCRE2_FOUND)
|
||||
foreach(component ${PCRE2_FIND_COMPONENTS})
|
||||
if(PCRE2_USE_STATIC_LIBS)
|
||||
add_library(PCRE2::${component} STATIC IMPORTED)
|
||||
target_compile_definitions(PCRE2::${component} INTERFACE PCRE2_STATIC)
|
||||
else()
|
||||
add_library(PCRE2::${component} SHARED IMPORTED)
|
||||
endif()
|
||||
set_target_properties(
|
||||
PCRE2::${component}
|
||||
PROPERTIES
|
||||
IMPORTED_LOCATION "${PCRE2_${component}_LIBRARY}"
|
||||
IMPORTED_IMPLIB "${PCRE2_${component}_LIBRARY}"
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${PCRE2_INCLUDE_DIR}"
|
||||
)
|
||||
if(component STREQUAL "POSIX")
|
||||
set_target_properties(
|
||||
PCRE2::${component}
|
||||
PROPERTIES INTERFACE_LINK_LIBRARIES "PCRE2::8BIT" LINK_LIBRARIES "PCRE2::8BIT"
|
||||
)
|
||||
endif()
|
||||
|
||||
set(PCRE2_LIBRARIES ${PCRE2_LIBRARIES} ${PCRE2_${component}_LIBRARY})
|
||||
mark_as_advanced(PCRE2_${component}_LIBRARY)
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
mark_as_advanced(PCRE2_INCLUDE_DIR)
|
||||
58
3rd/pcre2/config-cmake.h.in
Normal file
58
3rd/pcre2/config-cmake.h.in
Normal file
@@ -0,0 +1,58 @@
|
||||
/* config.h for CMake builds */
|
||||
|
||||
#cmakedefine HAVE_ASSERT_H 1
|
||||
#cmakedefine HAVE_BUILTIN_ASSUME 1
|
||||
#cmakedefine HAVE_BUILTIN_MUL_OVERFLOW 1
|
||||
#cmakedefine HAVE_BUILTIN_UNREACHABLE 1
|
||||
#cmakedefine HAVE_ATTRIBUTE_UNINITIALIZED 1
|
||||
#cmakedefine HAVE_DIRENT_H 1
|
||||
#cmakedefine HAVE_SYS_STAT_H 1
|
||||
#cmakedefine HAVE_SYS_TYPES_H 1
|
||||
#cmakedefine HAVE_UNISTD_H 1
|
||||
#cmakedefine HAVE_WINDOWS_H 1
|
||||
|
||||
#cmakedefine HAVE_BCOPY 1
|
||||
#cmakedefine HAVE_MEMFD_CREATE 1
|
||||
#cmakedefine HAVE_MEMMOVE 1
|
||||
#cmakedefine HAVE_SECURE_GETENV 1
|
||||
#cmakedefine HAVE_STRERROR 1
|
||||
|
||||
#cmakedefine SUPPORT_PCRE2_8 1
|
||||
#cmakedefine SUPPORT_PCRE2_16 1
|
||||
#cmakedefine SUPPORT_PCRE2_32 1
|
||||
#cmakedefine DISABLE_PERCENT_ZT 1
|
||||
|
||||
#cmakedefine SUPPORT_LIBBZ2 1
|
||||
#cmakedefine SUPPORT_LIBEDIT 1
|
||||
#cmakedefine SUPPORT_LIBREADLINE 1
|
||||
#cmakedefine SUPPORT_LIBZ 1
|
||||
|
||||
#cmakedefine SUPPORT_JIT 1
|
||||
#cmakedefine SLJIT_PROT_EXECUTABLE_ALLOCATOR 1
|
||||
#cmakedefine SUPPORT_PCRE2GREP_JIT 1
|
||||
#cmakedefine SUPPORT_PCRE2GREP_CALLOUT 1
|
||||
#cmakedefine SUPPORT_PCRE2GREP_CALLOUT_FORK 1
|
||||
#cmakedefine SUPPORT_UNICODE 1
|
||||
#cmakedefine SUPPORT_VALGRIND 1
|
||||
|
||||
#cmakedefine BSR_ANYCRLF 1
|
||||
#cmakedefine EBCDIC 1
|
||||
#cmakedefine EBCDIC_NL25 1
|
||||
#cmakedefine HEAP_MATCH_RECURSE 1
|
||||
#cmakedefine NEVER_BACKSLASH_C 1
|
||||
|
||||
#define PCRE2_EXPORT @PCRE2_EXPORT@
|
||||
#define LINK_SIZE @PCRE2_LINK_SIZE@
|
||||
#define HEAP_LIMIT @PCRE2_HEAP_LIMIT@
|
||||
#define MATCH_LIMIT @PCRE2_MATCH_LIMIT@
|
||||
#define MATCH_LIMIT_DEPTH @PCRE2_MATCH_LIMIT_DEPTH@
|
||||
#define MAX_VARLOOKBEHIND @PCRE2_MAX_VARLOOKBEHIND@
|
||||
#define NEWLINE_DEFAULT @NEWLINE_DEFAULT@
|
||||
#define PARENS_NEST_LIMIT @PCRE2_PARENS_NEST_LIMIT@
|
||||
#define PCRE2GREP_BUFSIZE @PCRE2GREP_BUFSIZE@
|
||||
#define PCRE2GREP_MAX_BUFSIZE @PCRE2GREP_MAX_BUFSIZE@
|
||||
|
||||
#define MAX_NAME_SIZE 128
|
||||
#define MAX_NAME_COUNT 10000
|
||||
|
||||
/* end config.h for CMake builds */
|
||||
1228
3rd/pcre2/configure.ac
Normal file
1228
3rd/pcre2/configure.ac
Normal file
@@ -0,0 +1,1228 @@
|
||||
dnl Process this file with autoconf to produce a configure script.
|
||||
|
||||
dnl NOTE FOR MAINTAINERS: Do not use minor version numbers 08 or 09 because
|
||||
dnl the leading zeros may cause them to be treated as invalid octal constants
|
||||
dnl if a PCRE2 user writes code that uses PCRE2_MINOR as a number. There is now
|
||||
dnl a check further down that throws an error if 08 or 09 are used.
|
||||
|
||||
dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might
|
||||
dnl be defined as -RC2, for example. For real releases, it should be empty.
|
||||
|
||||
m4_define(pcre2_major, [10])
|
||||
m4_define(pcre2_minor, [45])
|
||||
m4_define(pcre2_prerelease, [])
|
||||
m4_define(pcre2_date, [2025-02-05])
|
||||
|
||||
# Libtool shared library interface versions (current:revision:age)
|
||||
m4_define(libpcre2_8_version, [14:0:14])
|
||||
m4_define(libpcre2_16_version, [14:0:14])
|
||||
m4_define(libpcre2_32_version, [14:0:14])
|
||||
m4_define(libpcre2_posix_version, [3:6:0])
|
||||
|
||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||
# 50 lines of this file. Please update that if the variables above are moved.
|
||||
|
||||
AC_PREREQ([2.60])
|
||||
AC_INIT([PCRE2],pcre2_major.pcre2_minor[]pcre2_prerelease,[],[pcre2])
|
||||
AC_CONFIG_SRCDIR([src/pcre2.h.in])
|
||||
AM_INIT_AUTOMAKE([dist-bzip2 dist-zip foreign])
|
||||
ifelse(pcre2_prerelease, [-DEV],
|
||||
[dnl For development builds, ./configure is not checked in to Git, so we are
|
||||
dnl happy to have it regenerated as needed.
|
||||
AM_MAINTAINER_MODE([enable])],
|
||||
[dnl For a release build (or RC), the ./configure script we ship in the
|
||||
dnl tarball (and check in to the Git tag) should not be regenerated
|
||||
dnl implicitly. This is important if users want to check out a release tag
|
||||
dnl using Git.
|
||||
AM_MAINTAINER_MODE])
|
||||
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
||||
AC_CONFIG_HEADERS(src/config.h)
|
||||
|
||||
# This was added at the suggestion of libtoolize (03-Jan-10)
|
||||
AC_CONFIG_MACRO_DIR([m4])
|
||||
|
||||
# The default CFLAGS in Autoconf are "-g -O2" for gcc and just "-g" for any
|
||||
# other compiler. There doesn't seem to be a standard way of getting rid of the
|
||||
# -g (which I don't think is needed for a production library). This fudge seems
|
||||
# to achieve the necessary. First, we remember the externally set values of
|
||||
# CFLAGS. Then call the AC_PROG_CC macro to find the compiler - if CFLAGS is
|
||||
# not set, it will be set to Autoconf's defaults. Afterwards, if the original
|
||||
# values were not set, remove the -g from the Autoconf defaults.
|
||||
|
||||
remember_set_CFLAGS="$CFLAGS"
|
||||
|
||||
m4_version_prereq(2.70, [AC_PROG_CC], [AC_PROG_CC_C99])
|
||||
AM_PROG_CC_C_O
|
||||
AC_USE_SYSTEM_EXTENSIONS
|
||||
|
||||
if test "x$remember_set_CFLAGS" = "x"
|
||||
then
|
||||
if test "$CFLAGS" = "-g -O2"
|
||||
then
|
||||
CFLAGS="-O2"
|
||||
elif test "$CFLAGS" = "-g"
|
||||
then
|
||||
CFLAGS=""
|
||||
fi
|
||||
fi
|
||||
|
||||
# This is a new thing required to stop a warning from automake 1.12
|
||||
m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
|
||||
|
||||
# Check for a 64-bit integer type
|
||||
AC_TYPE_INT64_T
|
||||
|
||||
AC_PROG_INSTALL
|
||||
LT_INIT([win32-dll])
|
||||
AC_PROG_LN_S
|
||||
|
||||
AC_SYS_LARGEFILE
|
||||
|
||||
# Check for GCC visibility feature
|
||||
|
||||
PCRE2_VISIBILITY
|
||||
|
||||
# Check for Clang __attribute__((uninitialized)) feature
|
||||
|
||||
AC_MSG_CHECKING([for __attribute__((uninitialized))])
|
||||
AC_LANG_PUSH([C])
|
||||
tmp_CFLAGS=$CFLAGS
|
||||
# Add -Werror for this probe only when WORKING_WERROR is 1; CFLAGS is restored
# from tmp_CFLAGS afterwards. The value is compared as a quoted string so that
# an unset or empty WORKING_WERROR just skips -Werror instead of making test
# report a syntax error. NOTE(review): WORKING_WERROR is not assigned in this
# part of the file; presumably an earlier macro sets it - confirm.
if test "x$WORKING_WERROR" = "x1"; then
|
||||
CFLAGS="$CFLAGS -Werror"
|
||||
fi
|
||||
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,
|
||||
[[char buf[128] __attribute__((uninitialized));(void)buf]])],
|
||||
[pcre2_cc_cv_attribute_uninitialized=yes],
|
||||
[pcre2_cc_cv_attribute_uninitialized=no])
|
||||
AC_MSG_RESULT([$pcre2_cc_cv_attribute_uninitialized])
|
||||
if test "$pcre2_cc_cv_attribute_uninitialized" = yes; then
|
||||
AC_DEFINE([HAVE_ATTRIBUTE_UNINITIALIZED], 1, [Define this if your compiler
|
||||
supports __attribute__((uninitialized))])
|
||||
fi
|
||||
CFLAGS=$tmp_CFLAGS
|
||||
AC_LANG_POP([C])
|
||||
|
||||
# Check for the assume() builtin
|
||||
|
||||
AC_MSG_CHECKING([for __assume()])
|
||||
AC_LANG_PUSH([C])
|
||||
AC_LINK_IFELSE([AC_LANG_PROGRAM([[]], [[__assume(1)]])],
|
||||
[pcre2_cc_cv_builtin_assume=yes],
|
||||
[pcre2_cc_cv_builtin_assume=no])
|
||||
AC_MSG_RESULT([$pcre2_cc_cv_builtin_assume])
|
||||
if test "$pcre2_cc_cv_builtin_assume" = yes; then
|
||||
AC_DEFINE([HAVE_BUILTIN_ASSUME], 1,
|
||||
[Define this if your compiler provides __assume()])
|
||||
fi
|
||||
AC_LANG_POP([C])
|
||||
|
||||
# Check for the mul_overflow() builtin
|
||||
|
||||
AC_MSG_CHECKING([for __builtin_mul_overflow()])
|
||||
AC_LANG_PUSH([C])
|
||||
AC_LINK_IFELSE([AC_LANG_PROGRAM([[
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#include <stddef.h>
|
||||
|
||||
int a, b;
|
||||
size_t m;
|
||||
]], [[__builtin_mul_overflow(a, b, &m)]])],
|
||||
[pcre2_cc_cv_builtin_mul_overflow=yes],
|
||||
[pcre2_cc_cv_builtin_mul_overflow=no])
|
||||
AC_MSG_RESULT([$pcre2_cc_cv_builtin_mul_overflow])
|
||||
if test "$pcre2_cc_cv_builtin_mul_overflow" = yes; then
|
||||
AC_DEFINE([HAVE_BUILTIN_MUL_OVERFLOW], 1,
|
||||
[Define this if your compiler provides __builtin_mul_overflow()])
|
||||
fi
|
||||
AC_LANG_POP([C])
|
||||
|
||||
# Check for the unreachable() builtin
|
||||
|
||||
AC_MSG_CHECKING([for __builtin_unreachable()])
|
||||
AC_LANG_PUSH([C])
|
||||
AC_LINK_IFELSE([AC_LANG_PROGRAM([[int r;]], [[if (r) __builtin_unreachable()]])],
|
||||
[pcre2_cc_cv_builtin_unreachable=yes],
|
||||
[pcre2_cc_cv_builtin_unreachable=no])
|
||||
AC_MSG_RESULT([$pcre2_cc_cv_builtin_unreachable])
|
||||
if test "$pcre2_cc_cv_builtin_unreachable" = yes; then
|
||||
AC_DEFINE([HAVE_BUILTIN_UNREACHABLE], 1,
|
||||
[Define this if your compiler provides __builtin_unreachable()])
|
||||
fi
|
||||
AC_LANG_POP([C])
|
||||
|
||||
# Versioning
|
||||
|
||||
PCRE2_MAJOR="pcre2_major"
|
||||
PCRE2_MINOR="pcre2_minor"
|
||||
PCRE2_PRERELEASE="pcre2_prerelease"
|
||||
PCRE2_DATE="pcre2_date"
|
||||
|
||||
if test "$PCRE2_MINOR" = "08" -o "$PCRE2_MINOR" = "09"
|
||||
then
|
||||
echo "***"
|
||||
echo "*** Minor version number $PCRE2_MINOR must not be used. ***"
|
||||
echo "*** Use only 00 to 07 or 10 onwards, to avoid octal issues. ***"
|
||||
echo "***"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
AC_SUBST(PCRE2_MAJOR)
|
||||
AC_SUBST(PCRE2_MINOR)
|
||||
AC_SUBST(PCRE2_PRERELEASE)
|
||||
AC_SUBST(PCRE2_DATE)
|
||||
|
||||
# Set a more sensible default value for $(htmldir).
|
||||
if test "x$htmldir" = 'x${docdir}'
|
||||
then
|
||||
htmldir='${docdir}/html'
|
||||
fi
|
||||
|
||||
# Force an error for PCRE1 size options
|
||||
AC_ARG_ENABLE(pcre8,,,enable_pcre8=no)
|
||||
AC_ARG_ENABLE(pcre16,,,enable_pcre16=no)
|
||||
AC_ARG_ENABLE(pcre32,,,enable_pcre32=no)
|
||||
|
||||
if test "$enable_pcre8$enable_pcre16$enable_pcre32" != "nonono"
|
||||
then
|
||||
echo "** ERROR: Use --[[en|dis]]able-pcre2-[[8|16|32]], not --[[en|dis]]able-pcre[[8|16|32]]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Handle --disable-pcre2-8 (enabled by default)
|
||||
AC_ARG_ENABLE(pcre2-8,
|
||||
AS_HELP_STRING([--disable-pcre2-8],
|
||||
[disable 8 bit character support]),
|
||||
, enable_pcre2_8=unset)
|
||||
AC_SUBST(enable_pcre2_8)
|
||||
|
||||
# Handle --enable-pcre2-16 (disabled by default)
|
||||
AC_ARG_ENABLE(pcre2-16,
|
||||
AS_HELP_STRING([--enable-pcre2-16],
|
||||
[enable 16 bit character support]),
|
||||
, enable_pcre2_16=unset)
|
||||
AC_SUBST(enable_pcre2_16)
|
||||
|
||||
# Handle --enable-pcre2-32 (disabled by default)
|
||||
AC_ARG_ENABLE(pcre2-32,
|
||||
AS_HELP_STRING([--enable-pcre2-32],
|
||||
[enable 32 bit character support]),
|
||||
, enable_pcre2_32=unset)
|
||||
AC_SUBST(enable_pcre2_32)
|
||||
|
||||
# Handle --enable-debug (disabled by default)
|
||||
AC_ARG_ENABLE(debug,
|
||||
AS_HELP_STRING([--enable-debug],
|
||||
[enable debugging code]),
|
||||
, enable_debug=no)
|
||||
|
||||
# Handle --enable-jit (disabled by default)
|
||||
AC_ARG_ENABLE(jit,
|
||||
AS_HELP_STRING([--enable-jit],
|
||||
[enable Just-In-Time compiling support]),
|
||||
, enable_jit=no)
|
||||
|
||||
# This code enables JIT if the hardware supports it.
|
||||
if test "$enable_jit" = "auto"; then
|
||||
AC_LANG(C)
|
||||
SAVE_CPPFLAGS=$CPPFLAGS
|
||||
CPPFLAGS=-I$srcdir
|
||||
AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
|
||||
#define SLJIT_CONFIG_AUTO 1
|
||||
#include "deps/sljit/sljit_src/sljitConfigCPU.h"
|
||||
#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
|
||||
#error unsupported
|
||||
#endif]])], enable_jit=yes, enable_jit=no)
|
||||
CPPFLAGS=$SAVE_CPPFLAGS
|
||||
echo checking for JIT support on this hardware... $enable_jit
|
||||
fi
|
||||
|
||||
# Handle --enable-jit-sealloc (disabled by default and only experimental)
|
||||
case $host_os in
|
||||
linux* | netbsd*)
|
||||
AC_ARG_ENABLE(jit-sealloc,
|
||||
AS_HELP_STRING([--enable-jit-sealloc],
|
||||
[enable SELinux compatible execmem allocator in JIT (experimental)]),
|
||||
,enable_jit_sealloc=no)
|
||||
;;
|
||||
*)
|
||||
enable_jit_sealloc=unsupported
|
||||
;;
|
||||
esac
|
||||
|
||||
# Handle --disable-pcre2grep-jit (enabled by default)
|
||||
AC_ARG_ENABLE(pcre2grep-jit,
|
||||
AS_HELP_STRING([--disable-pcre2grep-jit],
|
||||
[disable JIT support in pcre2grep]),
|
||||
, enable_pcre2grep_jit=yes)
|
||||
|
||||
# Handle --disable-pcre2grep-callout (enabled by default)
|
||||
AC_ARG_ENABLE(pcre2grep-callout,
|
||||
AS_HELP_STRING([--disable-pcre2grep-callout],
|
||||
[disable callout script support in pcre2grep]),
|
||||
, enable_pcre2grep_callout=yes)
|
||||
|
||||
# Handle --disable-pcre2grep-callout-fork (enabled by default)
|
||||
AC_ARG_ENABLE(pcre2grep-callout-fork,
|
||||
AS_HELP_STRING([--disable-pcre2grep-callout-fork],
|
||||
[disable callout script fork support in pcre2grep]),
|
||||
, enable_pcre2grep_callout_fork=yes)
|
||||
|
||||
# Handle --enable-rebuild-chartables
|
||||
AC_ARG_ENABLE(rebuild-chartables,
|
||||
AS_HELP_STRING([--enable-rebuild-chartables],
|
||||
[rebuild character tables in current locale]),
|
||||
, enable_rebuild_chartables=no)
|
||||
|
||||
# Handle --disable-unicode (enabled by default)
|
||||
AC_ARG_ENABLE(unicode,
|
||||
AS_HELP_STRING([--disable-unicode],
|
||||
[disable Unicode support]),
|
||||
, enable_unicode=unset)
|
||||
|
||||
# Handle newline options
|
||||
ac_pcre2_newline=lf
|
||||
AC_ARG_ENABLE(newline-is-cr,
|
||||
AS_HELP_STRING([--enable-newline-is-cr],
|
||||
[use CR as newline character]),
|
||||
ac_pcre2_newline=cr)
|
||||
AC_ARG_ENABLE(newline-is-lf,
|
||||
AS_HELP_STRING([--enable-newline-is-lf],
|
||||
[use LF as newline character (default)]),
|
||||
ac_pcre2_newline=lf)
|
||||
AC_ARG_ENABLE(newline-is-crlf,
|
||||
AS_HELP_STRING([--enable-newline-is-crlf],
|
||||
[use CRLF as newline sequence]),
|
||||
ac_pcre2_newline=crlf)
|
||||
AC_ARG_ENABLE(newline-is-anycrlf,
|
||||
AS_HELP_STRING([--enable-newline-is-anycrlf],
|
||||
[use CR, LF, or CRLF as newline sequence]),
|
||||
ac_pcre2_newline=anycrlf)
|
||||
AC_ARG_ENABLE(newline-is-any,
|
||||
AS_HELP_STRING([--enable-newline-is-any],
|
||||
[use any valid Unicode newline sequence]),
|
||||
ac_pcre2_newline=any)
|
||||
AC_ARG_ENABLE(newline-is-nul,
|
||||
AS_HELP_STRING([--enable-newline-is-nul],
|
||||
[use NUL (binary zero) as newline character]),
|
||||
ac_pcre2_newline=nul)
|
||||
enable_newline="$ac_pcre2_newline"
|
||||
|
||||
# Handle --enable-bsr-anycrlf
|
||||
AC_ARG_ENABLE(bsr-anycrlf,
|
||||
AS_HELP_STRING([--enable-bsr-anycrlf],
|
||||
[\R matches only CR, LF, CRLF by default]),
|
||||
, enable_bsr_anycrlf=no)
|
||||
|
||||
# Handle --enable-never-backslash-C
|
||||
AC_ARG_ENABLE(never-backslash-C,
|
||||
AS_HELP_STRING([--enable-never-backslash-C],
|
||||
[use of \C causes an error]),
|
||||
, enable_never_backslash_C=no)
|
||||
|
||||
# Handle --enable-ebcdic
|
||||
AC_ARG_ENABLE(ebcdic,
|
||||
AS_HELP_STRING([--enable-ebcdic],
|
||||
[assume EBCDIC coding rather than ASCII; incompatible with --enable-unicode; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]),
|
||||
, enable_ebcdic=no)
|
||||
|
||||
# Handle --enable-ebcdic-nl25
|
||||
AC_ARG_ENABLE(ebcdic-nl25,
|
||||
AS_HELP_STRING([--enable-ebcdic-nl25],
|
||||
[set EBCDIC code for NL to 0x25 instead of 0x15; it implies --enable-ebcdic]),
|
||||
, enable_ebcdic_nl25=no)
|
||||
|
||||
# Handle --enable-pcre2grep-libz
|
||||
AC_ARG_ENABLE(pcre2grep-libz,
|
||||
AS_HELP_STRING([--enable-pcre2grep-libz],
|
||||
[link pcre2grep with libz to handle .gz files]),
|
||||
, enable_pcre2grep_libz=no)
|
||||
|
||||
# Handle --enable-pcre2grep-libbz2
|
||||
AC_ARG_ENABLE(pcre2grep-libbz2,
|
||||
AS_HELP_STRING([--enable-pcre2grep-libbz2],
|
||||
[link pcre2grep with libbz2 to handle .bz2 files]),
|
||||
, enable_pcre2grep_libbz2=no)
|
||||
|
||||
# Handle --with-pcre2grep-bufsize=N
|
||||
AC_ARG_WITH(pcre2grep-bufsize,
|
||||
AS_HELP_STRING([--with-pcre2grep-bufsize=N],
|
||||
[pcre2grep initial buffer size (default=20480, minimum=8192)]),
|
||||
, with_pcre2grep_bufsize=20480)
|
||||
|
||||
# Handle --with-pcre2grep-max-bufsize=N
|
||||
AC_ARG_WITH(pcre2grep-max-bufsize,
|
||||
AS_HELP_STRING([--with-pcre2grep-max-bufsize=N],
|
||||
[pcre2grep maximum buffer size (default=1048576, minimum=8192)]),
|
||||
, with_pcre2grep_max_bufsize=1048576)
|
||||
|
||||
# Handle --enable-pcre2test-libedit
|
||||
AC_ARG_ENABLE(pcre2test-libedit,
|
||||
AS_HELP_STRING([--enable-pcre2test-libedit],
|
||||
[link pcre2test with libedit]),
|
||||
, enable_pcre2test_libedit=no)
|
||||
|
||||
# Handle --enable-pcre2test-libreadline
|
||||
AC_ARG_ENABLE(pcre2test-libreadline,
|
||||
AS_HELP_STRING([--enable-pcre2test-libreadline],
|
||||
[link pcre2test with libreadline]),
|
||||
, enable_pcre2test_libreadline=no)
|
||||
|
||||
# Handle --with-link-size=N
|
||||
AC_ARG_WITH(link-size,
|
||||
AS_HELP_STRING([--with-link-size=N],
|
||||
[internal link size (2, 3, or 4 allowed; default=2)]),
|
||||
, with_link_size=2)
|
||||
|
||||
# Handle --with-max-varlookbehind=N
|
||||
AC_ARG_WITH(max-varlookbehind,
|
||||
AS_HELP_STRING([--with-max-varlookbehind=N],
|
||||
[maximum length of variable lookbehind (default=255)]),
|
||||
, with_max_varlookbehind=255)
|
||||
|
||||
# Handle --with-parens-nest-limit=N
|
||||
AC_ARG_WITH(parens-nest-limit,
|
||||
AS_HELP_STRING([--with-parens-nest-limit=N],
|
||||
[nested parentheses limit (default=250)]),
|
||||
, with_parens_nest_limit=250)
|
||||
|
||||
# Handle --with-heap-limit
|
||||
AC_ARG_WITH(heap-limit,
|
||||
AS_HELP_STRING([--with-heap-limit=N],
|
||||
[default limit on heap memory (kibibytes, default=20000000)]),
|
||||
, with_heap_limit=20000000)
|
||||
|
||||
# Handle --with-match-limit=N
|
||||
AC_ARG_WITH(match-limit,
|
||||
AS_HELP_STRING([--with-match-limit=N],
|
||||
[default limit on internal looping (default=10000000)]),
|
||||
, with_match_limit=10000000)
|
||||
|
||||
# Handle --with-match-limit-depth=N
|
||||
# Recognize old synonym --with-match-limit-recursion
|
||||
#
|
||||
# Note: In config.h, the default is to define MATCH_LIMIT_DEPTH symbolically as
|
||||
# MATCH_LIMIT, which in turn is defined to be some numeric value (e.g.
|
||||
# 10000000). MATCH_LIMIT_DEPTH can otherwise be set to some different numeric
|
||||
# value (or even the same numeric value as MATCH_LIMIT, though no longer
|
||||
# defined in terms of the latter).
|
||||
#
|
||||
AC_ARG_WITH(match-limit-depth,
|
||||
AS_HELP_STRING([--with-match-limit-depth=N],
|
||||
[default limit on match tree depth (default=MATCH_LIMIT)]),
|
||||
, with_match_limit_depth=MATCH_LIMIT)
|
||||
|
||||
AC_ARG_WITH(match-limit-recursion,,
|
||||
, with_match_limit_recursion=UNSET)
|
||||
|
||||
# Handle --enable-valgrind
|
||||
AC_ARG_ENABLE(valgrind,
|
||||
AS_HELP_STRING([--enable-valgrind],
|
||||
[enable valgrind support]),
|
||||
, enable_valgrind=no)
|
||||
|
||||
# Enable code coverage reports using gcov
|
||||
AC_ARG_ENABLE(coverage,
|
||||
AS_HELP_STRING([--enable-coverage],
|
||||
[enable code coverage reports using gcov]),
|
||||
, enable_coverage=no)
|
||||
|
||||
# Handle --enable-fuzz-support
|
||||
AC_ARG_ENABLE(fuzz_support,
|
||||
AS_HELP_STRING([--enable-fuzz-support],
|
||||
[enable fuzzer support]),
|
||||
, enable_fuzz_support=no)
|
||||
|
||||
# Handle --enable-diff-fuzz-support
|
||||
AC_ARG_ENABLE(diff_fuzz_support,
|
||||
AS_HELP_STRING([--enable-diff-fuzz-support],
|
||||
[enable differential fuzzer support]),
|
||||
, enable_diff_fuzz_support=no)
|
||||
|
||||
# Handle --disable-stack-for-recursion
|
||||
# This option became obsolete at release 10.30.
|
||||
AC_ARG_ENABLE(stack-for-recursion,,
|
||||
, enable_stack_for_recursion=yes)
|
||||
|
||||
# Original code
|
||||
# AC_ARG_ENABLE(stack-for-recursion,
|
||||
# AS_HELP_STRING([--disable-stack-for-recursion],
|
||||
# [don't use stack recursion when matching]),
|
||||
# , enable_stack_for_recursion=yes)
|
||||
|
||||
# Handle --disable-percent-zt (set as "auto" by default)
|
||||
AC_ARG_ENABLE(percent-zt,
|
||||
AS_HELP_STRING([--disable-percent-zt],
|
||||
[disable the use of z and t formatting modifiers]),
|
||||
, enable_percent_zt=auto)
|
||||
|
||||
# Set the default value for pcre2-8
|
||||
if test "x$enable_pcre2_8" = "xunset"
|
||||
then
|
||||
enable_pcre2_8=yes
|
||||
fi
|
||||
|
||||
# Set the default value for pcre2-16
|
||||
if test "x$enable_pcre2_16" = "xunset"
|
||||
then
|
||||
enable_pcre2_16=no
|
||||
fi
|
||||
|
||||
# Set the default value for pcre2-32
|
||||
if test "x$enable_pcre2_32" = "xunset"
|
||||
then
|
||||
enable_pcre2_32=no
|
||||
fi
|
||||
|
||||
# Make sure at least one library is selected
|
||||
if test "x$enable_pcre2_8$enable_pcre2_16$enable_pcre2_32" = "xnonono"
|
||||
then
|
||||
AC_MSG_ERROR([At least one of the 8, 16 or 32 bit libraries must be enabled])
|
||||
fi
|
||||
|
||||
# Unicode is enabled by default.
|
||||
if test "x$enable_unicode" = "xunset"
|
||||
then
|
||||
enable_unicode=yes
|
||||
fi
|
||||
|
||||
# Convert the newline identifier into the appropriate integer value. These must
|
||||
# agree with the PCRE2_NEWLINE_xxx values in pcre2.h.
|
||||
|
||||
case "$enable_newline" in
|
||||
cr) ac_pcre2_newline_value=1 ;;
|
||||
lf) ac_pcre2_newline_value=2 ;;
|
||||
crlf) ac_pcre2_newline_value=3 ;;
|
||||
any) ac_pcre2_newline_value=4 ;;
|
||||
anycrlf) ac_pcre2_newline_value=5 ;;
|
||||
nul) ac_pcre2_newline_value=6 ;;
|
||||
*)
|
||||
AC_MSG_ERROR([invalid argument "$enable_newline" to --enable-newline option])
|
||||
;;
|
||||
esac
|
||||
|
||||
# --enable-ebcdic-nl25 implies --enable-ebcdic
|
||||
if test "x$enable_ebcdic_nl25" = "xyes"; then
|
||||
enable_ebcdic=yes
|
||||
fi
|
||||
|
||||
# Make sure that if enable_ebcdic is set, rebuild_chartables is also enabled.
|
||||
# Also check that UTF support is not requested, because PCRE2 cannot handle
|
||||
# EBCDIC and UTF in the same build. To do so it would need to use different
|
||||
# character constants depending on the mode. Also, EBCDIC cannot be used with
|
||||
# 16-bit and 32-bit libraries.
|
||||
#
|
||||
if test "x$enable_ebcdic" = "xyes"; then
|
||||
enable_rebuild_chartables=yes
|
||||
if test "x$enable_unicode" = "xyes"; then
|
||||
AC_MSG_ERROR([support for EBCDIC and Unicode cannot be enabled at the same time])
|
||||
fi
|
||||
if test "x$enable_pcre2_16" = "xyes" -o "x$enable_pcre2_32" = "xyes"; then
|
||||
AC_MSG_ERROR([EBCDIC support is available only for the 8-bit library])
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check argument to --with-link-size
|
||||
case "$with_link_size" in
|
||||
2|3|4) ;;
|
||||
*)
|
||||
AC_MSG_ERROR([invalid argument "$with_link_size" to --with-link-size option])
|
||||
;;
|
||||
esac
|
||||
|
||||
AH_TOP([
|
||||
/* PCRE2 is written in Standard C, but there are a few non-standard things it
|
||||
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||
systems.
|
||||
|
||||
In environments that support the GNU autotools, config.h.in is converted into
|
||||
config.h by the "configure" script. In environments that use CMake,
|
||||
config-cmake.in is converted into config.h. If you are going to build PCRE2 "by
|
||||
hand" without using "configure" or CMake, you should copy the distributed
|
||||
config.h.generic to config.h, and edit the macro definitions to be the way you
|
||||
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
|
||||
so that config.h is included at the start of every source.
|
||||
|
||||
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
|
||||
but if you do, default values will be taken from config.h for non-boolean
|
||||
macros that are not defined on the command line.
|
||||
|
||||
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be
|
||||
defined (conventionally to 1) for TRUE, and not defined at all for FALSE. All
|
||||
such macros are listed as a commented #undef in config.h.generic. Macros such
|
||||
as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
|
||||
|
||||
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
|
||||
sure both macros are undefined; an emulation function will then be used. */])
|
||||
|
||||
# Checks for header files.
|
||||
AC_CHECK_HEADERS(assert.h limits.h sys/types.h sys/stat.h dirent.h)
|
||||
AC_CHECK_HEADERS([windows.h], [HAVE_WINDOWS_H=1])
|
||||
AC_CHECK_HEADERS([sys/wait.h], [HAVE_SYS_WAIT_H=1])
|
||||
|
||||
# Conditional compilation
|
||||
AM_CONDITIONAL(WITH_PCRE2_8, test "x$enable_pcre2_8" = "xyes")
|
||||
AM_CONDITIONAL(WITH_PCRE2_16, test "x$enable_pcre2_16" = "xyes")
|
||||
AM_CONDITIONAL(WITH_PCRE2_32, test "x$enable_pcre2_32" = "xyes")
|
||||
AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes")
|
||||
AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes")
|
||||
AM_CONDITIONAL(WITH_UNICODE, test "x$enable_unicode" = "xyes")
|
||||
AM_CONDITIONAL(WITH_VALGRIND, test "x$enable_valgrind" = "xyes")
|
||||
AM_CONDITIONAL(WITH_FUZZ_SUPPORT, test "x$enable_fuzz_support" = "xyes")
|
||||
AM_CONDITIONAL(WITH_DIFF_FUZZ_SUPPORT, test "x$enable_diff_fuzz_support" = "xyes")
|
||||
|
||||
if test "$enable_fuzz_support" = "yes" -a "$enable_pcre2_8" = "no"; then
|
||||
echo "** ERROR: Fuzzer support requires the 8-bit library"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if test "$enable_diff_fuzz_support" = "yes"; then
|
||||
if test "$enable_fuzz_support" = "no"; then
|
||||
echo "** ERROR: Differential fuzzing support requires fuzzing support"
|
||||
exit 1
|
||||
fi
|
||||
if test "$enable_jit" = "no"; then
|
||||
echo "** ERROR: Differential fuzzing support requires Just-in-Time compilation support"
|
||||
exit 1
|
||||
fi
|
||||
AC_DEFINE([SUPPORT_DIFF_FUZZ], [], [
|
||||
Define to any value to enable differential fuzzing support.])
|
||||
fi
|
||||
|
||||
# Checks for typedefs, structures, and compiler characteristics.
|
||||
|
||||
AC_C_CONST
|
||||
AC_TYPE_SIZE_T
|
||||
|
||||
# Checks for library functions.
|
||||
|
||||
AC_CHECK_FUNCS(bcopy memfd_create memmove mkostemp secure_getenv strerror)
|
||||
AC_MSG_CHECKING([for realpath])
|
||||
AC_LINK_IFELSE([AC_LANG_PROGRAM([[
|
||||
#include <stdlib.h>
|
||||
#include <limits.h>
|
||||
]],[[
|
||||
char buffer[PATH_MAX];
|
||||
realpath(".", buffer);
|
||||
]])],
|
||||
[AC_MSG_RESULT([yes])
|
||||
AC_DEFINE([HAVE_REALPATH], 1,
|
||||
[Define to 1 if you have the `realpath' function.])
|
||||
],
|
||||
AC_MSG_RESULT([no]))
|
||||
|
||||
# Check for the availability of libz (aka zlib)
|
||||
|
||||
AC_CHECK_HEADERS([zlib.h], [HAVE_ZLIB_H=1])
|
||||
AC_CHECK_LIB([z], [gzopen], [HAVE_LIBZ=1])
|
||||
|
||||
# Check for the availability of libbz2. Originally we just used AC_CHECK_LIB,
|
||||
# as for libz. However, this had the following problem, diagnosed and fixed by
|
||||
# a user:
|
||||
#
|
||||
# - libbz2 uses the Pascal calling convention (WINAPI) for the functions
|
||||
# under Win32.
|
||||
# - The standard autoconf AC_CHECK_LIB fails to include "bzlib.h",
|
||||
# therefore missing the function definition.
|
||||
# - The compiler thus generates a "C" signature for the test function.
|
||||
# - The linker fails to find the "C" function.
|
||||
# - PCRE2 fails to configure if asked to do so against libbz2.
|
||||
#
|
||||
# Solution:
|
||||
#
|
||||
# - Replace the AC_CHECK_LIB test with a custom test.
|
||||
|
||||
AC_CHECK_HEADERS([bzlib.h], [HAVE_BZLIB_H=1])
|
||||
# Original test
|
||||
# AC_CHECK_LIB([bz2], [BZ2_bzopen], [HAVE_LIBBZ2=1])
|
||||
#
|
||||
# Custom test follows
|
||||
|
||||
AC_MSG_CHECKING([for libbz2])
|
||||
# Try to link a small program that calls BZ2_bzopen with -lbz2 temporarily
# appended to LIBS; LIBS is put back to its saved value afterwards. On success
# only the shell variable HAVE_LIBBZ2 is set here. The success action is plain
# statements: the link test is a conditional, not a loop, so no break is used.
OLD_LIBS="$LIBS"
|
||||
LIBS="$LIBS -lbz2"
|
||||
AC_LINK_IFELSE([AC_LANG_PROGRAM([[
|
||||
#ifdef HAVE_BZLIB_H
|
||||
#include <bzlib.h>
|
||||
#endif]],
|
||||
[[return (int)BZ2_bzopen("conftest", "rb");]])],
|
||||
[AC_MSG_RESULT([yes]);HAVE_LIBBZ2=1;],
|
||||
AC_MSG_RESULT([no]))
|
||||
LIBS="$OLD_LIBS"
|
||||
|
||||
# Check for the availability of libreadline
|
||||
|
||||
if test "$enable_pcre2test_libreadline" = "yes"; then
|
||||
AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_H=1])
|
||||
AC_CHECK_HEADERS([readline/history.h], [HAVE_HISTORY_H=1])
|
||||
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lreadline"],
|
||||
[unset ac_cv_lib_readline_readline;
|
||||
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-ltinfo"],
|
||||
[unset ac_cv_lib_readline_readline;
|
||||
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lcurses"],
|
||||
[unset ac_cv_lib_readline_readline;
|
||||
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lncurses"],
|
||||
[unset ac_cv_lib_readline_readline;
|
||||
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lncursesw"],
|
||||
[unset ac_cv_lib_readline_readline;
|
||||
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-ltermcap"],
|
||||
[LIBREADLINE=""],
|
||||
[-ltermcap])],
|
||||
[-lncursesw])],
|
||||
[-lncurses])],
|
||||
[-lcurses])],
|
||||
[-ltinfo])])
|
||||
AC_SUBST(LIBREADLINE)
|
||||
if test -n "$LIBREADLINE"; then
|
||||
if test "$LIBREADLINE" != "-lreadline"; then
|
||||
echo "-lreadline needs $LIBREADLINE"
|
||||
LIBREADLINE="-lreadline $LIBREADLINE"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check for the availability of libedit. Different distributions put its
|
||||
# headers in different places. Try to cover the most common ones.
|
||||
|
||||
if test "$enable_pcre2test_libedit" = "yes"; then
|
||||
AC_CHECK_HEADERS([editline/readline.h edit/readline/readline.h readline.h], [
|
||||
HAVE_LIBEDIT_HEADER=1
|
||||
break
|
||||
])
|
||||
AC_CHECK_LIB([edit], [readline], [LIBEDIT="-ledit"])
|
||||
fi
|
||||
|
||||
PCRE2_STATIC_CFLAG=""
|
||||
if test "x$enable_shared" = "xno" ; then
|
||||
AC_DEFINE([PCRE2_STATIC], [1], [
|
||||
Define to any value if linking statically (TODO: make nice with Libtool)])
|
||||
PCRE2_STATIC_CFLAG="-DPCRE2_STATIC"
|
||||
fi
|
||||
AC_SUBST(PCRE2_STATIC_CFLAG)
|
||||
|
||||
PCRE2POSIX_CFLAG=""
|
||||
if test "x$enable_shared" = "xyes" ; then
|
||||
PCRE2POSIX_CFLAG="-DPCRE2POSIX_SHARED"
|
||||
fi
|
||||
AC_SUBST(PCRE2POSIX_CFLAG)
|
||||
|
||||
# Here is where PCRE2-specific defines are handled
|
||||
|
||||
if test "$enable_pcre2_8" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_PCRE2_8], [], [
|
||||
Define to any value to enable the 8 bit PCRE2 library.])
|
||||
fi
|
||||
|
||||
if test "$enable_pcre2_16" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_PCRE2_16], [], [
|
||||
Define to any value to enable the 16 bit PCRE2 library.])
|
||||
fi
|
||||
|
||||
if test "$enable_pcre2_32" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_PCRE2_32], [], [
|
||||
Define to any value to enable the 32 bit PCRE2 library.])
|
||||
fi
|
||||
|
||||
if test "$enable_debug" = "yes"; then
|
||||
AC_DEFINE([PCRE2_DEBUG], [], [
|
||||
Define to any value to include debugging code.])
|
||||
fi
|
||||
|
||||
if test "$enable_percent_zt" = "no"; then
|
||||
AC_DEFINE([DISABLE_PERCENT_ZT], [], [
|
||||
Define to any value to disable the use of the z and t modifiers in
|
||||
formatting settings such as %zu or %td (this is rarely needed).])
|
||||
else
|
||||
enable_percent_zt=auto
|
||||
fi
|
||||
|
||||
# Unless running under Windows, JIT support requires pthreads.
|
||||
|
||||
if test "$enable_jit" = "yes"; then
|
||||
if test "$HAVE_WINDOWS_H" != "1"; then
|
||||
AX_PTHREAD([], [AC_MSG_ERROR([JIT support requires pthreads])])
|
||||
CC="$PTHREAD_CC"
|
||||
CFLAGS="$PTHREAD_CFLAGS $CFLAGS"
|
||||
LIBS="$PTHREAD_LIBS $LIBS"
|
||||
fi
|
||||
AC_DEFINE([SUPPORT_JIT], [], [
|
||||
Define to any value to enable support for Just-In-Time compiling.])
|
||||
else
|
||||
enable_pcre2grep_jit="no"
|
||||
fi
|
||||
|
||||
if test "$enable_jit_sealloc" = "yes"; then
|
||||
AC_DEFINE([SLJIT_PROT_EXECUTABLE_ALLOCATOR], [1], [
|
||||
Define to any non-zero number to enable support for SELinux
|
||||
compatible executable memory allocator in JIT. Note that this
|
||||
will have no effect unless SUPPORT_JIT is also defined.])
|
||||
fi
|
||||
|
||||
if test "$enable_pcre2grep_jit" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_PCRE2GREP_JIT], [], [
|
||||
Define to any value to enable JIT support in pcre2grep. Note that this will
|
||||
have no effect unless SUPPORT_JIT is also defined.])
|
||||
fi
|
||||
|
||||
if test "$enable_pcre2grep_callout" = "yes"; then
|
||||
if test "$enable_pcre2grep_callout_fork" = "yes"; then
|
||||
if test "$HAVE_WINDOWS_H" != "1"; then
|
||||
if test "$HAVE_SYS_WAIT_H" != "1"; then
|
||||
AC_MSG_ERROR([Callout script support needs sys/wait.h.])
|
||||
fi
|
||||
fi
|
||||
AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT_FORK], [], [
|
||||
Define to any value to enable fork support in pcre2grep callout scripts.
|
||||
This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also
|
||||
defined.])
|
||||
fi
|
||||
AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT], [], [
|
||||
Define to any value to enable callout script support in pcre2grep.])
|
||||
else
|
||||
enable_pcre2grep_callout_fork="no"
|
||||
fi
|
||||
|
||||
if test "$enable_unicode" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_UNICODE], [], [
|
||||
Define to any value to enable support for Unicode and UTF encoding.
|
||||
This will work even in an EBCDIC environment, but it is incompatible
|
||||
with the EBCDIC macro. That is, PCRE2 can support *either* EBCDIC
|
||||
code *or* ASCII/Unicode, but not both at once.])
|
||||
fi
|
||||
|
||||
if test "$enable_pcre2grep_libz" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_LIBZ], [], [
|
||||
Define to any value to allow pcre2grep to be linked with libz, so that it is
|
||||
able to handle .gz files.])
|
||||
fi
|
||||
|
||||
if test "$enable_pcre2grep_libbz2" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_LIBBZ2], [], [
|
||||
Define to any value to allow pcre2grep to be linked with libbz2, so that it
|
||||
is able to handle .bz2 files.])
|
||||
fi
|
||||
|
||||
if test $with_pcre2grep_bufsize -lt 8192 ; then
|
||||
AC_MSG_WARN([$with_pcre2grep_bufsize is too small for --with-pcre2grep-bufsize; using 8192])
|
||||
with_pcre2grep_bufsize="8192"
|
||||
else
|
||||
if test $? -gt 1 ; then
|
||||
AC_MSG_ERROR([Bad value for --with-pcre2grep-bufsize])
|
||||
fi
|
||||
fi
|
||||
|
||||
if test $with_pcre2grep_max_bufsize -lt $with_pcre2grep_bufsize ; then
|
||||
with_pcre2grep_max_bufsize="$with_pcre2grep_bufsize"
|
||||
else
|
||||
if test $? -gt 1 ; then
|
||||
AC_MSG_ERROR([Bad value for --with-pcre2grep-max-bufsize])
|
||||
fi
|
||||
fi
|
||||
|
||||
AC_DEFINE_UNQUOTED([PCRE2GREP_BUFSIZE], [$with_pcre2grep_bufsize], [
|
||||
The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by
|
||||
pcre2grep to hold parts of the file it is searching. The buffer will be
|
||||
expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing very
|
||||
long lines. The actual amount of memory used by pcre2grep is three times this
|
||||
number, because it allows for the buffering of "before" and "after" lines.])
|
||||
|
||||
AC_DEFINE_UNQUOTED([PCRE2GREP_MAX_BUFSIZE], [$with_pcre2grep_max_bufsize], [
|
||||
The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer
|
||||
used by pcre2grep to hold parts of the file it is searching. The actual
|
||||
amount of memory used by pcre2grep is three times this number, because it
|
||||
allows for the buffering of "before" and "after" lines.])
|
||||
|
||||
if test "$enable_pcre2test_libedit" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_LIBEDIT], [], [
|
||||
Define to any value to allow pcre2test to be linked with libedit.])
|
||||
LIBREADLINE="$LIBEDIT"
|
||||
elif test "$enable_pcre2test_libreadline" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_LIBREADLINE], [], [
|
||||
Define to any value to allow pcre2test to be linked with libreadline.])
|
||||
fi
|
||||
|
||||
AC_DEFINE_UNQUOTED([NEWLINE_DEFAULT], [$ac_pcre2_newline_value], [
|
||||
The value of NEWLINE_DEFAULT determines the default newline character
|
||||
sequence. PCRE2 client programs can override this by selecting other values
|
||||
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY),
|
||||
5 (ANYCRLF), and 6 (NUL).])
|
||||
|
||||
if test "$enable_bsr_anycrlf" = "yes"; then
|
||||
AC_DEFINE([BSR_ANYCRLF], [], [
|
||||
By default, the \R escape sequence matches any Unicode line ending
|
||||
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
||||
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
|
||||
The build-time default can be overridden by the user of PCRE2 at runtime.])
|
||||
fi
|
||||
|
||||
if test "$enable_never_backslash_C" = "yes"; then
|
||||
AC_DEFINE([NEVER_BACKSLASH_C], [], [
|
||||
Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns.])
|
||||
fi
|
||||
|
||||
AC_DEFINE_UNQUOTED([LINK_SIZE], [$with_link_size], [
|
||||
The value of LINK_SIZE determines the number of bytes used to store
|
||||
links as offsets within the compiled regex. The default is 2, which
|
||||
allows for compiled patterns up to 65535 code units long. This covers the
|
||||
vast majority of cases. However, PCRE2 can also be compiled to use 3 or 4
|
||||
bytes instead. This allows for longer patterns in extreme cases.])
|
||||
|
||||
AC_DEFINE_UNQUOTED([MAX_VARLOOKBEHIND], [$with_max_varlookbehind], [
|
||||
The value of MAX_VARLOOKBEHIND specifies the default maximum length, in
|
||||
characters, for a variable-length lookbehind assertion.])
|
||||
|
||||
AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [
|
||||
The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
stack that is used while compiling a pattern.])
|
||||
|
||||
AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
|
||||
The value of MATCH_LIMIT determines the default number of times the
|
||||
pcre2_match() function can record a backtrack position during a single
|
||||
matching attempt. The value is also used to limit a loop counter in
|
||||
pcre2_dfa_match(). There is a runtime interface for setting a different
|
||||
limit. The limit exists in order to catch runaway regular expressions that
|
||||
take forever to determine that they do not match. The default is set very
|
||||
large so that it does not accidentally catch legitimate cases.])
|
||||
|
||||
# --with-match-limit-recursion is an obsolete synonym for --with-match-limit-depth
|
||||
|
||||
if test "$with_match_limit_recursion" != "UNSET"; then
|
||||
cat <<EOF
|
||||
|
||||
WARNING: --with-match-limit-recursion is an obsolete option. Please use
|
||||
--with-match-limit-depth in future. If both are set, --with-match-limit-depth
|
||||
will be used. See also --with-heap-limit.
|
||||
|
||||
EOF
|
||||
if test "$with_match_limit_depth" = "MATCH_LIMIT"; then
|
||||
with_match_limit_depth=$with_match_limit_recursion
|
||||
fi
|
||||
fi
|
||||
|
||||
AC_DEFINE_UNQUOTED([MATCH_LIMIT_DEPTH], [$with_match_limit_depth], [
|
||||
The above limit applies to all backtracks, whether or not they are nested. In
|
||||
some environments it is desirable to limit the nesting of backtracking (that
|
||||
is, the depth of tree that is searched) more strictly, in order to restrict
|
||||
the maximum amount of heap memory that is used. The value of
|
||||
MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it must
|
||||
be less than the value of MATCH_LIMIT. The default is to use the same value
|
||||
as MATCH_LIMIT. There is a runtime method for setting a different limit. In
|
||||
the case of pcre2_dfa_match(), this limit controls the depth of the internal
|
||||
nested function calls that are used for pattern recursions, lookarounds, and
|
||||
atomic groups.])
|
||||
|
||||
AC_DEFINE_UNQUOTED([HEAP_LIMIT], [$with_heap_limit], [
|
||||
This limits the amount of memory that may be used while matching
|
||||
a pattern. It applies to both pcre2_match() and pcre2_dfa_match(). It does
|
||||
not apply to JIT matching. The value is in kibibytes (units of 1024 bytes).])
|
||||
|
||||
AC_DEFINE([MAX_NAME_SIZE], [128], [
|
||||
This limit is parameterized just in case anybody ever wants to
|
||||
change it. Care must be taken if it is increased, because it guards
|
||||
against integer overflow caused by enormously large patterns.])
|
||||
|
||||
AC_DEFINE([MAX_NAME_COUNT], [10000], [
|
||||
This limit is parameterized just in case anybody ever wants to
|
||||
change it. Care must be taken if it is increased, because it guards
|
||||
against integer overflow caused by enormously large patterns.])
|
||||
|
||||
AH_VERBATIM([PCRE2_EXP_DEFN], [
|
||||
/* If you are compiling for a system other than a Unix-like system or
|
||||
Win32, and it needs some magic to be inserted before the definition
|
||||
of a function that is exported by the library, define this macro to
|
||||
contain the relevant magic. If you do not define this macro, a suitable
|
||||
__declspec value is used for Windows systems; in other environments
|
||||
a compiler relevant "extern" is used with any "visibility" related
|
||||
attributes from PCRE2_EXPORT included.
|
||||
This macro appears at the start of every exported function that is part
|
||||
of the external API. It does not appear on functions that are "external"
|
||||
in the C sense, but which are internal to the library. */
|
||||
#undef PCRE2_EXP_DEFN])
|
||||
|
||||
if test "$enable_ebcdic" = "yes"; then
|
||||
AC_DEFINE_UNQUOTED([EBCDIC], [], [
|
||||
If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||
character codes, define this macro to any value. When EBCDIC is set, PCRE2
|
||||
assumes that all input strings are in EBCDIC. If you do not define this
|
||||
macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
|
||||
is not possible to build a version of PCRE2 that supports both EBCDIC and
|
||||
UTF-8/16/32.])
|
||||
fi
|
||||
|
||||
if test "$enable_ebcdic_nl25" = "yes"; then
|
||||
AC_DEFINE_UNQUOTED([EBCDIC_NL25], [], [
|
||||
In an EBCDIC environment, define this macro to any value to arrange for
|
||||
the NL character to be 0x25 instead of the default 0x15. NL plays the role
|
||||
that LF does in an ASCII/Unicode environment.])
|
||||
fi
|
||||
|
||||
if test "$enable_valgrind" = "yes"; then
|
||||
AC_DEFINE_UNQUOTED([SUPPORT_VALGRIND], [], [
|
||||
Define to any value for valgrind support to find invalid memory reads.])
|
||||
fi
|
||||
|
||||
# Platform specific issues
|
||||
NO_UNDEFINED=
|
||||
EXPORT_ALL_SYMBOLS=
|
||||
case $host_os in
|
||||
cygwin* | mingw* )
|
||||
if test X"$enable_shared" = Xyes; then
|
||||
NO_UNDEFINED="-no-undefined"
|
||||
EXPORT_ALL_SYMBOLS="-Wl,--export-all-symbols"
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
|
||||
# The extra LDFLAGS for each particular library. The libpcre2*_version values
|
||||
# are m4 variables, assigned above.
|
||||
|
||||
EXTRA_LIBPCRE2_8_LDFLAGS="$EXTRA_LIBPCRE2_8_LDFLAGS \
|
||||
$NO_UNDEFINED -version-info libpcre2_8_version"
|
||||
|
||||
EXTRA_LIBPCRE2_16_LDFLAGS="$EXTRA_LIBPCRE2_16_LDFLAGS \
|
||||
$NO_UNDEFINED -version-info libpcre2_16_version"
|
||||
|
||||
EXTRA_LIBPCRE2_32_LDFLAGS="$EXTRA_LIBPCRE2_32_LDFLAGS \
|
||||
$NO_UNDEFINED -version-info libpcre2_32_version"
|
||||
|
||||
EXTRA_LIBPCRE2_POSIX_LDFLAGS="$EXTRA_LIBPCRE2_POSIX_LDFLAGS \
|
||||
$NO_UNDEFINED -version-info libpcre2_posix_version"
|
||||
|
||||
AC_SUBST(EXTRA_LIBPCRE2_8_LDFLAGS)
|
||||
AC_SUBST(EXTRA_LIBPCRE2_16_LDFLAGS)
|
||||
AC_SUBST(EXTRA_LIBPCRE2_32_LDFLAGS)
|
||||
AC_SUBST(EXTRA_LIBPCRE2_POSIX_LDFLAGS)
|
||||
|
||||
# When we run 'make distcheck', use these arguments. Turning off compiler
|
||||
# optimization makes it run faster.
|
||||
DISTCHECK_CONFIGURE_FLAGS="CFLAGS='' CXXFLAGS='' --enable-pcre2-16 --enable-pcre2-32 --enable-jit"
|
||||
AC_SUBST(DISTCHECK_CONFIGURE_FLAGS)
|
||||
|
||||
# Check that, if --enable-pcre2grep-libz or --enable-pcre2grep-libbz2 is
|
||||
# specified, the relevant library is available.
|
||||
|
||||
if test "$enable_pcre2grep_libz" = "yes"; then
|
||||
if test "$HAVE_ZLIB_H" != "1"; then
|
||||
echo "** Cannot --enable-pcre2grep-libz because zlib.h was not found"
|
||||
exit 1
|
||||
fi
|
||||
if test "$HAVE_LIBZ" != "1"; then
|
||||
echo "** Cannot --enable-pcre2grep-libz because libz was not found"
|
||||
exit 1
|
||||
fi
|
||||
LIBZ="-lz"
|
||||
fi
|
||||
AC_SUBST(LIBZ)
|
||||
|
||||
if test "$enable_pcre2grep_libbz2" = "yes"; then
|
||||
if test "$HAVE_BZLIB_H" != "1"; then
|
||||
echo "** Cannot --enable-pcre2grep-libbz2 because bzlib.h was not found"
|
||||
exit 1
|
||||
fi
|
||||
if test "$HAVE_LIBBZ2" != "1"; then
|
||||
echo "** Cannot --enable-pcre2grep-libbz2 because libbz2 was not found"
|
||||
exit 1
|
||||
fi
|
||||
LIBBZ2="-lbz2"
|
||||
fi
|
||||
AC_SUBST(LIBBZ2)
|
||||
|
||||
# Similarly for --enable-pcre2test-libedit and --enable-pcre2test-libreadline
|
||||
|
||||
# Validate --enable-pcre2test-libedit: it cannot be combined with
# --enable-pcre2test-libreadline, and it needs both a libedit header and
# the libedit library to have been found.
if test "$enable_pcre2test_libedit" = "yes"; then
  # The two line-editing libraries are mutually exclusive. The message names
  # the real option, which is spelled --enable-pcre2test-libreadline.
  if test "$enable_pcre2test_libreadline" = "yes"; then
    echo "** Cannot use both --enable-pcre2test-libedit and --enable-pcre2test-libreadline"
    exit 1
  fi
  # An empty HAVE_LIBEDIT_HEADER means no usable header was recorded.
  if test -z "$HAVE_LIBEDIT_HEADER"; then
    echo "** Cannot --enable-pcre2test-libedit because neither editline/readline.h,"
    echo "** edit/readline/readline.h nor a compatible header was found."
    exit 1
  fi
  # An empty LIBEDIT means no library to link against was recorded.
  if test -z "$LIBEDIT"; then
    echo "** Cannot --enable-pcre2test-libedit because libedit library was not found."
    exit 1
  fi
fi
|
||||
|
||||
# Validate --enable-pcre2test-libreadline: both readline headers and the
# readline library must have been found. The messages name the real option,
# which is spelled --enable-pcre2test-libreadline.
if test "$enable_pcre2test_libreadline" = "yes"; then
  if test "$HAVE_READLINE_H" != "1"; then
    echo "** Cannot --enable-pcre2test-libreadline because readline/readline.h was not found."
    exit 1
  fi
  if test "$HAVE_HISTORY_H" != "1"; then
    echo "** Cannot --enable-pcre2test-libreadline because readline/history.h was not found."
    exit 1
  fi
  # An empty LIBREADLINE means no library to link against was recorded.
  if test -z "$LIBREADLINE"; then
    echo "** Cannot --enable-pcre2test-libreadline because readline library was not found."
    exit 1
  fi
fi
|
||||
|
||||
# Handle valgrind support
|
||||
|
||||
if test "$enable_valgrind" = "yes"; then
|
||||
m4_ifdef([PKG_CHECK_MODULES],
|
||||
[PKG_CHECK_MODULES([VALGRIND],[valgrind])],
|
||||
[AC_MSG_ERROR([pkg-config not supported])])
|
||||
fi
|
||||
|
||||
# Handle code coverage reporting support
|
||||
if test "$enable_coverage" = "yes"; then
|
||||
if test "x$GCC" != "xyes"; then
|
||||
AC_MSG_ERROR([Code coverage reports can only be generated when using GCC])
|
||||
fi
|
||||
|
||||
# ccache is incompatible with gcov
|
||||
AC_PATH_PROG([SHTOOL],[shtool],[false])
|
||||
case `$SHTOOL path $CC` in
|
||||
*ccache*) cc_ccache=yes;;
|
||||
*) cc_ccache=no;;
|
||||
esac
|
||||
|
||||
# When the compiler path contains ccache, refuse to continue unless
# CCACHE_DISABLE is exactly 1. An unset or empty value also compares unequal
# to 1, so one comparison covers every case; this avoids the obsolescent
# multi-operand "test ... -o ..." form, which is not portable.
if test "$cc_ccache" = "yes"; then
  if test "x$CCACHE_DISABLE" != "x1"; then
    AC_MSG_ERROR([must export CCACHE_DISABLE=1 to disable ccache for code coverage])
  fi
fi
|
||||
|
||||
AC_ARG_VAR([LCOV],[the ltp lcov program])
|
||||
AC_PATH_PROG([LCOV],[lcov],[false])
|
||||
if test "x$LCOV" = "xfalse"; then
|
||||
AC_MSG_ERROR([lcov not found])
|
||||
fi
|
||||
|
||||
AC_ARG_VAR([GENHTML],[the ltp genhtml program])
|
||||
AC_PATH_PROG([GENHTML],[genhtml],[false])
|
||||
if test "x$GENHTML" = "xfalse"; then
|
||||
AC_MSG_ERROR([genhtml not found])
|
||||
fi
|
||||
|
||||
# Set flags needed for gcov
|
||||
GCOV_CFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
|
||||
GCOV_CXXFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
|
||||
GCOV_LIBS="-lgcov"
|
||||
AC_SUBST([GCOV_CFLAGS])
|
||||
AC_SUBST([GCOV_CXXFLAGS])
|
||||
AC_SUBST([GCOV_LIBS])
|
||||
fi # enable_coverage
|
||||
|
||||
AM_CONDITIONAL([WITH_GCOV],[test "x$enable_coverage" = "xyes"])
|
||||
|
||||
AC_MSG_CHECKING([whether Intel CET is enabled])
|
||||
AC_LANG_PUSH([C])
|
||||
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,
|
||||
[[#ifndef __CET__
|
||||
# error CET is not enabled
|
||||
#endif]])],
|
||||
[pcre2_cc_cv_intel_cet_enabled=yes],
|
||||
[pcre2_cc_cv_intel_cet_enabled=no])
|
||||
AC_MSG_RESULT([$pcre2_cc_cv_intel_cet_enabled])
|
||||
if test "$pcre2_cc_cv_intel_cet_enabled" = yes; then
|
||||
CET_CFLAGS="-mshstk"
|
||||
AC_SUBST([CET_CFLAGS])
|
||||
fi
|
||||
AC_LANG_POP([C])
|
||||
|
||||
# LIB_POSTFIX is used by CMakeLists.txt for Windows debug builds.
|
||||
# Pass empty LIB_POSTFIX to *.pc files and pcre2-config here.
|
||||
AC_SUBST(LIB_POSTFIX)
|
||||
|
||||
# Produce these files, in addition to config.h.
|
||||
|
||||
AC_CONFIG_FILES(
|
||||
Makefile
|
||||
libpcre2-8.pc
|
||||
libpcre2-16.pc
|
||||
libpcre2-32.pc
|
||||
libpcre2-posix.pc
|
||||
pcre2-config
|
||||
src/pcre2.h
|
||||
)
|
||||
|
||||
# Make the generated script files executable.
|
||||
AC_CONFIG_COMMANDS([script-chmod], [chmod a+x pcre2-config])
|
||||
|
||||
# Make sure that pcre2_chartables.c is removed in case the method for
|
||||
# creating it was changed by reconfiguration.
|
||||
AC_CONFIG_COMMANDS([delete-old-chartables], [rm -f pcre2_chartables.c])
|
||||
|
||||
AC_OUTPUT
|
||||
|
||||
# --disable-stack-for-recursion is obsolete and has no effect.
|
||||
|
||||
if test "$enable_stack_for_recursion" = "no"; then
|
||||
cat <<EOF
|
||||
|
||||
WARNING: --disable-stack-for-recursion is obsolete and has no effect.
|
||||
EOF
|
||||
fi
|
||||
|
||||
# Print out a nice little message after configure is run displaying the
|
||||
# chosen options.
|
||||
|
||||
ebcdic_nl_code=n/a
|
||||
if test "$enable_ebcdic_nl25" = "yes"; then
|
||||
ebcdic_nl_code=0x25
|
||||
elif test "$enable_ebcdic" = "yes"; then
|
||||
ebcdic_nl_code=0x15
|
||||
fi
|
||||
|
||||
cat <<EOF
|
||||
|
||||
$PACKAGE-$VERSION configuration summary:
|
||||
|
||||
Install prefix ..................... : ${prefix}
|
||||
C preprocessor ..................... : ${CPP}
|
||||
C compiler ......................... : ${CC}
|
||||
Linker ............................. : ${LD}
|
||||
C preprocessor flags ............... : ${CPPFLAGS}
|
||||
C compiler flags ................... : ${CFLAGS} ${VISIBILITY_CFLAGS}
|
||||
Linker flags ....................... : ${LDFLAGS}
|
||||
Extra libraries .................... : ${LIBS}
|
||||
|
||||
Build 8-bit pcre2 library .......... : ${enable_pcre2_8}
|
||||
Build 16-bit pcre2 library ......... : ${enable_pcre2_16}
|
||||
Build 32-bit pcre2 library ......... : ${enable_pcre2_32}
|
||||
Include debugging code ............. : ${enable_debug}
|
||||
Enable JIT compiling support ....... : ${enable_jit}
|
||||
Use SELinux allocator in JIT ....... : ${enable_jit_sealloc}
|
||||
Enable Unicode support ............. : ${enable_unicode}
|
||||
Newline char/sequence .............. : ${enable_newline}
|
||||
\R matches only ANYCRLF ............ : ${enable_bsr_anycrlf}
|
||||
\C is disabled ..................... : ${enable_never_backslash_C}
|
||||
EBCDIC coding ...................... : ${enable_ebcdic}
|
||||
EBCDIC code for NL ................. : ${ebcdic_nl_code}
|
||||
Rebuild char tables ................ : ${enable_rebuild_chartables}
|
||||
Internal link size ................. : ${with_link_size}
|
||||
Maximum variable lookbehind ........ : ${with_max_varlookbehind}
|
||||
Nested parentheses limit ........... : ${with_parens_nest_limit}
|
||||
Heap limit ......................... : ${with_heap_limit} kibibytes
|
||||
Match limit ........................ : ${with_match_limit}
|
||||
Match depth limit .................. : ${with_match_limit_depth}
|
||||
Build shared libs .................. : ${enable_shared}
|
||||
Build static libs .................. : ${enable_static}
|
||||
Use JIT in pcre2grep ............... : ${enable_pcre2grep_jit}
|
||||
Enable callouts in pcre2grep ....... : ${enable_pcre2grep_callout}
|
||||
Enable fork in pcre2grep callouts .. : ${enable_pcre2grep_callout_fork}
|
||||
Initial buffer size for pcre2grep .. : ${with_pcre2grep_bufsize}
|
||||
Maximum buffer size for pcre2grep .. : ${with_pcre2grep_max_bufsize}
|
||||
Link pcre2grep with libz ........... : ${enable_pcre2grep_libz}
|
||||
Link pcre2grep with libbz2 ......... : ${enable_pcre2grep_libbz2}
|
||||
Link pcre2test with libedit ........ : ${enable_pcre2test_libedit}
|
||||
Link pcre2test with libreadline .... : ${enable_pcre2test_libreadline}
|
||||
Valgrind support ................... : ${enable_valgrind}
|
||||
Code coverage ...................... : ${enable_coverage}
|
||||
Fuzzer support ..................... : ${enable_fuzz_support}
|
||||
Differential fuzzer support ........ : ${enable_diff_fuzz_support}
|
||||
Use %zu and %td .................... : ${enable_percent_zt}
|
||||
|
||||
EOF
|
||||
|
||||
dnl end configure.ac
|
||||
13
3rd/pcre2/libpcre2-16.pc.in
Normal file
13
3rd/pcre2/libpcre2-16.pc.in
Normal file
@@ -0,0 +1,13 @@
|
||||
# Package Information for pkg-config
|
||||
|
||||
prefix=@prefix@
|
||||
exec_prefix=@exec_prefix@
|
||||
libdir=@libdir@
|
||||
includedir=@includedir@
|
||||
|
||||
Name: libpcre2-16
|
||||
Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 16 bit character support
|
||||
Version: @PACKAGE_VERSION@
|
||||
Libs: -L${libdir} -lpcre2-16@LIB_POSTFIX@
|
||||
Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@
|
||||
Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@
|
||||
13
3rd/pcre2/libpcre2-32.pc.in
Normal file
13
3rd/pcre2/libpcre2-32.pc.in
Normal file
@@ -0,0 +1,13 @@
|
||||
# Package Information for pkg-config
|
||||
|
||||
prefix=@prefix@
|
||||
exec_prefix=@exec_prefix@
|
||||
libdir=@libdir@
|
||||
includedir=@includedir@
|
||||
|
||||
Name: libpcre2-32
|
||||
Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 32 bit character support
|
||||
Version: @PACKAGE_VERSION@
|
||||
Libs: -L${libdir} -lpcre2-32@LIB_POSTFIX@
|
||||
Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@
|
||||
Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@
|
||||
13
3rd/pcre2/libpcre2-8.pc.in
Normal file
13
3rd/pcre2/libpcre2-8.pc.in
Normal file
@@ -0,0 +1,13 @@
|
||||
# Package Information for pkg-config
|
||||
|
||||
prefix=@prefix@
|
||||
exec_prefix=@exec_prefix@
|
||||
libdir=@libdir@
|
||||
includedir=@includedir@
|
||||
|
||||
Name: libpcre2-8
|
||||
Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 8 bit character support
|
||||
Version: @PACKAGE_VERSION@
|
||||
Libs: -L${libdir} -lpcre2-8@LIB_POSTFIX@
|
||||
Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@
|
||||
Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@
|
||||
13
3rd/pcre2/libpcre2-posix.pc.in
Normal file
13
3rd/pcre2/libpcre2-posix.pc.in
Normal file
@@ -0,0 +1,13 @@
|
||||
# Package Information for pkg-config
|
||||
|
||||
prefix=@prefix@
|
||||
exec_prefix=@exec_prefix@
|
||||
libdir=@libdir@
|
||||
includedir=@includedir@
|
||||
|
||||
Name: libpcre2-posix
|
||||
Description: Posix compatible interface to libpcre2-8
|
||||
Version: @PACKAGE_VERSION@
|
||||
Libs: -L${libdir} -lpcre2-posix@LIB_POSTFIX@
|
||||
Cflags: -I${includedir} @PCRE2POSIX_CFLAG@
|
||||
Requires.private: libpcre2-8
|
||||
121
3rd/pcre2/pcre2-config.in
Normal file
121
3rd/pcre2/pcre2-config.in
Normal file
@@ -0,0 +1,121 @@
|
||||
#!/bin/sh
# pcre2-config: print the installation prefix, version, compiler flags and
# linker flags for the PCRE2 libraries that were enabled at configure time.
# The @...@ placeholders are substituted by configure. Options are processed
# left to right, so --prefix=DIR / --exec-prefix=DIR affect only the options
# that follow them.

prefix=@prefix@
exec_prefix=@exec_prefix@
exec_prefix_set=no

# Build the usage text so that it lists only the options that can succeed
# for this particular build.
cflags="[--cflags]"
libs=

if test "@enable_pcre2_16@" = yes ; then
  libs="[--libs16] $libs"
fi

if test "@enable_pcre2_32@" = yes ; then
  libs="[--libs32] $libs"
fi

if test "@enable_pcre2_8@" = yes ; then
  libs="[--libs8] [--libs-posix] $libs"
  cflags="$cflags [--cflags-posix]"
fi

usage="Usage: pcre2-config [--prefix] [--exec-prefix] [--version] $libs $cflags"

if test $# -eq 0; then
  echo "${usage}" 1>&2
  exit 1
fi

# Run-time library search path flag, emitted only on SunOS and the BSDs.
libR=
case `uname -s` in
  *SunOS*)
    libR=" -R@libdir@"
    ;;
  *BSD*)
    libR=" -Wl,-R@libdir@"
    ;;
esac

# Link-time search path flag, omitted when the library directory is /usr/lib.
libS=
if test "@libdir@" != /usr/lib ; then
  libS=-L@libdir@
fi

while test $# -gt 0; do
  # Split off the value of any --option=value argument.
  case "$1" in
    -*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
    *) optarg= ;;
  esac

  case $1 in
    --prefix=*)
      prefix=$optarg
      # A new prefix also moves exec_prefix unless that was set explicitly.
      if test $exec_prefix_set = no ; then
        exec_prefix=$optarg
      fi
      ;;
    --prefix)
      echo $prefix
      ;;
    --exec-prefix=*)
      exec_prefix=$optarg
      exec_prefix_set=yes
      ;;
    --exec-prefix)
      echo $exec_prefix
      ;;
    --version)
      echo @PACKAGE_VERSION@
      ;;
    --cflags)
      # Start from empty so that neither an inherited environment variable
      # nor a value left over from an earlier option leaks into the output.
      includes=
      if test "@includedir@" != /usr/include ; then
        includes=-I@includedir@
      fi
      echo $includes @PCRE2_STATIC_CFLAG@
      ;;
    --cflags-posix)
      if test "@enable_pcre2_8@" = yes ; then
        includes=
        if test "@includedir@" != /usr/include ; then
          includes=-I@includedir@
        fi
        echo $includes @PCRE2POSIX_CFLAG@
      else
        echo "${usage}" 1>&2
      fi
      ;;
    --libs-posix)
      if test "@enable_pcre2_8@" = yes ; then
        echo $libS$libR -lpcre2-posix@LIB_POSTFIX@ -lpcre2-8@LIB_POSTFIX@
      else
        echo "${usage}" 1>&2
      fi
      ;;
    --libs8)
      if test "@enable_pcre2_8@" = yes ; then
        echo $libS$libR -lpcre2-8@LIB_POSTFIX@
      else
        echo "${usage}" 1>&2
      fi
      ;;
    --libs16)
      if test "@enable_pcre2_16@" = yes ; then
        echo $libS$libR -lpcre2-16@LIB_POSTFIX@
      else
        echo "${usage}" 1>&2
      fi
      ;;
    --libs32)
      if test "@enable_pcre2_32@" = yes ; then
        echo $libS$libR -lpcre2-32@LIB_POSTFIX@
      else
        echo "${usage}" 1>&2
      fi
      ;;
    *)
      echo "${usage}" 1>&2
      exit 1
      ;;
  esac
  shift
done
|
||||
483
3rd/pcre2/src/config.h.generic
Normal file
483
3rd/pcre2/src/config.h.generic
Normal file
@@ -0,0 +1,483 @@
|
||||
/* src/config.h. Generated from config.h.in by configure. */
|
||||
/* src/config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
/* PCRE2 is written in Standard C, but there are a few non-standard things it
|
||||
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||
systems.
|
||||
|
||||
In environments that support the GNU autotools, config.h.in is converted into
|
||||
config.h by the "configure" script. In environments that use CMake,
|
||||
config-cmake.in is converted into config.h. If you are going to build PCRE2 "by
|
||||
hand" without using "configure" or CMake, you should copy the distributed
|
||||
config.h.generic to config.h, and edit the macro definitions to be the way you
|
||||
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
|
||||
so that config.h is included at the start of every source.
|
||||
|
||||
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
|
||||
but if you do, default values will be taken from config.h for non-boolean
|
||||
macros that are not defined on the command line.
|
||||
|
||||
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be
|
||||
defined (conventionally to 1) for TRUE, or not defined at all for FALSE. All
|
||||
such macros are listed as a commented #undef in config.h.generic. Macros such
|
||||
as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
|
||||
|
||||
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
|
||||
sure both macros are undefined; an emulation function will then be used. */
|
||||
|
||||
/* By default, the \R escape sequence matches any Unicode line ending
|
||||
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
||||
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
|
||||
The build-time default can be overridden by the user of PCRE2 at runtime.
|
||||
*/
|
||||
/* #undef BSR_ANYCRLF */
|
||||
|
||||
/* Define to any value to disable the use of the z and t modifiers in
|
||||
formatting settings such as %zu or %td (this is rarely needed). */
|
||||
/* #undef DISABLE_PERCENT_ZT */
|
||||
|
||||
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||
character codes, define this macro to any value. When EBCDIC is set, PCRE2
|
||||
assumes that all input strings are in EBCDIC. If you do not define this
|
||||
macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
|
||||
is not possible to build a version of PCRE2 that supports both EBCDIC and
|
||||
UTF-8/16/32. */
|
||||
/* #undef EBCDIC */
|
||||
|
||||
/* In an EBCDIC environment, define this macro to any value to arrange for the
|
||||
NL character to be 0x25 instead of the default 0x15. NL plays the role that
|
||||
LF does in an ASCII/Unicode environment. */
|
||||
/* #undef EBCDIC_NL25 */
|
||||
|
||||
/* Define to 1 if you have the <assert.h> header file. */
|
||||
/* #undef HAVE_ASSERT_H */
|
||||
|
||||
/* Define this if your compiler supports __attribute__((uninitialized)) */
|
||||
/* #undef HAVE_ATTRIBUTE_UNINITIALIZED */
|
||||
|
||||
/* Define to 1 if you have the `bcopy' function. */
|
||||
/* #undef HAVE_BCOPY */
|
||||
|
||||
/* Define this if your compiler provides __assume() */
|
||||
/* #undef HAVE_BUILTIN_ASSUME */
|
||||
|
||||
/* Define this if your compiler provides __builtin_mul_overflow() */
|
||||
/* #undef HAVE_BUILTIN_MUL_OVERFLOW */
|
||||
|
||||
/* Define this if your compiler provides __builtin_unreachable() */
|
||||
/* #undef HAVE_BUILTIN_UNREACHABLE */
|
||||
|
||||
/* Define to 1 if you have the <bzlib.h> header file. */
|
||||
/* #undef HAVE_BZLIB_H */
|
||||
|
||||
/* Define to 1 if you have the <dirent.h> header file. */
|
||||
/* #undef HAVE_DIRENT_H */
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
/* #undef HAVE_DLFCN_H */
|
||||
|
||||
/* Define to 1 if you have the <editline/readline.h> header file. */
|
||||
/* #undef HAVE_EDITLINE_READLINE_H */
|
||||
|
||||
/* Define to 1 if you have the <edit/readline/readline.h> header file. */
|
||||
/* #undef HAVE_EDIT_READLINE_READLINE_H */
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
/* #undef HAVE_INTTYPES_H */
|
||||
|
||||
/* Define to 1 if you have the <limits.h> header file. */
|
||||
/* #undef HAVE_LIMITS_H */
|
||||
|
||||
/* Define to 1 if you have the `memfd_create' function. */
|
||||
/* #undef HAVE_MEMFD_CREATE */
|
||||
|
||||
/* Define to 1 if you have the `memmove' function. */
|
||||
/* #undef HAVE_MEMMOVE */
|
||||
|
||||
/* Define to 1 if you have the <minix/config.h> header file. */
|
||||
/* #undef HAVE_MINIX_CONFIG_H */
|
||||
|
||||
/* Define to 1 if you have the `mkostemp' function. */
|
||||
/* #undef HAVE_MKOSTEMP */
|
||||
|
||||
/* Define if you have POSIX threads libraries and header files. */
|
||||
/* #undef HAVE_PTHREAD */
|
||||
|
||||
/* Have PTHREAD_PRIO_INHERIT. */
|
||||
/* #undef HAVE_PTHREAD_PRIO_INHERIT */
|
||||
|
||||
/* Define to 1 if you have the <readline.h> header file. */
|
||||
/* #undef HAVE_READLINE_H */
|
||||
|
||||
/* Define to 1 if you have the <readline/history.h> header file. */
|
||||
/* #undef HAVE_READLINE_HISTORY_H */
|
||||
|
||||
/* Define to 1 if you have the <readline/readline.h> header file. */
|
||||
/* #undef HAVE_READLINE_READLINE_H */
|
||||
|
||||
/* Define to 1 if you have the `realpath' function. */
|
||||
/* #undef HAVE_REALPATH */
|
||||
|
||||
/* Define to 1 if you have the `secure_getenv' function. */
|
||||
/* #undef HAVE_SECURE_GETENV */
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
/* #undef HAVE_STDINT_H */
|
||||
|
||||
/* Define to 1 if you have the <stdio.h> header file. */
|
||||
/* #undef HAVE_STDIO_H */
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
/* #undef HAVE_STDLIB_H */
|
||||
|
||||
/* Define to 1 if you have the `strerror' function. */
|
||||
/* #undef HAVE_STRERROR */
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
/* #undef HAVE_STRINGS_H */
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
/* #undef HAVE_STRING_H */
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
/* #undef HAVE_SYS_STAT_H */
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
/* #undef HAVE_SYS_TYPES_H */
|
||||
|
||||
/* Define to 1 if you have the <sys/wait.h> header file. */
|
||||
/* #undef HAVE_SYS_WAIT_H */
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
/* #undef HAVE_UNISTD_H */
|
||||
|
||||
/* Define to 1 if the compiler supports GCC compatible visibility
|
||||
declarations. */
|
||||
/* #undef HAVE_VISIBILITY */
|
||||
|
||||
/* Define to 1 if you have the <wchar.h> header file. */
|
||||
/* #undef HAVE_WCHAR_H */
|
||||
|
||||
/* Define to 1 if you have the <windows.h> header file. */
|
||||
/* #undef HAVE_WINDOWS_H */
|
||||
|
||||
/* Define to 1 if you have the <zlib.h> header file. */
|
||||
/* #undef HAVE_ZLIB_H */
|
||||
|
||||
/* This limits the amount of memory that may be used while matching a pattern.
|
||||
It applies to both pcre2_match() and pcre2_dfa_match(). It does not apply
|
||||
to JIT matching. The value is in kibibytes (units of 1024 bytes). */
|
||||
#ifndef HEAP_LIMIT
|
||||
#define HEAP_LIMIT 20000000
|
||||
#endif
|
||||
|
||||
/* The value of LINK_SIZE determines the number of bytes used to store links
|
||||
as offsets within the compiled regex. The default is 2, which allows for
|
||||
compiled patterns up to 65535 code units long. This covers the vast
|
||||
majority of cases. However, PCRE2 can also be compiled to use 3 or 4 bytes
|
||||
instead. This allows for longer patterns in extreme cases. */
|
||||
#ifndef LINK_SIZE
|
||||
#define LINK_SIZE 2
|
||||
#endif
|
||||
|
||||
/* Define to the sub-directory where libtool stores uninstalled libraries. */
|
||||
/* This is ignored unless you are using libtool. */
|
||||
#ifndef LT_OBJDIR
|
||||
#define LT_OBJDIR ".libs/"
|
||||
#endif
|
||||
|
||||
/* The value of MATCH_LIMIT determines the default number of times the
|
||||
pcre2_match() function can record a backtrack position during a single
|
||||
matching attempt. The value is also used to limit a loop counter in
|
||||
pcre2_dfa_match(). There is a runtime interface for setting a different
|
||||
limit. The limit exists in order to catch runaway regular expressions that
|
||||
take forever to determine that they do not match. The default is set very
|
||||
large so that it does not accidentally catch legitimate cases. */
|
||||
#ifndef MATCH_LIMIT
|
||||
#define MATCH_LIMIT 10000000
|
||||
#endif
|
||||
|
||||
/* The above limit applies to all backtracks, whether or not they are nested.
|
||||
In some environments it is desirable to limit the nesting of backtracking
|
||||
(that is, the depth of tree that is searched) more strictly, in order to
|
||||
restrict the maximum amount of heap memory that is used. The value of
|
||||
MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it
|
||||
must be less than the value of MATCH_LIMIT. The default is to use the same
|
||||
value as MATCH_LIMIT. There is a runtime method for setting a different
|
||||
limit. In the case of pcre2_dfa_match(), this limit controls the depth of
|
||||
the internal nested function calls that are used for pattern recursions,
|
||||
lookarounds, and atomic groups. */
|
||||
#ifndef MATCH_LIMIT_DEPTH
|
||||
#define MATCH_LIMIT_DEPTH MATCH_LIMIT
|
||||
#endif
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#ifndef MAX_NAME_COUNT
|
||||
#define MAX_NAME_COUNT 10000
|
||||
#endif
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#ifndef MAX_NAME_SIZE
|
||||
#define MAX_NAME_SIZE 128
|
||||
#endif
|
||||
|
||||
/* The value of MAX_VARLOOKBEHIND specifies the default maximum length, in
|
||||
characters, for a variable-length lookbehind assertion. */
|
||||
#ifndef MAX_VARLOOKBEHIND
|
||||
#define MAX_VARLOOKBEHIND 255
|
||||
#endif
|
||||
|
||||
/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */
|
||||
/* #undef NEVER_BACKSLASH_C */
|
||||
|
||||
/* The value of NEWLINE_DEFAULT determines the default newline character
|
||||
sequence. PCRE2 client programs can override this by selecting other values
|
||||
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), 5
|
||||
(ANYCRLF), and 6 (NUL). */
|
||||
#ifndef NEWLINE_DEFAULT
|
||||
#define NEWLINE_DEFAULT 2
|
||||
#endif
|
||||
|
||||
/* Name of package */
|
||||
#define PACKAGE "pcre2"
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#define PACKAGE_BUGREPORT ""
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#define PACKAGE_NAME "PCRE2"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE2 10.45"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre2"
|
||||
|
||||
/* Define to the home page for this package. */
|
||||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "10.45"
|
||||
|
||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
stack that is used while compiling a pattern. */
|
||||
#ifndef PARENS_NEST_LIMIT
|
||||
#define PARENS_NEST_LIMIT 250
|
||||
#endif
|
||||
|
||||
/* The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by
|
||||
pcre2grep to hold parts of the file it is searching. The buffer will be
|
||||
expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing
|
||||
very long lines. The actual amount of memory used by pcre2grep is three
|
||||
times this number, because it allows for the buffering of "before" and
|
||||
"after" lines. */
|
||||
#ifndef PCRE2GREP_BUFSIZE
|
||||
#define PCRE2GREP_BUFSIZE 20480
|
||||
#endif
|
||||
|
||||
/* The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer
|
||||
used by pcre2grep to hold parts of the file it is searching. The actual
|
||||
amount of memory used by pcre2grep is three times this number, because it
|
||||
allows for the buffering of "before" and "after" lines. */
|
||||
#ifndef PCRE2GREP_MAX_BUFSIZE
|
||||
#define PCRE2GREP_MAX_BUFSIZE 1048576
|
||||
#endif
|
||||
|
||||
/* Define to any value to include debugging code. */
|
||||
/* #undef PCRE2_DEBUG */
|
||||
|
||||
/* to make a symbol visible */
|
||||
#define PCRE2_EXPORT
|
||||
|
||||
/* If you are compiling for a system other than a Unix-like system or
|
||||
Win32, and it needs some magic to be inserted before the definition
|
||||
of a function that is exported by the library, define this macro to
|
||||
contain the relevant magic. If you do not define this macro, a suitable
|
||||
__declspec value is used for Windows systems; in other environments
|
||||
a compiler relevant "extern" is used with any "visibility" related
|
||||
attributes from PCRE2_EXPORT included.
|
||||
This macro appears at the start of every exported function that is part
|
||||
of the external API. It does not appear on functions that are "external"
|
||||
in the C sense, but which are internal to the library. */
|
||||
/* #undef PCRE2_EXP_DEFN */
|
||||
|
||||
/* Define to any value if linking statically (TODO: make nice with Libtool) */
|
||||
/* #undef PCRE2_STATIC */
|
||||
|
||||
/* Define to necessary symbol if this constant uses a non-standard name on
|
||||
your system. */
|
||||
/* #undef PTHREAD_CREATE_JOINABLE */
|
||||
|
||||
/* Define to any non-zero number to enable support for SELinux compatible
|
||||
executable memory allocator in JIT. Note that this will have no effect
|
||||
unless SUPPORT_JIT is also defined. */
|
||||
/* #undef SLJIT_PROT_EXECUTABLE_ALLOCATOR */
|
||||
|
||||
/* Define to 1 if all of the C90 standard headers exist (not just the ones
|
||||
required in a freestanding environment). This macro is provided for
|
||||
backward compatibility; new code need not use it. */
|
||||
/* #undef STDC_HEADERS */
|
||||
|
||||
/* Define to any value to enable differential fuzzing support. */
|
||||
/* #undef SUPPORT_DIFF_FUZZ */
|
||||
|
||||
/* Define to any value to enable support for Just-In-Time compiling. */
|
||||
/* #undef SUPPORT_JIT */
|
||||
|
||||
/* Define to any value to allow pcre2grep to be linked with libbz2, so that it
|
||||
is able to handle .bz2 files. */
|
||||
/* #undef SUPPORT_LIBBZ2 */
|
||||
|
||||
/* Define to any value to allow pcre2test to be linked with libedit. */
|
||||
/* #undef SUPPORT_LIBEDIT */
|
||||
|
||||
/* Define to any value to allow pcre2test to be linked with libreadline. */
|
||||
/* #undef SUPPORT_LIBREADLINE */
|
||||
|
||||
/* Define to any value to allow pcre2grep to be linked with libz, so that it
|
||||
is able to handle .gz files. */
|
||||
/* #undef SUPPORT_LIBZ */
|
||||
|
||||
/* Define to any value to enable callout script support in pcre2grep. */
|
||||
/* #undef SUPPORT_PCRE2GREP_CALLOUT */
|
||||
|
||||
/* Define to any value to enable fork support in pcre2grep callout scripts.
|
||||
This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also defined.
|
||||
*/
|
||||
/* #undef SUPPORT_PCRE2GREP_CALLOUT_FORK */
|
||||
|
||||
/* Define to any value to enable JIT support in pcre2grep. Note that this will
|
||||
have no effect unless SUPPORT_JIT is also defined. */
|
||||
/* #undef SUPPORT_PCRE2GREP_JIT */
|
||||
|
||||
/* Define to any value to enable the 16 bit PCRE2 library. */
|
||||
/* #undef SUPPORT_PCRE2_16 */
|
||||
|
||||
/* Define to any value to enable the 32 bit PCRE2 library. */
|
||||
/* #undef SUPPORT_PCRE2_32 */
|
||||
|
||||
/* Define to any value to enable the 8 bit PCRE2 library. */
|
||||
/* #undef SUPPORT_PCRE2_8 */
|
||||
|
||||
/* Define to any value to enable support for Unicode and UTF encoding. This
|
||||
will work even in an EBCDIC environment, but it is incompatible with the
|
||||
EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or*
|
||||
ASCII/Unicode, but not both at once. */
|
||||
/* #undef SUPPORT_UNICODE */
|
||||
|
||||
/* Define to any value for valgrind support to find invalid memory reads. */
|
||||
/* #undef SUPPORT_VALGRIND */
|
||||
|
||||
/* Enable extensions on AIX 3, Interix. */
|
||||
#ifndef _ALL_SOURCE
|
||||
# define _ALL_SOURCE 1
|
||||
#endif
|
||||
/* Enable general extensions on macOS. */
|
||||
#ifndef _DARWIN_C_SOURCE
|
||||
# define _DARWIN_C_SOURCE 1
|
||||
#endif
|
||||
/* Enable general extensions on Solaris. */
|
||||
#ifndef __EXTENSIONS__
|
||||
# define __EXTENSIONS__ 1
|
||||
#endif
|
||||
/* Enable GNU extensions on systems that have them. */
|
||||
#ifndef _GNU_SOURCE
|
||||
# define _GNU_SOURCE 1
|
||||
#endif
|
||||
/* Enable X/Open compliant socket functions that do not require linking
|
||||
with -lxnet on HP-UX 11.11. */
|
||||
#ifndef _HPUX_ALT_XOPEN_SOCKET_API
|
||||
# define _HPUX_ALT_XOPEN_SOCKET_API 1
|
||||
#endif
|
||||
/* Identify the host operating system as Minix.
|
||||
This macro does not affect the system headers' behavior.
|
||||
A future release of Autoconf may stop defining this macro. */
|
||||
#ifndef _MINIX
|
||||
/* # undef _MINIX */
|
||||
#endif
|
||||
/* Enable general extensions on NetBSD.
|
||||
Enable NetBSD compatibility extensions on Minix. */
|
||||
#ifndef _NETBSD_SOURCE
|
||||
# define _NETBSD_SOURCE 1
|
||||
#endif
|
||||
/* Enable OpenBSD compatibility extensions on NetBSD.
|
||||
Oddly enough, this does nothing on OpenBSD. */
|
||||
#ifndef _OPENBSD_SOURCE
|
||||
# define _OPENBSD_SOURCE 1
|
||||
#endif
|
||||
/* Define to 1 if needed for POSIX-compatible behavior. */
|
||||
#ifndef _POSIX_SOURCE
|
||||
/* # undef _POSIX_SOURCE */
|
||||
#endif
|
||||
/* Define to 2 if needed for POSIX-compatible behavior. */
|
||||
#ifndef _POSIX_1_SOURCE
|
||||
/* # undef _POSIX_1_SOURCE */
|
||||
#endif
|
||||
/* Enable POSIX-compatible threading on Solaris. */
|
||||
#ifndef _POSIX_PTHREAD_SEMANTICS
|
||||
# define _POSIX_PTHREAD_SEMANTICS 1
|
||||
#endif
|
||||
/* Enable extensions specified by ISO/IEC TS 18661-5:2014. */
|
||||
#ifndef __STDC_WANT_IEC_60559_ATTRIBS_EXT__
|
||||
# define __STDC_WANT_IEC_60559_ATTRIBS_EXT__ 1
|
||||
#endif
|
||||
/* Enable extensions specified by ISO/IEC TS 18661-1:2014. */
|
||||
#ifndef __STDC_WANT_IEC_60559_BFP_EXT__
|
||||
# define __STDC_WANT_IEC_60559_BFP_EXT__ 1
|
||||
#endif
|
||||
/* Enable extensions specified by ISO/IEC TS 18661-2:2015. */
|
||||
#ifndef __STDC_WANT_IEC_60559_DFP_EXT__
|
||||
# define __STDC_WANT_IEC_60559_DFP_EXT__ 1
|
||||
#endif
|
||||
/* Enable extensions specified by ISO/IEC TS 18661-4:2015. */
|
||||
#ifndef __STDC_WANT_IEC_60559_FUNCS_EXT__
|
||||
# define __STDC_WANT_IEC_60559_FUNCS_EXT__ 1
|
||||
#endif
|
||||
/* Enable extensions specified by ISO/IEC TS 18661-3:2015. */
|
||||
#ifndef __STDC_WANT_IEC_60559_TYPES_EXT__
|
||||
# define __STDC_WANT_IEC_60559_TYPES_EXT__ 1
|
||||
#endif
|
||||
/* Enable extensions specified by ISO/IEC TR 24731-2:2010. */
|
||||
#ifndef __STDC_WANT_LIB_EXT2__
|
||||
# define __STDC_WANT_LIB_EXT2__ 1
|
||||
#endif
|
||||
/* Enable extensions specified by ISO/IEC 24747:2009. */
|
||||
#ifndef __STDC_WANT_MATH_SPEC_FUNCS__
|
||||
# define __STDC_WANT_MATH_SPEC_FUNCS__ 1
|
||||
#endif
|
||||
/* Enable extensions on HP NonStop. */
|
||||
#ifndef _TANDEM_SOURCE
|
||||
# define _TANDEM_SOURCE 1
|
||||
#endif
|
||||
/* Enable X/Open extensions. Define to 500 only if necessary
|
||||
to make mbstate_t available. */
|
||||
#ifndef _XOPEN_SOURCE
|
||||
/* # undef _XOPEN_SOURCE */
|
||||
#endif
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "10.45"
|
||||
|
||||
/* Number of bits in a file offset, on hosts where this is settable. */
|
||||
/* #undef _FILE_OFFSET_BITS */
|
||||
|
||||
/* Define for large files, on AIX-style hosts. */
|
||||
/* #undef _LARGE_FILES */
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
/* #undef const */
|
||||
|
||||
/* Define to the type of a signed integer type of width exactly 64 bits if
|
||||
such a type exists and the standard includes do not define it. */
|
||||
/* #undef int64_t */
|
||||
|
||||
/* Define to `unsigned int' if <sys/types.h> does not define. */
|
||||
/* #undef size_t */
|
||||
460
3rd/pcre2/src/config.h.in
Normal file
460
3rd/pcre2/src/config.h.in
Normal file
@@ -0,0 +1,460 @@
|
||||
/* src/config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
|
||||
/* PCRE2 is written in Standard C, but there are a few non-standard things it
|
||||
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||
systems.
|
||||
|
||||
In environments that support the GNU autotools, config.h.in is converted into
|
||||
config.h by the "configure" script. In environments that use CMake,
|
||||
config-cmake.in is converted into config.h. If you are going to build PCRE2 "by
|
||||
hand" without using "configure" or CMake, you should copy the distributed
|
||||
config.h.generic to config.h, and edit the macro definitions to be the way you
|
||||
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
|
||||
so that config.h is included at the start of every source.
|
||||
|
||||
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
|
||||
but if you do, default values will be taken from config.h for non-boolean
|
||||
macros that are not defined on the command line.
|
||||
|
||||
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be
|
||||
defined (conventionally to 1) for TRUE, or not defined at all for FALSE. All
|
||||
such macros are listed as a commented #undef in config.h.generic. Macros such
|
||||
as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
|
||||
|
||||
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
|
||||
sure both macros are undefined; an emulation function will then be used. */
|
||||
|
||||
/* By default, the \R escape sequence matches any Unicode line ending
|
||||
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
||||
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
|
||||
The build-time default can be overridden by the user of PCRE2 at runtime.
|
||||
*/
|
||||
#undef BSR_ANYCRLF
|
||||
|
||||
/* Define to any value to disable the use of the z and t modifiers in
|
||||
formatting settings such as %zu or %td (this is rarely needed). */
|
||||
#undef DISABLE_PERCENT_ZT
|
||||
|
||||
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||
character codes, define this macro to any value. When EBCDIC is set, PCRE2
|
||||
assumes that all input strings are in EBCDIC. If you do not define this
|
||||
macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
|
||||
is not possible to build a version of PCRE2 that supports both EBCDIC and
|
||||
UTF-8/16/32. */
|
||||
#undef EBCDIC
|
||||
|
||||
/* In an EBCDIC environment, define this macro to any value to arrange for the
|
||||
NL character to be 0x25 instead of the default 0x15. NL plays the role that
|
||||
LF does in an ASCII/Unicode environment. */
|
||||
#undef EBCDIC_NL25
|
||||
|
||||
/* Define to 1 if you have the <assert.h> header file. */
|
||||
#undef HAVE_ASSERT_H
|
||||
|
||||
/* Define this if your compiler supports __attribute__((uninitialized)) */
|
||||
#undef HAVE_ATTRIBUTE_UNINITIALIZED
|
||||
|
||||
/* Define to 1 if you have the `bcopy' function. */
|
||||
#undef HAVE_BCOPY
|
||||
|
||||
/* Define this if your compiler provides __assume() */
|
||||
#undef HAVE_BUILTIN_ASSUME
|
||||
|
||||
/* Define this if your compiler provides __builtin_mul_overflow() */
|
||||
#undef HAVE_BUILTIN_MUL_OVERFLOW
|
||||
|
||||
/* Define this if your compiler provides __builtin_unreachable() */
|
||||
#undef HAVE_BUILTIN_UNREACHABLE
|
||||
|
||||
/* Define to 1 if you have the <bzlib.h> header file. */
|
||||
#undef HAVE_BZLIB_H
|
||||
|
||||
/* Define to 1 if you have the <dirent.h> header file. */
|
||||
#undef HAVE_DIRENT_H
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
#undef HAVE_DLFCN_H
|
||||
|
||||
/* Define to 1 if you have the <editline/readline.h> header file. */
|
||||
#undef HAVE_EDITLINE_READLINE_H
|
||||
|
||||
/* Define to 1 if you have the <edit/readline/readline.h> header file. */
|
||||
#undef HAVE_EDIT_READLINE_READLINE_H
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#undef HAVE_INTTYPES_H
|
||||
|
||||
/* Define to 1 if you have the <limits.h> header file. */
|
||||
#undef HAVE_LIMITS_H
|
||||
|
||||
/* Define to 1 if you have the `memfd_create' function. */
|
||||
#undef HAVE_MEMFD_CREATE
|
||||
|
||||
/* Define to 1 if you have the `memmove' function. */
|
||||
#undef HAVE_MEMMOVE
|
||||
|
||||
/* Define to 1 if you have the <minix/config.h> header file. */
|
||||
#undef HAVE_MINIX_CONFIG_H
|
||||
|
||||
/* Define to 1 if you have the `mkostemp' function. */
|
||||
#undef HAVE_MKOSTEMP
|
||||
|
||||
/* Define if you have POSIX threads libraries and header files. */
|
||||
#undef HAVE_PTHREAD
|
||||
|
||||
/* Have PTHREAD_PRIO_INHERIT. */
|
||||
#undef HAVE_PTHREAD_PRIO_INHERIT
|
||||
|
||||
/* Define to 1 if you have the <readline.h> header file. */
|
||||
#undef HAVE_READLINE_H
|
||||
|
||||
/* Define to 1 if you have the <readline/history.h> header file. */
|
||||
#undef HAVE_READLINE_HISTORY_H
|
||||
|
||||
/* Define to 1 if you have the <readline/readline.h> header file. */
|
||||
#undef HAVE_READLINE_READLINE_H
|
||||
|
||||
/* Define to 1 if you have the `realpath' function. */
|
||||
#undef HAVE_REALPATH
|
||||
|
||||
/* Define to 1 if you have the `secure_getenv' function. */
|
||||
#undef HAVE_SECURE_GETENV
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#undef HAVE_STDINT_H
|
||||
|
||||
/* Define to 1 if you have the <stdio.h> header file. */
|
||||
#undef HAVE_STDIO_H
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
#undef HAVE_STDLIB_H
|
||||
|
||||
/* Define to 1 if you have the `strerror' function. */
|
||||
#undef HAVE_STRERROR
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
#undef HAVE_STRINGS_H
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
#undef HAVE_STRING_H
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
#undef HAVE_SYS_STAT_H
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
#undef HAVE_SYS_TYPES_H
|
||||
|
||||
/* Define to 1 if you have the <sys/wait.h> header file. */
|
||||
#undef HAVE_SYS_WAIT_H
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#undef HAVE_UNISTD_H
|
||||
|
||||
/* Define to 1 if the compiler supports GCC compatible visibility
|
||||
declarations. */
|
||||
#undef HAVE_VISIBILITY
|
||||
|
||||
/* Define to 1 if you have the <wchar.h> header file. */
|
||||
#undef HAVE_WCHAR_H
|
||||
|
||||
/* Define to 1 if you have the <windows.h> header file. */
|
||||
#undef HAVE_WINDOWS_H
|
||||
|
||||
/* Define to 1 if you have the <zlib.h> header file. */
|
||||
#undef HAVE_ZLIB_H
|
||||
|
||||
/* This limits the amount of memory that may be used while matching a pattern.
|
||||
It applies to both pcre2_match() and pcre2_dfa_match(). It does not apply
|
||||
to JIT matching. The value is in kibibytes (units of 1024 bytes). */
|
||||
#undef HEAP_LIMIT
|
||||
|
||||
/* The value of LINK_SIZE determines the number of bytes used to store links
|
||||
as offsets within the compiled regex. The default is 2, which allows for
|
||||
compiled patterns up to 65535 code units long. This covers the vast
|
||||
majority of cases. However, PCRE2 can also be compiled to use 3 or 4 bytes
|
||||
instead. This allows for longer patterns in extreme cases. */
|
||||
#undef LINK_SIZE
|
||||
|
||||
/* Define to the sub-directory where libtool stores uninstalled libraries. */
|
||||
#undef LT_OBJDIR
|
||||
|
||||
/* The value of MATCH_LIMIT determines the default number of times the
|
||||
pcre2_match() function can record a backtrack position during a single
|
||||
matching attempt. The value is also used to limit a loop counter in
|
||||
pcre2_dfa_match(). There is a runtime interface for setting a different
|
||||
limit. The limit exists in order to catch runaway regular expressions that
|
||||
take forever to determine that they do not match. The default is set very
|
||||
large so that it does not accidentally catch legitimate cases. */
|
||||
#undef MATCH_LIMIT
|
||||
|
||||
/* The above limit applies to all backtracks, whether or not they are nested.
|
||||
In some environments it is desirable to limit the nesting of backtracking
|
||||
(that is, the depth of tree that is searched) more strictly, in order to
|
||||
restrict the maximum amount of heap memory that is used. The value of
|
||||
MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it
|
||||
must be less than the value of MATCH_LIMIT. The default is to use the same
|
||||
value as MATCH_LIMIT. There is a runtime method for setting a different
|
||||
limit. In the case of pcre2_dfa_match(), this limit controls the depth of
|
||||
the internal nested function calls that are used for pattern recursions,
|
||||
lookarounds, and atomic groups. */
|
||||
#undef MATCH_LIMIT_DEPTH
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#undef MAX_NAME_COUNT
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#undef MAX_NAME_SIZE
|
||||
|
||||
/* The value of MAX_VARLOOKBEHIND specifies the default maximum length, in
|
||||
characters, for a variable-length lookbehind assertion. */
|
||||
#undef MAX_VARLOOKBEHIND
|
||||
|
||||
/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */
|
||||
#undef NEVER_BACKSLASH_C
|
||||
|
||||
/* The value of NEWLINE_DEFAULT determines the default newline character
|
||||
sequence. PCRE2 client programs can override this by selecting other values
|
||||
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), 5
|
||||
(ANYCRLF), and 6 (NUL). */
|
||||
#undef NEWLINE_DEFAULT
|
||||
|
||||
/* Name of package */
|
||||
#undef PACKAGE
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#undef PACKAGE_BUGREPORT
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#undef PACKAGE_NAME
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#undef PACKAGE_STRING
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#undef PACKAGE_TARNAME
|
||||
|
||||
/* Define to the home page for this package. */
|
||||
#undef PACKAGE_URL
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#undef PACKAGE_VERSION
|
||||
|
||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
stack that is used while compiling a pattern. */
|
||||
#undef PARENS_NEST_LIMIT
|
||||
|
||||
/* The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by
|
||||
pcre2grep to hold parts of the file it is searching. The buffer will be
|
||||
expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing
|
||||
very long lines. The actual amount of memory used by pcre2grep is three
|
||||
times this number, because it allows for the buffering of "before" and
|
||||
"after" lines. */
|
||||
#undef PCRE2GREP_BUFSIZE
|
||||
|
||||
/* The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer
|
||||
used by pcre2grep to hold parts of the file it is searching. The actual
|
||||
amount of memory used by pcre2grep is three times this number, because it
|
||||
allows for the buffering of "before" and "after" lines. */
|
||||
#undef PCRE2GREP_MAX_BUFSIZE
|
||||
|
||||
/* Define to any value to include debugging code. */
|
||||
#undef PCRE2_DEBUG
|
||||
|
||||
/* to make a symbol visible */
|
||||
#undef PCRE2_EXPORT
|
||||
|
||||
|
||||
/* If you are compiling for a system other than a Unix-like system or
|
||||
Win32, and it needs some magic to be inserted before the definition
|
||||
of a function that is exported by the library, define this macro to
|
||||
contain the relevant magic. If you do not define this macro, a suitable
|
||||
__declspec value is used for Windows systems; in other environments
|
||||
a compiler relevant "extern" is used with any "visibility" related
|
||||
attributes from PCRE2_EXPORT included.
|
||||
This macro appears at the start of every exported function that is part
|
||||
of the external API. It does not appear on functions that are "external"
|
||||
in the C sense, but which are internal to the library. */
|
||||
#undef PCRE2_EXP_DEFN
|
||||
|
||||
/* Define to any value if linking statically (TODO: make nice with Libtool) */
|
||||
#undef PCRE2_STATIC
|
||||
|
||||
/* Define to necessary symbol if this constant uses a non-standard name on
|
||||
your system. */
|
||||
#undef PTHREAD_CREATE_JOINABLE
|
||||
|
||||
/* Define to any non-zero number to enable support for SELinux compatible
|
||||
executable memory allocator in JIT. Note that this will have no effect
|
||||
unless SUPPORT_JIT is also defined. */
|
||||
#undef SLJIT_PROT_EXECUTABLE_ALLOCATOR
|
||||
|
||||
/* Define to 1 if all of the C90 standard headers exist (not just the ones
|
||||
required in a freestanding environment). This macro is provided for
|
||||
backward compatibility; new code need not use it. */
|
||||
#undef STDC_HEADERS
|
||||
|
||||
/* Define to any value to enable differential fuzzing support. */
|
||||
#undef SUPPORT_DIFF_FUZZ
|
||||
|
||||
/* Define to any value to enable support for Just-In-Time compiling. */
|
||||
#undef SUPPORT_JIT
|
||||
|
||||
/* Define to any value to allow pcre2grep to be linked with libbz2, so that it
|
||||
is able to handle .bz2 files. */
|
||||
#undef SUPPORT_LIBBZ2
|
||||
|
||||
/* Define to any value to allow pcre2test to be linked with libedit. */
|
||||
#undef SUPPORT_LIBEDIT
|
||||
|
||||
/* Define to any value to allow pcre2test to be linked with libreadline. */
|
||||
#undef SUPPORT_LIBREADLINE
|
||||
|
||||
/* Define to any value to allow pcre2grep to be linked with libz, so that it
|
||||
is able to handle .gz files. */
|
||||
#undef SUPPORT_LIBZ
|
||||
|
||||
/* Define to any value to enable callout script support in pcre2grep. */
|
||||
#undef SUPPORT_PCRE2GREP_CALLOUT
|
||||
|
||||
/* Define to any value to enable fork support in pcre2grep callout scripts.
|
||||
This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also defined.
|
||||
*/
|
||||
#undef SUPPORT_PCRE2GREP_CALLOUT_FORK
|
||||
|
||||
/* Define to any value to enable JIT support in pcre2grep. Note that this will
|
||||
have no effect unless SUPPORT_JIT is also defined. */
|
||||
#undef SUPPORT_PCRE2GREP_JIT
|
||||
|
||||
/* Define to any value to enable the 16 bit PCRE2 library. */
|
||||
#undef SUPPORT_PCRE2_16
|
||||
|
||||
/* Define to any value to enable the 32 bit PCRE2 library. */
|
||||
#undef SUPPORT_PCRE2_32
|
||||
|
||||
/* Define to any value to enable the 8 bit PCRE2 library. */
|
||||
#undef SUPPORT_PCRE2_8
|
||||
|
||||
/* Define to any value to enable support for Unicode and UTF encoding. This
|
||||
will work even in an EBCDIC environment, but it is incompatible with the
|
||||
EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or*
|
||||
ASCII/Unicode, but not both at once. */
|
||||
#undef SUPPORT_UNICODE
|
||||
|
||||
/* Define to any value for valgrind support to find invalid memory reads. */
|
||||
#undef SUPPORT_VALGRIND
|
||||
|
||||
/* Enable extensions on AIX 3, Interix. */
|
||||
#ifndef _ALL_SOURCE
|
||||
# undef _ALL_SOURCE
|
||||
#endif
|
||||
/* Enable general extensions on macOS. */
|
||||
#ifndef _DARWIN_C_SOURCE
|
||||
# undef _DARWIN_C_SOURCE
|
||||
#endif
|
||||
/* Enable general extensions on Solaris. */
|
||||
#ifndef __EXTENSIONS__
|
||||
# undef __EXTENSIONS__
|
||||
#endif
|
||||
/* Enable GNU extensions on systems that have them. */
|
||||
#ifndef _GNU_SOURCE
|
||||
# undef _GNU_SOURCE
|
||||
#endif
|
||||
/* Enable X/Open compliant socket functions that do not require linking
|
||||
with -lxnet on HP-UX 11.11. */
|
||||
#ifndef _HPUX_ALT_XOPEN_SOCKET_API
|
||||
# undef _HPUX_ALT_XOPEN_SOCKET_API
|
||||
#endif
|
||||
/* Identify the host operating system as Minix.
|
||||
This macro does not affect the system headers' behavior.
|
||||
A future release of Autoconf may stop defining this macro. */
|
||||
#ifndef _MINIX
|
||||
# undef _MINIX
|
||||
#endif
|
||||
/* Enable general extensions on NetBSD.
|
||||
Enable NetBSD compatibility extensions on Minix. */
|
||||
#ifndef _NETBSD_SOURCE
|
||||
# undef _NETBSD_SOURCE
|
||||
#endif
|
||||
/* Enable OpenBSD compatibility extensions on NetBSD.
|
||||
Oddly enough, this does nothing on OpenBSD. */
|
||||
#ifndef _OPENBSD_SOURCE
|
||||
# undef _OPENBSD_SOURCE
|
||||
#endif
|
||||
/* Define to 1 if needed for POSIX-compatible behavior. */
|
||||
#ifndef _POSIX_SOURCE
|
||||
# undef _POSIX_SOURCE
|
||||
#endif
|
||||
/* Define to 2 if needed for POSIX-compatible behavior. */
|
||||
#ifndef _POSIX_1_SOURCE
|
||||
# undef _POSIX_1_SOURCE
|
||||
#endif
|
||||
/* Enable POSIX-compatible threading on Solaris. */
|
||||
#ifndef _POSIX_PTHREAD_SEMANTICS
|
||||
# undef _POSIX_PTHREAD_SEMANTICS
|
||||
#endif
|
||||
/* Enable extensions specified by ISO/IEC TS 18661-5:2014. */
|
||||
#ifndef __STDC_WANT_IEC_60559_ATTRIBS_EXT__
|
||||
# undef __STDC_WANT_IEC_60559_ATTRIBS_EXT__
|
||||
#endif
|
||||
/* Enable extensions specified by ISO/IEC TS 18661-1:2014. */
|
||||
#ifndef __STDC_WANT_IEC_60559_BFP_EXT__
|
||||
# undef __STDC_WANT_IEC_60559_BFP_EXT__
|
||||
#endif
|
||||
/* Enable extensions specified by ISO/IEC TS 18661-2:2015. */
|
||||
#ifndef __STDC_WANT_IEC_60559_DFP_EXT__
|
||||
# undef __STDC_WANT_IEC_60559_DFP_EXT__
|
||||
#endif
|
||||
/* Enable extensions specified by ISO/IEC TS 18661-4:2015. */
|
||||
#ifndef __STDC_WANT_IEC_60559_FUNCS_EXT__
|
||||
# undef __STDC_WANT_IEC_60559_FUNCS_EXT__
|
||||
#endif
|
||||
/* Enable extensions specified by ISO/IEC TS 18661-3:2015. */
|
||||
#ifndef __STDC_WANT_IEC_60559_TYPES_EXT__
|
||||
# undef __STDC_WANT_IEC_60559_TYPES_EXT__
|
||||
#endif
|
||||
/* Enable extensions specified by ISO/IEC TR 24731-2:2010. */
|
||||
#ifndef __STDC_WANT_LIB_EXT2__
|
||||
# undef __STDC_WANT_LIB_EXT2__
|
||||
#endif
|
||||
/* Enable extensions specified by ISO/IEC 24747:2009. */
|
||||
#ifndef __STDC_WANT_MATH_SPEC_FUNCS__
|
||||
# undef __STDC_WANT_MATH_SPEC_FUNCS__
|
||||
#endif
|
||||
/* Enable extensions on HP NonStop. */
|
||||
#ifndef _TANDEM_SOURCE
|
||||
# undef _TANDEM_SOURCE
|
||||
#endif
|
||||
/* Enable X/Open extensions. Define to 500 only if necessary
|
||||
to make mbstate_t available. */
|
||||
#ifndef _XOPEN_SOURCE
|
||||
# undef _XOPEN_SOURCE
|
||||
#endif
|
||||
|
||||
|
||||
/* Version number of package */
|
||||
#undef VERSION
|
||||
|
||||
/* Number of bits in a file offset, on hosts where this is settable. */
|
||||
#undef _FILE_OFFSET_BITS
|
||||
|
||||
/* Define for large files, on AIX-style hosts. */
|
||||
#undef _LARGE_FILES
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
#undef const
|
||||
|
||||
/* Define to the type of a signed integer type of width exactly 64 bits if
|
||||
such a type exists and the standard includes do not define it. */
|
||||
#undef int64_t
|
||||
|
||||
/* Define to `unsigned int' if <sys/types.h> does not define. */
|
||||
#undef size_t
|
||||
1069
3rd/pcre2/src/pcre2.h.generic
Normal file
1069
3rd/pcre2/src/pcre2.h.generic
Normal file
@@ -0,0 +1,1069 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This is the public header file for the PCRE library, second API, to be
|
||||
#included by applications that call PCRE2 functions.
|
||||
|
||||
Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef PCRE2_H_IDEMPOTENT_GUARD
|
||||
#define PCRE2_H_IDEMPOTENT_GUARD
|
||||
|
||||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE2_MAJOR 10
|
||||
#define PCRE2_MINOR 45
|
||||
#define PCRE2_PRERELEASE
|
||||
#define PCRE2_DATE 2025-02-05
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE2, the appropriate
|
||||
export setting is defined in pcre2_internal.h, which includes this file. So we
|
||||
don't change existing definitions of PCRE2_EXP_DECL. */
|
||||
|
||||
#if defined(_WIN32) && !defined(PCRE2_STATIC)
|
||||
# ifndef PCRE2_EXP_DECL
|
||||
# define PCRE2_EXP_DECL extern __declspec(dllimport)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* By default, we use the standard "extern" declarations. */
|
||||
|
||||
/* Fallback definition, used only when PCRE2_EXP_DECL is still undefined at
this point. It expands to plain "extern" for C and to extern "C" when the
header is compiled as C++, so each declaration that uses it gets C linkage. */
#ifndef PCRE2_EXP_DECL
|
||||
# ifdef __cplusplus
|
||||
# define PCRE2_EXP_DECL extern "C"
|
||||
# else
|
||||
# define PCRE2_EXP_DECL extern
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* When compiling with the MSVC compiler, it is sometimes necessary to include
|
||||
a "calling convention" before exported function names. (This is secondhand
|
||||
information; I know nothing about MSVC myself). For example, something like
|
||||
|
||||
void __cdecl function(....)
|
||||
|
||||
might be needed. In order to make this easy, all the exported functions have
|
||||
PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not
|
||||
set, we ensure here that it has no effect. */
|
||||
|
||||
#ifndef PCRE2_CALL_CONVENTION
|
||||
#define PCRE2_CALL_CONVENTION
|
||||
#endif
|
||||
|
||||
/* Have to include limits.h, stdlib.h, and inttypes.h to ensure that size_t and
|
||||
uint8_t, UCHAR_MAX, etc are defined. Some systems that do have inttypes.h do
|
||||
not have stdint.h, which is why we use inttypes.h, which according to the C
|
||||
standard is a superset of stdint.h. If inttypes.h is not available the build
|
||||
will break and the relevant values must be provided by some other means. */
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
/* Allow for C++ users compiling this directly. */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* The following option bits can be passed to pcre2_compile(), pcre2_match(),
|
||||
or pcre2_dfa_match(). PCRE2_NO_UTF_CHECK affects only the function to which it
|
||||
is passed. Put these bits at the most significant end of the options word so
|
||||
others can be added next to them. */
|
||||
|
||||
#define PCRE2_ANCHORED 0x80000000u
|
||||
#define PCRE2_NO_UTF_CHECK 0x40000000u
|
||||
#define PCRE2_ENDANCHORED 0x20000000u
|
||||
|
||||
/* The following option bits can be passed only to pcre2_compile(). However,
|
||||
they may affect compilation, JIT compilation, and/or interpretive execution.
|
||||
The following tags indicate which:
|
||||
|
||||
C alters what is compiled by pcre2_compile()
|
||||
J alters what is compiled by pcre2_jit_compile()
|
||||
M is inspected during pcre2_match() execution
|
||||
D is inspected during pcre2_dfa_match() execution
|
||||
*/
|
||||
|
||||
#define PCRE2_ALLOW_EMPTY_CLASS 0x00000001u /* C */
|
||||
#define PCRE2_ALT_BSUX 0x00000002u /* C */
|
||||
#define PCRE2_AUTO_CALLOUT 0x00000004u /* C */
|
||||
#define PCRE2_CASELESS 0x00000008u /* C */
|
||||
#define PCRE2_DOLLAR_ENDONLY 0x00000010u /* J M D */
|
||||
#define PCRE2_DOTALL 0x00000020u /* C */
|
||||
#define PCRE2_DUPNAMES 0x00000040u /* C */
|
||||
#define PCRE2_EXTENDED 0x00000080u /* C */
|
||||
#define PCRE2_FIRSTLINE 0x00000100u /* J M D */
|
||||
#define PCRE2_MATCH_UNSET_BACKREF 0x00000200u /* C J M */
|
||||
#define PCRE2_MULTILINE 0x00000400u /* C */
|
||||
#define PCRE2_NEVER_UCP 0x00000800u /* C */
|
||||
#define PCRE2_NEVER_UTF 0x00001000u /* C */
|
||||
#define PCRE2_NO_AUTO_CAPTURE 0x00002000u /* C */
|
||||
#define PCRE2_NO_AUTO_POSSESS 0x00004000u /* C */
|
||||
#define PCRE2_NO_DOTSTAR_ANCHOR 0x00008000u /* C */
|
||||
#define PCRE2_NO_START_OPTIMIZE 0x00010000u /* J M D */
|
||||
#define PCRE2_UCP 0x00020000u /* C J M D */
|
||||
#define PCRE2_UNGREEDY 0x00040000u /* C */
|
||||
#define PCRE2_UTF 0x00080000u /* C J M D */
|
||||
#define PCRE2_NEVER_BACKSLASH_C 0x00100000u /* C */
|
||||
#define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */
|
||||
#define PCRE2_ALT_VERBNAMES 0x00400000u /* C */
|
||||
#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */
|
||||
#define PCRE2_EXTENDED_MORE 0x01000000u /* C */
|
||||
#define PCRE2_LITERAL 0x02000000u /* C */
|
||||
#define PCRE2_MATCH_INVALID_UTF 0x04000000u /* J M D */
|
||||
#define PCRE2_ALT_EXTENDED_CLASS 0x08000000u /* C */
|
||||
|
||||
/* An additional compile options word is available in the compile context. */
|
||||
|
||||
#define PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES 0x00000001u /* C */
|
||||
#define PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL 0x00000002u /* C */
|
||||
#define PCRE2_EXTRA_MATCH_WORD 0x00000004u /* C */
|
||||
#define PCRE2_EXTRA_MATCH_LINE 0x00000008u /* C */
|
||||
#define PCRE2_EXTRA_ESCAPED_CR_IS_LF 0x00000010u /* C */
|
||||
#define PCRE2_EXTRA_ALT_BSUX 0x00000020u /* C */
|
||||
#define PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK 0x00000040u /* C */
|
||||
#define PCRE2_EXTRA_CASELESS_RESTRICT 0x00000080u /* C */
|
||||
#define PCRE2_EXTRA_ASCII_BSD 0x00000100u /* C */
|
||||
#define PCRE2_EXTRA_ASCII_BSS 0x00000200u /* C */
|
||||
#define PCRE2_EXTRA_ASCII_BSW 0x00000400u /* C */
|
||||
#define PCRE2_EXTRA_ASCII_POSIX 0x00000800u /* C */
|
||||
#define PCRE2_EXTRA_ASCII_DIGIT 0x00001000u /* C */
|
||||
#define PCRE2_EXTRA_PYTHON_OCTAL 0x00002000u /* C */
|
||||
#define PCRE2_EXTRA_NO_BS0 0x00004000u /* C */
|
||||
#define PCRE2_EXTRA_NEVER_CALLOUT 0x00008000u /* C */
|
||||
#define PCRE2_EXTRA_TURKISH_CASING 0x00010000u /* C */
|
||||
|
||||
/* These are for pcre2_jit_compile(). */
|
||||
|
||||
#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */
|
||||
#define PCRE2_JIT_PARTIAL_SOFT 0x00000002u
|
||||
#define PCRE2_JIT_PARTIAL_HARD 0x00000004u
|
||||
#define PCRE2_JIT_INVALID_UTF 0x00000100u
|
||||
#define PCRE2_JIT_TEST_ALLOC 0x00000200u
|
||||
|
||||
/* These are for pcre2_match(), pcre2_dfa_match(), pcre2_jit_match(), and
|
||||
pcre2_substitute(). Some are allowed only for one of the functions, and in
|
||||
these cases it is noted below. Note that PCRE2_ANCHORED, PCRE2_ENDANCHORED and
|
||||
PCRE2_NO_UTF_CHECK can also be passed to these functions (though
|
||||
pcre2_jit_match() ignores the latter since it bypasses all sanity checks). */
|
||||
|
||||
#define PCRE2_NOTBOL 0x00000001u
|
||||
#define PCRE2_NOTEOL 0x00000002u
|
||||
#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */
|
||||
#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. */
|
||||
#define PCRE2_PARTIAL_SOFT 0x00000010u
|
||||
#define PCRE2_PARTIAL_HARD 0x00000020u
|
||||
#define PCRE2_DFA_RESTART 0x00000040u /* pcre2_dfa_match() only */
|
||||
#define PCRE2_DFA_SHORTEST 0x00000080u /* pcre2_dfa_match() only */
|
||||
#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u /* pcre2_substitute() only */
|
||||
#define PCRE2_NO_JIT 0x00002000u /* not for pcre2_dfa_match() */
|
||||
#define PCRE2_COPY_MATCHED_SUBJECT 0x00004000u
|
||||
#define PCRE2_SUBSTITUTE_LITERAL 0x00008000u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_MATCHED 0x00010000u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_REPLACEMENT_ONLY 0x00020000u /* pcre2_substitute() only */
|
||||
#define PCRE2_DISABLE_RECURSELOOP_CHECK 0x00040000u /* not for pcre2_dfa_match() or pcre2_jit_match() */
|
||||
|
||||
/* Options for pcre2_pattern_convert(). */
|
||||
|
||||
/* Note that the last two values are not single bits: each one includes the
PCRE2_CONVERT_GLOB bit (0x10) plus one further bit (0x20 and 0x40
respectively), so testing either of them with a plain bitwise AND also
matches PCRE2_CONVERT_GLOB on its own. */
#define PCRE2_CONVERT_UTF 0x00000001u
|
||||
#define PCRE2_CONVERT_NO_UTF_CHECK 0x00000002u
|
||||
#define PCRE2_CONVERT_POSIX_BASIC 0x00000004u
|
||||
#define PCRE2_CONVERT_POSIX_EXTENDED 0x00000008u
|
||||
#define PCRE2_CONVERT_GLOB 0x00000010u
|
||||
#define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000030u
|
||||
#define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000050u
|
||||
|
||||
/* Newline and \R settings, for use in compile contexts. The newline values
|
||||
must be kept in step with values set in config.h and both sets must all be
|
||||
greater than zero. */
|
||||
|
||||
#define PCRE2_NEWLINE_CR 1
|
||||
#define PCRE2_NEWLINE_LF 2
|
||||
#define PCRE2_NEWLINE_CRLF 3
|
||||
#define PCRE2_NEWLINE_ANY 4
|
||||
#define PCRE2_NEWLINE_ANYCRLF 5
|
||||
#define PCRE2_NEWLINE_NUL 6
|
||||
|
||||
#define PCRE2_BSR_UNICODE 1
|
||||
#define PCRE2_BSR_ANYCRLF 2
|
||||
|
||||
/* Error codes for pcre2_compile(). Some of these are also used by
|
||||
pcre2_pattern_convert(). */
|
||||
|
||||
#define PCRE2_ERROR_END_BACKSLASH 101
|
||||
#define PCRE2_ERROR_END_BACKSLASH_C 102
|
||||
#define PCRE2_ERROR_UNKNOWN_ESCAPE 103
|
||||
#define PCRE2_ERROR_QUANTIFIER_OUT_OF_ORDER 104
|
||||
#define PCRE2_ERROR_QUANTIFIER_TOO_BIG 105
|
||||
#define PCRE2_ERROR_MISSING_SQUARE_BRACKET 106
|
||||
#define PCRE2_ERROR_ESCAPE_INVALID_IN_CLASS 107
|
||||
#define PCRE2_ERROR_CLASS_RANGE_ORDER 108
|
||||
#define PCRE2_ERROR_QUANTIFIER_INVALID 109
|
||||
#define PCRE2_ERROR_INTERNAL_UNEXPECTED_REPEAT 110
|
||||
#define PCRE2_ERROR_INVALID_AFTER_PARENS_QUERY 111
|
||||
#define PCRE2_ERROR_POSIX_CLASS_NOT_IN_CLASS 112
|
||||
#define PCRE2_ERROR_POSIX_NO_SUPPORT_COLLATING 113
|
||||
#define PCRE2_ERROR_MISSING_CLOSING_PARENTHESIS 114
|
||||
#define PCRE2_ERROR_BAD_SUBPATTERN_REFERENCE 115
|
||||
#define PCRE2_ERROR_NULL_PATTERN 116
|
||||
#define PCRE2_ERROR_BAD_OPTIONS 117
|
||||
#define PCRE2_ERROR_MISSING_COMMENT_CLOSING 118
|
||||
#define PCRE2_ERROR_PARENTHESES_NEST_TOO_DEEP 119
|
||||
#define PCRE2_ERROR_PATTERN_TOO_LARGE 120
|
||||
#define PCRE2_ERROR_HEAP_FAILED 121
|
||||
#define PCRE2_ERROR_UNMATCHED_CLOSING_PARENTHESIS 122
|
||||
#define PCRE2_ERROR_INTERNAL_CODE_OVERFLOW 123
|
||||
#define PCRE2_ERROR_MISSING_CONDITION_CLOSING 124
|
||||
#define PCRE2_ERROR_LOOKBEHIND_NOT_FIXED_LENGTH 125
|
||||
#define PCRE2_ERROR_ZERO_RELATIVE_REFERENCE 126
|
||||
#define PCRE2_ERROR_TOO_MANY_CONDITION_BRANCHES 127
|
||||
#define PCRE2_ERROR_CONDITION_ASSERTION_EXPECTED 128
|
||||
#define PCRE2_ERROR_BAD_RELATIVE_REFERENCE 129
|
||||
#define PCRE2_ERROR_UNKNOWN_POSIX_CLASS 130
|
||||
#define PCRE2_ERROR_INTERNAL_STUDY_ERROR 131
|
||||
#define PCRE2_ERROR_UNICODE_NOT_SUPPORTED 132
|
||||
#define PCRE2_ERROR_PARENTHESES_STACK_CHECK 133
|
||||
#define PCRE2_ERROR_CODE_POINT_TOO_BIG 134
|
||||
#define PCRE2_ERROR_LOOKBEHIND_TOO_COMPLICATED 135
|
||||
#define PCRE2_ERROR_LOOKBEHIND_INVALID_BACKSLASH_C 136
|
||||
#define PCRE2_ERROR_UNSUPPORTED_ESCAPE_SEQUENCE 137
|
||||
#define PCRE2_ERROR_CALLOUT_NUMBER_TOO_BIG 138
|
||||
#define PCRE2_ERROR_MISSING_CALLOUT_CLOSING 139
|
||||
#define PCRE2_ERROR_ESCAPE_INVALID_IN_VERB 140
|
||||
#define PCRE2_ERROR_UNRECOGNIZED_AFTER_QUERY_P 141
|
||||
#define PCRE2_ERROR_MISSING_NAME_TERMINATOR 142
|
||||
#define PCRE2_ERROR_DUPLICATE_SUBPATTERN_NAME 143
|
||||
#define PCRE2_ERROR_INVALID_SUBPATTERN_NAME 144
|
||||
#define PCRE2_ERROR_UNICODE_PROPERTIES_UNAVAILABLE 145
|
||||
#define PCRE2_ERROR_MALFORMED_UNICODE_PROPERTY 146
|
||||
#define PCRE2_ERROR_UNKNOWN_UNICODE_PROPERTY 147
|
||||
#define PCRE2_ERROR_SUBPATTERN_NAME_TOO_LONG 148
|
||||
#define PCRE2_ERROR_TOO_MANY_NAMED_SUBPATTERNS 149
|
||||
#define PCRE2_ERROR_CLASS_INVALID_RANGE 150
|
||||
#define PCRE2_ERROR_OCTAL_BYTE_TOO_BIG 151
|
||||
#define PCRE2_ERROR_INTERNAL_OVERRAN_WORKSPACE 152
|
||||
#define PCRE2_ERROR_INTERNAL_MISSING_SUBPATTERN 153
|
||||
#define PCRE2_ERROR_DEFINE_TOO_MANY_BRANCHES 154
|
||||
#define PCRE2_ERROR_BACKSLASH_O_MISSING_BRACE 155
|
||||
#define PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE 156
|
||||
#define PCRE2_ERROR_BACKSLASH_G_SYNTAX 157
|
||||
#define PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING 158
|
||||
/* Error 159 is obsolete and should now never occur */
|
||||
#define PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED 159
|
||||
#define PCRE2_ERROR_VERB_UNKNOWN 160
|
||||
#define PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG 161
|
||||
#define PCRE2_ERROR_SUBPATTERN_NAME_EXPECTED 162
|
||||
#define PCRE2_ERROR_INTERNAL_PARSED_OVERFLOW 163
|
||||
#define PCRE2_ERROR_INVALID_OCTAL 164
|
||||
#define PCRE2_ERROR_SUBPATTERN_NAMES_MISMATCH 165
|
||||
#define PCRE2_ERROR_MARK_MISSING_ARGUMENT 166
|
||||
#define PCRE2_ERROR_INVALID_HEXADECIMAL 167
|
||||
#define PCRE2_ERROR_BACKSLASH_C_SYNTAX 168
|
||||
#define PCRE2_ERROR_BACKSLASH_K_SYNTAX 169
|
||||
#define PCRE2_ERROR_INTERNAL_BAD_CODE_LOOKBEHINDS 170
|
||||
#define PCRE2_ERROR_BACKSLASH_N_IN_CLASS 171
|
||||
#define PCRE2_ERROR_CALLOUT_STRING_TOO_LONG 172
|
||||
#define PCRE2_ERROR_UNICODE_DISALLOWED_CODE_POINT 173
|
||||
#define PCRE2_ERROR_UTF_IS_DISABLED 174
|
||||
#define PCRE2_ERROR_UCP_IS_DISABLED 175
|
||||
#define PCRE2_ERROR_VERB_NAME_TOO_LONG 176
|
||||
#define PCRE2_ERROR_BACKSLASH_U_CODE_POINT_TOO_BIG 177
|
||||
#define PCRE2_ERROR_MISSING_OCTAL_OR_HEX_DIGITS 178
|
||||
#define PCRE2_ERROR_VERSION_CONDITION_SYNTAX 179
|
||||
#define PCRE2_ERROR_INTERNAL_BAD_CODE_AUTO_POSSESS 180
|
||||
#define PCRE2_ERROR_CALLOUT_NO_STRING_DELIMITER 181
|
||||
#define PCRE2_ERROR_CALLOUT_BAD_STRING_DELIMITER 182
|
||||
#define PCRE2_ERROR_BACKSLASH_C_CALLER_DISABLED 183
|
||||
#define PCRE2_ERROR_QUERY_BARJX_NEST_TOO_DEEP 184
|
||||
#define PCRE2_ERROR_BACKSLASH_C_LIBRARY_DISABLED 185
|
||||
#define PCRE2_ERROR_PATTERN_TOO_COMPLICATED 186
|
||||
#define PCRE2_ERROR_LOOKBEHIND_TOO_LONG 187
|
||||
#define PCRE2_ERROR_PATTERN_STRING_TOO_LONG 188
|
||||
#define PCRE2_ERROR_INTERNAL_BAD_CODE 189
|
||||
#define PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP 190
|
||||
#define PCRE2_ERROR_NO_SURROGATES_IN_UTF16 191
|
||||
#define PCRE2_ERROR_BAD_LITERAL_OPTIONS 192
|
||||
#define PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE 193
|
||||
#define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS 194
|
||||
#define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN 195
|
||||
#define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE 196
|
||||
#define PCRE2_ERROR_TOO_MANY_CAPTURES 197
|
||||
#define PCRE2_ERROR_MISSING_OCTAL_DIGIT 198
|
||||
#define PCRE2_ERROR_BACKSLASH_K_IN_LOOKAROUND 199
|
||||
#define PCRE2_ERROR_MAX_VAR_LOOKBEHIND_EXCEEDED 200
|
||||
#define PCRE2_ERROR_PATTERN_COMPILED_SIZE_TOO_BIG 201
|
||||
#define PCRE2_ERROR_OVERSIZE_PYTHON_OCTAL 202
|
||||
#define PCRE2_ERROR_CALLOUT_CALLER_DISABLED 203
|
||||
#define PCRE2_ERROR_EXTRA_CASING_REQUIRES_UNICODE 204
|
||||
#define PCRE2_ERROR_TURKISH_CASING_REQUIRES_UTF 205
|
||||
#define PCRE2_ERROR_EXTRA_CASING_INCOMPATIBLE 206
|
||||
#define PCRE2_ERROR_ECLASS_NEST_TOO_DEEP 207
|
||||
#define PCRE2_ERROR_ECLASS_INVALID_OPERATOR 208
|
||||
#define PCRE2_ERROR_ECLASS_UNEXPECTED_OPERATOR 209
|
||||
#define PCRE2_ERROR_ECLASS_EXPECTED_OPERAND 210
|
||||
#define PCRE2_ERROR_ECLASS_MIXED_OPERATORS 211
|
||||
#define PCRE2_ERROR_ECLASS_HINT_SQUARE_BRACKET 212
|
||||
#define PCRE2_ERROR_PERL_ECLASS_UNEXPECTED_EXPR 213
|
||||
#define PCRE2_ERROR_PERL_ECLASS_EMPTY_EXPR 214
|
||||
#define PCRE2_ERROR_PERL_ECLASS_MISSING_CLOSE 215
|
||||
#define PCRE2_ERROR_PERL_ECLASS_UNEXPECTED_CHAR 216
|
||||
|
||||
/* "Expected" matching error codes: no match and partial match. */
|
||||
|
||||
#define PCRE2_ERROR_NOMATCH (-1)
|
||||
#define PCRE2_ERROR_PARTIAL (-2)
|
||||
|
||||
/* Error codes for UTF-8 validity checks */
|
||||
|
||||
#define PCRE2_ERROR_UTF8_ERR1 (-3)
|
||||
#define PCRE2_ERROR_UTF8_ERR2 (-4)
|
||||
#define PCRE2_ERROR_UTF8_ERR3 (-5)
|
||||
#define PCRE2_ERROR_UTF8_ERR4 (-6)
|
||||
#define PCRE2_ERROR_UTF8_ERR5 (-7)
|
||||
#define PCRE2_ERROR_UTF8_ERR6 (-8)
|
||||
#define PCRE2_ERROR_UTF8_ERR7 (-9)
|
||||
#define PCRE2_ERROR_UTF8_ERR8 (-10)
|
||||
#define PCRE2_ERROR_UTF8_ERR9 (-11)
|
||||
#define PCRE2_ERROR_UTF8_ERR10 (-12)
|
||||
#define PCRE2_ERROR_UTF8_ERR11 (-13)
|
||||
#define PCRE2_ERROR_UTF8_ERR12 (-14)
|
||||
#define PCRE2_ERROR_UTF8_ERR13 (-15)
|
||||
#define PCRE2_ERROR_UTF8_ERR14 (-16)
|
||||
#define PCRE2_ERROR_UTF8_ERR15 (-17)
|
||||
#define PCRE2_ERROR_UTF8_ERR16 (-18)
|
||||
#define PCRE2_ERROR_UTF8_ERR17 (-19)
|
||||
#define PCRE2_ERROR_UTF8_ERR18 (-20)
|
||||
#define PCRE2_ERROR_UTF8_ERR19 (-21)
|
||||
#define PCRE2_ERROR_UTF8_ERR20 (-22)
|
||||
#define PCRE2_ERROR_UTF8_ERR21 (-23)
|
||||
|
||||
/* Error codes for UTF-16 validity checks */
|
||||
|
||||
#define PCRE2_ERROR_UTF16_ERR1 (-24)
|
||||
#define PCRE2_ERROR_UTF16_ERR2 (-25)
|
||||
#define PCRE2_ERROR_UTF16_ERR3 (-26)
|
||||
|
||||
/* Error codes for UTF-32 validity checks */
|
||||
|
||||
#define PCRE2_ERROR_UTF32_ERR1 (-27)
|
||||
#define PCRE2_ERROR_UTF32_ERR2 (-28)
|
||||
|
||||
/* Miscellaneous error codes for pcre2[_dfa]_match(), substring extraction
|
||||
functions, context functions, and serializing functions. They are in numerical
|
||||
order. Originally they were in alphabetical order too, but now that PCRE2 is
|
||||
released, the numbers must not be changed. */
|
||||
|
||||
#define PCRE2_ERROR_BADDATA (-29)
|
||||
#define PCRE2_ERROR_MIXEDTABLES (-30) /* Name was changed */
|
||||
#define PCRE2_ERROR_BADMAGIC (-31)
|
||||
#define PCRE2_ERROR_BADMODE (-32)
|
||||
#define PCRE2_ERROR_BADOFFSET (-33)
|
||||
#define PCRE2_ERROR_BADOPTION (-34)
|
||||
#define PCRE2_ERROR_BADREPLACEMENT (-35)
|
||||
#define PCRE2_ERROR_BADUTFOFFSET (-36)
|
||||
#define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */
|
||||
#define PCRE2_ERROR_DFA_BADRESTART (-38)
|
||||
#define PCRE2_ERROR_DFA_RECURSE (-39)
|
||||
#define PCRE2_ERROR_DFA_UCOND (-40)
|
||||
#define PCRE2_ERROR_DFA_UFUNC (-41)
|
||||
#define PCRE2_ERROR_DFA_UITEM (-42)
|
||||
#define PCRE2_ERROR_DFA_WSSIZE (-43)
|
||||
#define PCRE2_ERROR_INTERNAL (-44)
|
||||
#define PCRE2_ERROR_JIT_BADOPTION (-45)
|
||||
#define PCRE2_ERROR_JIT_STACKLIMIT (-46)
|
||||
#define PCRE2_ERROR_MATCHLIMIT (-47)
|
||||
#define PCRE2_ERROR_NOMEMORY (-48)
|
||||
#define PCRE2_ERROR_NOSUBSTRING (-49)
|
||||
#define PCRE2_ERROR_NOUNIQUESUBSTRING (-50)
|
||||
#define PCRE2_ERROR_NULL (-51)
|
||||
#define PCRE2_ERROR_RECURSELOOP (-52)
|
||||
#define PCRE2_ERROR_DEPTHLIMIT (-53)
|
||||
#define PCRE2_ERROR_RECURSIONLIMIT (-53) /* Obsolete synonym */
|
||||
#define PCRE2_ERROR_UNAVAILABLE (-54)
|
||||
#define PCRE2_ERROR_UNSET (-55)
|
||||
#define PCRE2_ERROR_BADOFFSETLIMIT (-56)
|
||||
#define PCRE2_ERROR_BADREPESCAPE (-57)
|
||||
#define PCRE2_ERROR_REPMISSINGBRACE (-58)
|
||||
#define PCRE2_ERROR_BADSUBSTITUTION (-59)
|
||||
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
|
||||
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
|
||||
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
|
||||
#define PCRE2_ERROR_HEAPLIMIT (-63)
|
||||
#define PCRE2_ERROR_CONVERT_SYNTAX (-64)
|
||||
#define PCRE2_ERROR_INTERNAL_DUPMATCH (-65)
|
||||
#define PCRE2_ERROR_DFA_UINVALID_UTF (-66)
|
||||
#define PCRE2_ERROR_INVALIDOFFSET (-67)
|
||||
#define PCRE2_ERROR_JIT_UNSUPPORTED (-68)
|
||||
#define PCRE2_ERROR_REPLACECASE (-69)
|
||||
#define PCRE2_ERROR_TOOLARGEREPLACE (-70)
|
||||
|
||||
|
||||
/* Request types for pcre2_pattern_info() */
|
||||
|
||||
#define PCRE2_INFO_ALLOPTIONS 0
|
||||
#define PCRE2_INFO_ARGOPTIONS 1
|
||||
#define PCRE2_INFO_BACKREFMAX 2
|
||||
#define PCRE2_INFO_BSR 3
|
||||
#define PCRE2_INFO_CAPTURECOUNT 4
|
||||
#define PCRE2_INFO_FIRSTCODEUNIT 5
|
||||
#define PCRE2_INFO_FIRSTCODETYPE 6
|
||||
#define PCRE2_INFO_FIRSTBITMAP 7
|
||||
#define PCRE2_INFO_HASCRORLF 8
|
||||
#define PCRE2_INFO_JCHANGED 9
|
||||
#define PCRE2_INFO_JITSIZE 10
|
||||
#define PCRE2_INFO_LASTCODEUNIT 11
|
||||
#define PCRE2_INFO_LASTCODETYPE 12
|
||||
#define PCRE2_INFO_MATCHEMPTY 13
|
||||
#define PCRE2_INFO_MATCHLIMIT 14
|
||||
#define PCRE2_INFO_MAXLOOKBEHIND 15
|
||||
#define PCRE2_INFO_MINLENGTH 16
|
||||
#define PCRE2_INFO_NAMECOUNT 17
|
||||
#define PCRE2_INFO_NAMEENTRYSIZE 18
|
||||
#define PCRE2_INFO_NAMETABLE 19
|
||||
#define PCRE2_INFO_NEWLINE 20
|
||||
#define PCRE2_INFO_DEPTHLIMIT 21
|
||||
#define PCRE2_INFO_RECURSIONLIMIT 21 /* Obsolete synonym */
|
||||
#define PCRE2_INFO_SIZE 22
|
||||
#define PCRE2_INFO_HASBACKSLASHC 23
|
||||
#define PCRE2_INFO_FRAMESIZE 24
|
||||
#define PCRE2_INFO_HEAPLIMIT 25
|
||||
#define PCRE2_INFO_EXTRAOPTIONS 26
|
||||
|
||||
/* Request types for pcre2_config(). */
|
||||
|
||||
#define PCRE2_CONFIG_BSR 0
|
||||
#define PCRE2_CONFIG_JIT 1
|
||||
#define PCRE2_CONFIG_JITTARGET 2
|
||||
#define PCRE2_CONFIG_LINKSIZE 3
|
||||
#define PCRE2_CONFIG_MATCHLIMIT 4
|
||||
#define PCRE2_CONFIG_NEWLINE 5
|
||||
#define PCRE2_CONFIG_PARENSLIMIT 6
|
||||
#define PCRE2_CONFIG_DEPTHLIMIT 7
|
||||
#define PCRE2_CONFIG_RECURSIONLIMIT 7 /* Obsolete synonym */
|
||||
#define PCRE2_CONFIG_STACKRECURSE 8 /* Obsolete */
|
||||
#define PCRE2_CONFIG_UNICODE 9
|
||||
#define PCRE2_CONFIG_UNICODE_VERSION 10
|
||||
#define PCRE2_CONFIG_VERSION 11
|
||||
#define PCRE2_CONFIG_HEAPLIMIT 12
|
||||
#define PCRE2_CONFIG_NEVER_BACKSLASH_C 13
|
||||
#define PCRE2_CONFIG_COMPILED_WIDTHS 14
|
||||
#define PCRE2_CONFIG_TABLES_LENGTH 15
|
||||
|
||||
/* Optimization directives for pcre2_set_optimize().
|
||||
For binary compatibility, only add to this list; do not renumber. */
|
||||
|
||||
#define PCRE2_OPTIMIZATION_NONE 0
|
||||
#define PCRE2_OPTIMIZATION_FULL 1
|
||||
|
||||
#define PCRE2_AUTO_POSSESS 64
|
||||
#define PCRE2_AUTO_POSSESS_OFF 65
|
||||
#define PCRE2_DOTSTAR_ANCHOR 66
|
||||
#define PCRE2_DOTSTAR_ANCHOR_OFF 67
|
||||
#define PCRE2_START_OPTIMIZE 68
|
||||
#define PCRE2_START_OPTIMIZE_OFF 69
|
||||
|
||||
/* Types used in pcre2_set_substitute_case_callout().
|
||||
|
||||
PCRE2_SUBSTITUTE_CASE_LOWER and PCRE2_SUBSTITUTE_CASE_UPPER are passed to the
|
||||
callout to indicate that the case of the entire callout input should be
|
||||
case-transformed. PCRE2_SUBSTITUTE_CASE_TITLE_FIRST is passed to indicate that
|
||||
only the first character or glyph should be transformed to Unicode titlecase,
|
||||
and the rest to lowercase. */
|
||||
|
||||
#define PCRE2_SUBSTITUTE_CASE_LOWER 1
|
||||
#define PCRE2_SUBSTITUTE_CASE_UPPER 2
|
||||
#define PCRE2_SUBSTITUTE_CASE_TITLE_FIRST 3
|
||||
|
||||
/* Types for code units in patterns and subject strings. */
|
||||
|
||||
/* One unsigned code unit type per width (8, 16 and 32 bits), followed by the
matching PCRE2_SPTRnn type, which is a pointer to a const code unit of that
width. */
typedef uint8_t PCRE2_UCHAR8;
|
||||
typedef uint16_t PCRE2_UCHAR16;
|
||||
typedef uint32_t PCRE2_UCHAR32;
|
||||
|
||||
typedef const PCRE2_UCHAR8 *PCRE2_SPTR8;
|
||||
typedef const PCRE2_UCHAR16 *PCRE2_SPTR16;
|
||||
typedef const PCRE2_UCHAR32 *PCRE2_SPTR32;
|
||||
|
||||
/* The PCRE2_SIZE type is used for all string lengths and offsets in PCRE2,
|
||||
including pattern offsets for errors and subject offsets after a match. We
|
||||
define special values to indicate zero-terminated strings and unset offsets in
|
||||
the offset vector (ovector). */
|
||||
|
||||
/* PCRE2_SIZE is an alias for size_t and PCRE2_SIZE_MAX for SIZE_MAX.
PCRE2_ZERO_TERMINATED and PCRE2_UNSET both expand to the same expression, a
PCRE2_SIZE with every bit set, so the two cannot be told apart by value. */
#define PCRE2_SIZE size_t
|
||||
#define PCRE2_SIZE_MAX SIZE_MAX
|
||||
#define PCRE2_ZERO_TERMINATED (~(PCRE2_SIZE)0)
|
||||
#define PCRE2_UNSET (~(PCRE2_SIZE)0)
|
||||
|
||||
/* Generic types for opaque structures and JIT callback functions. These
|
||||
declarations are defined in a macro that is expanded for each width later. */
|
||||
|
||||
/* PCRE2_TYPES_LIST expands to forward declarations of seven struct types
(general, compile, match and convert contexts, compiled code, match data and
JIT stack), each followed by a typedef whose name is the struct tag without
"real_". No members are declared in this macro, so the types are incomplete
here. The final typedef, pcre2_jit_callback, is a pointer to a function that
takes a void * and returns a pcre2_jit_stack *. */
#define PCRE2_TYPES_LIST \
|
||||
struct pcre2_real_general_context; \
|
||||
typedef struct pcre2_real_general_context pcre2_general_context; \
|
||||
\
|
||||
struct pcre2_real_compile_context; \
|
||||
typedef struct pcre2_real_compile_context pcre2_compile_context; \
|
||||
\
|
||||
struct pcre2_real_match_context; \
|
||||
typedef struct pcre2_real_match_context pcre2_match_context; \
|
||||
\
|
||||
struct pcre2_real_convert_context; \
|
||||
typedef struct pcre2_real_convert_context pcre2_convert_context; \
|
||||
\
|
||||
struct pcre2_real_code; \
|
||||
typedef struct pcre2_real_code pcre2_code; \
|
||||
\
|
||||
struct pcre2_real_match_data; \
|
||||
typedef struct pcre2_real_match_data pcre2_match_data; \
|
||||
\
|
||||
struct pcre2_real_jit_stack; \
|
||||
typedef struct pcre2_real_jit_stack pcre2_jit_stack; \
|
||||
\
|
||||
typedef pcre2_jit_stack *(*pcre2_jit_callback)(void *);
|
||||
|
||||
|
||||
/* The structures for passing out data via callout functions. We use structures
|
||||
so that new fields can be added on the end in future versions, without changing
|
||||
the API of the function, thereby allowing old clients to work without
|
||||
modification. Define the generic versions in a macro; the width-specific
|
||||
versions are generated from this macro below. */
|
||||
|
||||
/* Flags for the callout_flags field. These are cleared after a callout. */
|
||||
|
||||
#define PCRE2_CALLOUT_STARTMATCH 0x00000001u /* Set for each bumpalong */
|
||||
#define PCRE2_CALLOUT_BACKTRACK 0x00000002u /* Set after a backtrack */
|
||||
|
||||
/* PCRE2_STRUCTURE_LIST expands to the typedefs of three structures:
pcre2_callout_block, pcre2_callout_enumerate_block and
pcre2_substitute_callout_block. Each one starts with a uint32_t version
field. The dashed separator comments record the block version in which the
fields that follow them were introduced; only pcre2_callout_block has fields
added after version 0. The members use the PCRE2_SPTR and PCRE2_SIZE names,
so both must be defined wherever this macro is expanded. */
#define PCRE2_STRUCTURE_LIST \
|
||||
typedef struct pcre2_callout_block { \
|
||||
uint32_t version; /* Identifies version of block */ \
|
||||
/* ------------------------ Version 0 ------------------------------- */ \
|
||||
uint32_t callout_number; /* Number compiled into pattern */ \
|
||||
uint32_t capture_top; /* Max current capture */ \
|
||||
uint32_t capture_last; /* Most recently closed capture */ \
|
||||
PCRE2_SIZE *offset_vector; /* The offset vector */ \
|
||||
PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \
|
||||
PCRE2_SPTR subject; /* The subject being matched */ \
|
||||
PCRE2_SIZE subject_length; /* The length of the subject */ \
|
||||
PCRE2_SIZE start_match; /* Offset to start of this match attempt */ \
|
||||
PCRE2_SIZE current_position; /* Where we currently are in the subject */ \
|
||||
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
|
||||
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
|
||||
/* ------------------- Added for Version 1 -------------------------- */ \
|
||||
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
|
||||
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
|
||||
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
|
||||
/* ------------------- Added for Version 2 -------------------------- */ \
|
||||
uint32_t callout_flags; /* See above for list */ \
|
||||
/* ------------------------------------------------------------------ */ \
|
||||
} pcre2_callout_block; \
|
||||
\
|
||||
typedef struct pcre2_callout_enumerate_block { \
|
||||
uint32_t version; /* Identifies version of block */ \
|
||||
/* ------------------------ Version 0 ------------------------------- */ \
|
||||
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
|
||||
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
|
||||
uint32_t callout_number; /* Number compiled into pattern */ \
|
||||
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
|
||||
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
|
||||
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
|
||||
/* ------------------------------------------------------------------ */ \
|
||||
} pcre2_callout_enumerate_block; \
|
||||
\
|
||||
typedef struct pcre2_substitute_callout_block { \
|
||||
uint32_t version; /* Identifies version of block */ \
|
||||
/* ------------------------ Version 0 ------------------------------- */ \
|
||||
PCRE2_SPTR input; /* Pointer to input subject string */ \
|
||||
PCRE2_SPTR output; /* Pointer to output buffer */ \
|
||||
PCRE2_SIZE output_offsets[2]; /* Changed portion of the output */ \
|
||||
PCRE2_SIZE *ovector; /* Pointer to current ovector */ \
|
||||
uint32_t oveccount; /* Count of pairs set in ovector */ \
|
||||
uint32_t subscount; /* Substitution number */ \
|
||||
/* ------------------------------------------------------------------ */ \
|
||||
} pcre2_substitute_callout_block;
|
||||
|
||||
|
||||
/* List the generic forms of all other functions in macros, which will be
|
||||
expanded for each width below. Start with functions that give general
|
||||
information. */
|
||||
|
||||
/* Expands to a single declaration: pcre2_config(), which takes a uint32_t
and a void * and returns int.
NOTE(review): the uint32_t is presumably one of the PCRE2_CONFIG_xxx request
codes defined earlier in this header, with the void * receiving the answer --
confirm against the pcre2_config() documentation. */
#define PCRE2_GENERAL_INFO_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_config(uint32_t, void *);
|
||||
|
||||
|
||||
/* Functions for manipulating contexts. */
|
||||
|
||||
/* Expands to the declarations of the copy, create and free functions for
pcre2_general_context. The create function takes two function pointers, of
types void *(*)(size_t, void *) and void (*)(void *, void *), followed by a
void *.
NOTE(review): these are presumably a custom allocator, the matching
deallocator, and a user data pointer handed to both -- confirm against the
pcre2_general_context_create() documentation. */
#define PCRE2_GENERAL_CONTEXT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_general_context *PCRE2_CALL_CONVENTION \
|
||||
pcre2_general_context_copy(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL pcre2_general_context *PCRE2_CALL_CONVENTION \
|
||||
pcre2_general_context_create(void *(*)(size_t, void *), \
|
||||
void (*)(void *, void *), void *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_general_context_free(pcre2_general_context *);
|
||||
|
||||
#define PCRE2_COMPILE_CONTEXT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_compile_context *PCRE2_CALL_CONVENTION \
|
||||
pcre2_compile_context_copy(pcre2_compile_context *); \
|
||||
PCRE2_EXP_DECL pcre2_compile_context *PCRE2_CALL_CONVENTION \
|
||||
pcre2_compile_context_create(pcre2_general_context *);\
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_compile_context_free(pcre2_compile_context *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_character_tables(pcre2_compile_context *, const uint8_t *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_max_pattern_compiled_length(pcre2_compile_context *, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_max_varlookbehind(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_newline(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_parens_nest_limit(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_compile_recursion_guard(pcre2_compile_context *, \
|
||||
int (*)(uint32_t, void *), void *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_optimize(pcre2_compile_context *, uint32_t);
|
||||
|
||||
#define PCRE2_MATCH_CONTEXT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \
|
||||
pcre2_match_context_copy(pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \
|
||||
pcre2_match_context_create(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_match_context_free(pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_callout(pcre2_match_context *, \
|
||||
int (*)(pcre2_callout_block *, void *), void *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_substitute_callout(pcre2_match_context *, \
|
||||
int (*)(pcre2_substitute_callout_block *, void *), void *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_substitute_case_callout(pcre2_match_context *, \
|
||||
PCRE2_SIZE (*)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE, int, \
|
||||
void *), \
|
||||
void *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_heap_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_match_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_offset_limit(pcre2_match_context *, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_recursion_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_recursion_memory_management(pcre2_match_context *, \
|
||||
void *(*)(size_t, void *), void (*)(void *, void *), void *);
|
||||
|
||||
#define PCRE2_CONVERT_CONTEXT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_convert_context *PCRE2_CALL_CONVENTION \
|
||||
pcre2_convert_context_copy(pcre2_convert_context *); \
|
||||
PCRE2_EXP_DECL pcre2_convert_context *PCRE2_CALL_CONVENTION \
|
||||
pcre2_convert_context_create(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_convert_context_free(pcre2_convert_context *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_glob_escape(pcre2_convert_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_glob_separator(pcre2_convert_context *, uint32_t);
|
||||
|
||||
|
||||
/* Functions concerned with compiling a pattern to PCRE internal code. */
|
||||
|
||||
#define PCRE2_COMPILE_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \
|
||||
pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_SIZE *, \
|
||||
pcre2_compile_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_code_free(pcre2_code *); \
|
||||
PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \
|
||||
pcre2_code_copy(const pcre2_code *); \
|
||||
PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \
|
||||
pcre2_code_copy_with_tables(const pcre2_code *);
|
||||
|
||||
|
||||
/* Functions that give information about a compiled pattern. */
|
||||
|
||||
#define PCRE2_PATTERN_INFO_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_pattern_info(const pcre2_code *, uint32_t, void *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_callout_enumerate(const pcre2_code *, \
|
||||
int (*)(pcre2_callout_enumerate_block *, void *), void *);
|
||||
|
||||
|
||||
/* Functions for running a match and inspecting the result. */
|
||||
|
||||
#define PCRE2_MATCH_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_match_data *PCRE2_CALL_CONVENTION \
|
||||
pcre2_match_data_create(uint32_t, pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL pcre2_match_data *PCRE2_CALL_CONVENTION \
|
||||
pcre2_match_data_create_from_pattern(const pcre2_code *, \
|
||||
pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
|
||||
uint32_t, pcre2_match_data *, pcre2_match_context *, int *, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
|
||||
uint32_t, pcre2_match_data *, pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_match_data_free(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SPTR PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_mark(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_match_data_size(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_match_data_heapframes_size(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL uint32_t PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_ovector_count(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE *PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_ovector_pointer(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_startchar(pcre2_match_data *);
|
||||
|
||||
|
||||
/* Convenience functions for handling matched substrings. */
|
||||
|
||||
#define PCRE2_SUBSTRING_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_copy_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR *, \
|
||||
PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_copy_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR *, \
|
||||
PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_free(PCRE2_UCHAR *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_get_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR **, \
|
||||
PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_get_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR **, \
|
||||
PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_length_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_length_bynumber(pcre2_match_data *, uint32_t, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_nametable_scan(const pcre2_code *, PCRE2_SPTR, PCRE2_SPTR *, \
|
||||
PCRE2_SPTR *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_number_from_name(const pcre2_code *, PCRE2_SPTR); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_list_free(PCRE2_UCHAR **); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_list_get(pcre2_match_data *, PCRE2_UCHAR ***, PCRE2_SIZE **);
|
||||
|
||||
|
||||
/* Functions for serializing / deserializing compiled patterns. */
|
||||
|
||||
#define PCRE2_SERIALIZE_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
|
||||
pcre2_serialize_encode(const pcre2_code **, int32_t, uint8_t **, \
|
||||
PCRE2_SIZE *, pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
|
||||
pcre2_serialize_decode(pcre2_code **, int32_t, const uint8_t *, \
|
||||
pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
|
||||
pcre2_serialize_get_number_of_codes(const uint8_t *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_serialize_free(uint8_t *);
|
||||
|
||||
|
||||
/* Convenience function for match + substitute. */
|
||||
|
||||
#define PCRE2_SUBSTITUTE_FUNCTION \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substitute(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
|
||||
uint32_t, pcre2_match_data *, pcre2_match_context *, PCRE2_SPTR, \
|
||||
PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE *);
|
||||
|
||||
|
||||
/* Functions for converting pattern source strings. */
|
||||
|
||||
#define PCRE2_CONVERT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_pattern_convert(PCRE2_SPTR, PCRE2_SIZE, uint32_t, PCRE2_UCHAR **, \
|
||||
PCRE2_SIZE *, pcre2_convert_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_converted_pattern_free(PCRE2_UCHAR *);
|
||||
|
||||
|
||||
/* Functions for JIT processing */
|
||||
|
||||
#define PCRE2_JIT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_jit_compile(pcre2_code *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_jit_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
|
||||
uint32_t, pcre2_match_data *, pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_jit_free_unused_memory(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL pcre2_jit_stack *PCRE2_CALL_CONVENTION \
|
||||
pcre2_jit_stack_create(size_t, size_t, pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_jit_stack_assign(pcre2_match_context *, pcre2_jit_callback, void *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_jit_stack_free(pcre2_jit_stack *);
|
||||
|
||||
|
||||
/* Other miscellaneous functions. */
|
||||
|
||||
#define PCRE2_OTHER_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL const uint8_t *PCRE2_CALL_CONVENTION \
|
||||
pcre2_maketables(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_maketables_free(pcre2_general_context *, const uint8_t *);
|
||||
|
||||
/* Define macros that generate width-specific names from generic versions. The
|
||||
three-level macro scheme is necessary to get the macros expanded when we want
|
||||
them to be. First we get the width from PCRE2_LOCAL_WIDTH, which is used for
|
||||
generating three versions of everything below. After that, PCRE2_SUFFIX will be
|
||||
re-defined to use PCRE2_CODE_UNIT_WIDTH, for use when macros such as
|
||||
pcre2_compile are called by application code. */
|
||||
|
||||
/* Lowest level: paste a and b together exactly as written. Operands of ## are
not macro-expanded first, so this must not be used directly with a macro name
such as PCRE2_LOCAL_WIDTH. */
#define PCRE2_JOIN(a,b) a ## b
|
||||
/* Middle level: the extra call lets a and b be macro-expanded before they
reach PCRE2_JOIN. */
#define PCRE2_GLUE(a,b) PCRE2_JOIN(a,b)
|
||||
/* Top level: append the current value of PCRE2_LOCAL_WIDTH to a. This
definition is #undef'd and replaced further down in this header. */
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a,PCRE2_LOCAL_WIDTH)
|
||||
|
||||
|
||||
/* Data types */
|
||||
|
||||
#define PCRE2_UCHAR PCRE2_SUFFIX(PCRE2_UCHAR)
|
||||
#define PCRE2_SPTR PCRE2_SUFFIX(PCRE2_SPTR)
|
||||
|
||||
#define pcre2_code PCRE2_SUFFIX(pcre2_code_)
|
||||
#define pcre2_jit_callback PCRE2_SUFFIX(pcre2_jit_callback_)
|
||||
#define pcre2_jit_stack PCRE2_SUFFIX(pcre2_jit_stack_)
|
||||
|
||||
#define pcre2_real_code PCRE2_SUFFIX(pcre2_real_code_)
|
||||
#define pcre2_real_general_context PCRE2_SUFFIX(pcre2_real_general_context_)
|
||||
#define pcre2_real_compile_context PCRE2_SUFFIX(pcre2_real_compile_context_)
|
||||
#define pcre2_real_convert_context PCRE2_SUFFIX(pcre2_real_convert_context_)
|
||||
#define pcre2_real_match_context PCRE2_SUFFIX(pcre2_real_match_context_)
|
||||
#define pcre2_real_jit_stack PCRE2_SUFFIX(pcre2_real_jit_stack_)
|
||||
#define pcre2_real_match_data PCRE2_SUFFIX(pcre2_real_match_data_)
|
||||
|
||||
|
||||
/* Data blocks */
|
||||
|
||||
#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_)
|
||||
#define pcre2_callout_enumerate_block PCRE2_SUFFIX(pcre2_callout_enumerate_block_)
|
||||
#define pcre2_substitute_callout_block PCRE2_SUFFIX(pcre2_substitute_callout_block_)
|
||||
#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_)
|
||||
#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_)
|
||||
#define pcre2_convert_context PCRE2_SUFFIX(pcre2_convert_context_)
|
||||
#define pcre2_match_context PCRE2_SUFFIX(pcre2_match_context_)
|
||||
#define pcre2_match_data PCRE2_SUFFIX(pcre2_match_data_)
|
||||
|
||||
|
||||
/* Functions: the complete list in approximately alphabetical order */
|
||||
|
||||
#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_)
|
||||
#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_)
|
||||
#define pcre2_code_copy_with_tables PCRE2_SUFFIX(pcre2_code_copy_with_tables_)
|
||||
#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_)
|
||||
#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_)
|
||||
#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_)
|
||||
#define pcre2_compile_context_create PCRE2_SUFFIX(pcre2_compile_context_create_)
|
||||
#define pcre2_compile_context_free PCRE2_SUFFIX(pcre2_compile_context_free_)
|
||||
#define pcre2_config PCRE2_SUFFIX(pcre2_config_)
|
||||
#define pcre2_convert_context_copy PCRE2_SUFFIX(pcre2_convert_context_copy_)
|
||||
#define pcre2_convert_context_create PCRE2_SUFFIX(pcre2_convert_context_create_)
|
||||
#define pcre2_convert_context_free PCRE2_SUFFIX(pcre2_convert_context_free_)
|
||||
#define pcre2_converted_pattern_free PCRE2_SUFFIX(pcre2_converted_pattern_free_)
|
||||
#define pcre2_dfa_match PCRE2_SUFFIX(pcre2_dfa_match_)
|
||||
#define pcre2_general_context_copy PCRE2_SUFFIX(pcre2_general_context_copy_)
|
||||
#define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_)
|
||||
#define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_)
|
||||
#define pcre2_get_error_message PCRE2_SUFFIX(pcre2_get_error_message_)
|
||||
#define pcre2_get_mark PCRE2_SUFFIX(pcre2_get_mark_)
|
||||
#define pcre2_get_match_data_heapframes_size PCRE2_SUFFIX(pcre2_get_match_data_heapframes_size_)
|
||||
#define pcre2_get_match_data_size PCRE2_SUFFIX(pcre2_get_match_data_size_)
|
||||
#define pcre2_get_ovector_pointer PCRE2_SUFFIX(pcre2_get_ovector_pointer_)
|
||||
#define pcre2_get_ovector_count PCRE2_SUFFIX(pcre2_get_ovector_count_)
|
||||
#define pcre2_get_startchar PCRE2_SUFFIX(pcre2_get_startchar_)
|
||||
#define pcre2_jit_compile PCRE2_SUFFIX(pcre2_jit_compile_)
|
||||
#define pcre2_jit_match PCRE2_SUFFIX(pcre2_jit_match_)
|
||||
#define pcre2_jit_free_unused_memory PCRE2_SUFFIX(pcre2_jit_free_unused_memory_)
|
||||
#define pcre2_jit_stack_assign PCRE2_SUFFIX(pcre2_jit_stack_assign_)
|
||||
#define pcre2_jit_stack_create PCRE2_SUFFIX(pcre2_jit_stack_create_)
|
||||
#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_)
|
||||
#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_)
|
||||
#define pcre2_maketables_free PCRE2_SUFFIX(pcre2_maketables_free_)
|
||||
#define pcre2_match PCRE2_SUFFIX(pcre2_match_)
|
||||
#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_)
|
||||
#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_)
|
||||
#define pcre2_match_context_free PCRE2_SUFFIX(pcre2_match_context_free_)
|
||||
#define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_)
|
||||
#define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_)
|
||||
#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_)
|
||||
#define pcre2_pattern_convert PCRE2_SUFFIX(pcre2_pattern_convert_)
|
||||
#define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_)
|
||||
#define pcre2_serialize_decode PCRE2_SUFFIX(pcre2_serialize_decode_)
|
||||
#define pcre2_serialize_encode PCRE2_SUFFIX(pcre2_serialize_encode_)
|
||||
#define pcre2_serialize_free PCRE2_SUFFIX(pcre2_serialize_free_)
|
||||
#define pcre2_serialize_get_number_of_codes PCRE2_SUFFIX(pcre2_serialize_get_number_of_codes_)
|
||||
#define pcre2_set_bsr PCRE2_SUFFIX(pcre2_set_bsr_)
|
||||
#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_)
|
||||
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
||||
#define pcre2_set_compile_extra_options PCRE2_SUFFIX(pcre2_set_compile_extra_options_)
|
||||
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
||||
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
|
||||
#define pcre2_set_glob_escape PCRE2_SUFFIX(pcre2_set_glob_escape_)
|
||||
#define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_)
|
||||
#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
|
||||
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
||||
#define pcre2_set_max_varlookbehind PCRE2_SUFFIX(pcre2_set_max_varlookbehind_)
|
||||
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
|
||||
#define pcre2_set_max_pattern_compiled_length PCRE2_SUFFIX(pcre2_set_max_pattern_compiled_length_)
|
||||
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
||||
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
|
||||
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
|
||||
#define pcre2_set_optimize PCRE2_SUFFIX(pcre2_set_optimize_)
|
||||
#define pcre2_set_substitute_callout PCRE2_SUFFIX(pcre2_set_substitute_callout_)
|
||||
#define pcre2_set_substitute_case_callout PCRE2_SUFFIX(pcre2_set_substitute_case_callout_)
|
||||
#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_)
|
||||
#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_)
|
||||
#define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_)
|
||||
#define pcre2_substring_free PCRE2_SUFFIX(pcre2_substring_free_)
|
||||
#define pcre2_substring_get_byname PCRE2_SUFFIX(pcre2_substring_get_byname_)
|
||||
#define pcre2_substring_get_bynumber PCRE2_SUFFIX(pcre2_substring_get_bynumber_)
|
||||
#define pcre2_substring_length_byname PCRE2_SUFFIX(pcre2_substring_length_byname_)
|
||||
#define pcre2_substring_length_bynumber PCRE2_SUFFIX(pcre2_substring_length_bynumber_)
|
||||
#define pcre2_substring_list_get PCRE2_SUFFIX(pcre2_substring_list_get_)
|
||||
#define pcre2_substring_list_free PCRE2_SUFFIX(pcre2_substring_list_free_)
|
||||
#define pcre2_substring_nametable_scan PCRE2_SUFFIX(pcre2_substring_nametable_scan_)
|
||||
#define pcre2_substring_number_from_name PCRE2_SUFFIX(pcre2_substring_number_from_name_)
|
||||
|
||||
/* Keep this old function name for backwards compatibility */
|
||||
#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)
|
||||
|
||||
/* Keep this obsolete function for backwards compatibility: it is now a noop. */
|
||||
#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)
|
||||
|
||||
/* Now generate all three sets of width-specific structures and function
|
||||
prototypes. */
|
||||
|
||||
#define PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS \
|
||||
PCRE2_TYPES_LIST \
|
||||
PCRE2_STRUCTURE_LIST \
|
||||
PCRE2_GENERAL_INFO_FUNCTIONS \
|
||||
PCRE2_GENERAL_CONTEXT_FUNCTIONS \
|
||||
PCRE2_COMPILE_CONTEXT_FUNCTIONS \
|
||||
PCRE2_CONVERT_CONTEXT_FUNCTIONS \
|
||||
PCRE2_CONVERT_FUNCTIONS \
|
||||
PCRE2_MATCH_CONTEXT_FUNCTIONS \
|
||||
PCRE2_COMPILE_FUNCTIONS \
|
||||
PCRE2_PATTERN_INFO_FUNCTIONS \
|
||||
PCRE2_MATCH_FUNCTIONS \
|
||||
PCRE2_SUBSTRING_FUNCTIONS \
|
||||
PCRE2_SERIALIZE_FUNCTIONS \
|
||||
PCRE2_SUBSTITUTE_FUNCTION \
|
||||
PCRE2_JIT_FUNCTIONS \
|
||||
PCRE2_OTHER_FUNCTIONS
|
||||
|
||||
/* First pass: PCRE2_SUFFIX appends 8, so the generic names expand to the
_8 forms (for example pcre2_compile becomes pcre2_compile_8). */
#define PCRE2_LOCAL_WIDTH 8
|
||||
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
#undef PCRE2_LOCAL_WIDTH
|
||||
|
||||
/* Second pass: the same list expanded again with the suffix 16. */
#define PCRE2_LOCAL_WIDTH 16
|
||||
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
#undef PCRE2_LOCAL_WIDTH
|
||||
|
||||
/* Third pass: the same list expanded again with the suffix 32.
PCRE2_LOCAL_WIDTH is left undefined afterwards. */
#define PCRE2_LOCAL_WIDTH 32
|
||||
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
#undef PCRE2_LOCAL_WIDTH
|
||||
|
||||
/* Undefine the list macros; they are no longer needed. */
|
||||
|
||||
#undef PCRE2_TYPES_LIST
|
||||
#undef PCRE2_STRUCTURE_LIST
|
||||
#undef PCRE2_GENERAL_INFO_FUNCTIONS
|
||||
#undef PCRE2_GENERAL_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_COMPILE_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_CONVERT_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_MATCH_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_COMPILE_FUNCTIONS
|
||||
#undef PCRE2_PATTERN_INFO_FUNCTIONS
|
||||
#undef PCRE2_MATCH_FUNCTIONS
|
||||
#undef PCRE2_SUBSTRING_FUNCTIONS
|
||||
#undef PCRE2_SERIALIZE_FUNCTIONS
|
||||
#undef PCRE2_SUBSTITUTE_FUNCTION
|
||||
#undef PCRE2_JIT_FUNCTIONS
|
||||
#undef PCRE2_OTHER_FUNCTIONS
|
||||
#undef PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
|
||||
/* PCRE2_CODE_UNIT_WIDTH must be defined. If it is 8, 16, or 32, redefine
|
||||
PCRE2_SUFFIX to use it. If it is 0, undefine the other macros and make
|
||||
PCRE2_SUFFIX a no-op. Otherwise, generate an error. */
|
||||
|
||||
/* Discard the PCRE2_LOCAL_WIDTH-based PCRE2_SUFFIX; the definition chosen
below is the one in effect when application code uses the generic names. */
#undef PCRE2_SUFFIX
|
||||
#ifndef PCRE2_CODE_UNIT_WIDTH
|
||||
#error PCRE2_CODE_UNIT_WIDTH must be defined before including pcre2.h.
|
||||
#error Use 8, 16, or 32; or 0 for a multi-width application.
|
||||
#else /* PCRE2_CODE_UNIT_WIDTH is defined */
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8 || \
|
||||
PCRE2_CODE_UNIT_WIDTH == 16 || \
|
||||
PCRE2_CODE_UNIT_WIDTH == 32
|
||||
/* Single-width application: generic names get the selected width appended,
e.g. pcre2_compile expands to pcre2_compile_ followed by the width. */
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a, PCRE2_CODE_UNIT_WIDTH)
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 0
|
||||
/* Width 0: the pasting helpers are removed and PCRE2_SUFFIX returns its
argument unchanged, so no width suffix is appended to the generic names. */
#undef PCRE2_JOIN
|
||||
#undef PCRE2_GLUE
|
||||
#define PCRE2_SUFFIX(a) a
|
||||
#else
|
||||
/* Any other value is rejected at compile time. */
#error PCRE2_CODE_UNIT_WIDTH must be 0, 8, 16, or 32.
|
||||
#endif
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH is defined */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* PCRE2_H_IDEMPOTENT_GUARD */
|
||||
|
||||
/* End of pcre2.h */
|
||||
1069
3rd/pcre2/src/pcre2.h.in
Normal file
1069
3rd/pcre2/src/pcre2.h.in
Normal file
@@ -0,0 +1,1069 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This is the public header file for the PCRE library, second API, to be
|
||||
#included by applications that call PCRE2 functions.
|
||||
|
||||
Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef PCRE2_H_IDEMPOTENT_GUARD
|
||||
#define PCRE2_H_IDEMPOTENT_GUARD
|
||||
|
||||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE2_MAJOR @PCRE2_MAJOR@
|
||||
#define PCRE2_MINOR @PCRE2_MINOR@
|
||||
#define PCRE2_PRERELEASE @PCRE2_PRERELEASE@
|
||||
#define PCRE2_DATE @PCRE2_DATE@
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE2, the appropriate
|
||||
export setting is defined in pcre2_internal.h, which includes this file. So we
|
||||
don't change existing definitions of PCRE2_EXP_DECL. */
|
||||
|
||||
#if defined(_WIN32) && !defined(PCRE2_STATIC)
|
||||
# ifndef PCRE2_EXP_DECL
|
||||
# define PCRE2_EXP_DECL extern __declspec(dllimport)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* By default, we use the standard "extern" declarations. */
|
||||
|
||||
#ifndef PCRE2_EXP_DECL
|
||||
# ifdef __cplusplus
|
||||
# define PCRE2_EXP_DECL extern "C"
|
||||
# else
|
||||
# define PCRE2_EXP_DECL extern
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* When compiling with the MSVC compiler, it is sometimes necessary to include
|
||||
a "calling convention" before exported function names. (This is secondhand
|
||||
information; I know nothing about MSVC myself). For example, something like
|
||||
|
||||
void __cdecl function(....)
|
||||
|
||||
might be needed. In order to make this easy, all the exported functions have
|
||||
PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not
|
||||
set, we ensure here that it has no effect. */
|
||||
|
||||
#ifndef PCRE2_CALL_CONVENTION
|
||||
#define PCRE2_CALL_CONVENTION
|
||||
#endif
|
||||
|
||||
/* Have to include limits.h, stdlib.h, and inttypes.h to ensure that size_t and
|
||||
uint8_t, UCHAR_MAX, etc are defined. Some systems that do have inttypes.h do
|
||||
not have stdint.h, which is why we use inttypes.h, which according to the C
|
||||
standard is a superset of stdint.h. If inttypes.h is not available the build
|
||||
will break and the relevant values must be provided by some other means. */
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
/* Allow for C++ users compiling this directly. */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* The following option bits can be passed to pcre2_compile(), pcre2_match(),
|
||||
or pcre2_dfa_match(). PCRE2_NO_UTF_CHECK affects only the function to which it
|
||||
is passed. Put these bits at the most significant end of the options word so
|
||||
others can be added next to them */
|
||||
|
||||
#define PCRE2_ANCHORED 0x80000000u
|
||||
#define PCRE2_NO_UTF_CHECK 0x40000000u
|
||||
#define PCRE2_ENDANCHORED 0x20000000u
|
||||
|
||||
/* The following option bits can be passed only to pcre2_compile(). However,
|
||||
they may affect compilation, JIT compilation, and/or interpretive execution.
|
||||
The following tags indicate which:
|
||||
|
||||
C alters what is compiled by pcre2_compile()
|
||||
J alters what is compiled by pcre2_jit_compile()
|
||||
M is inspected during pcre2_match() execution
|
||||
D is inspected during pcre2_dfa_match() execution
|
||||
*/
|
||||
|
||||
#define PCRE2_ALLOW_EMPTY_CLASS 0x00000001u /* C */
|
||||
#define PCRE2_ALT_BSUX 0x00000002u /* C */
|
||||
#define PCRE2_AUTO_CALLOUT 0x00000004u /* C */
|
||||
#define PCRE2_CASELESS 0x00000008u /* C */
|
||||
#define PCRE2_DOLLAR_ENDONLY 0x00000010u /* J M D */
|
||||
#define PCRE2_DOTALL 0x00000020u /* C */
|
||||
#define PCRE2_DUPNAMES 0x00000040u /* C */
|
||||
#define PCRE2_EXTENDED 0x00000080u /* C */
|
||||
#define PCRE2_FIRSTLINE 0x00000100u /* J M D */
|
||||
#define PCRE2_MATCH_UNSET_BACKREF 0x00000200u /* C J M */
|
||||
#define PCRE2_MULTILINE 0x00000400u /* C */
|
||||
#define PCRE2_NEVER_UCP 0x00000800u /* C */
|
||||
#define PCRE2_NEVER_UTF 0x00001000u /* C */
|
||||
#define PCRE2_NO_AUTO_CAPTURE 0x00002000u /* C */
|
||||
#define PCRE2_NO_AUTO_POSSESS 0x00004000u /* C */
|
||||
#define PCRE2_NO_DOTSTAR_ANCHOR 0x00008000u /* C */
|
||||
#define PCRE2_NO_START_OPTIMIZE 0x00010000u /* J M D */
|
||||
#define PCRE2_UCP 0x00020000u /* C J M D */
|
||||
#define PCRE2_UNGREEDY 0x00040000u /* C */
|
||||
#define PCRE2_UTF 0x00080000u /* C J M D */
|
||||
#define PCRE2_NEVER_BACKSLASH_C 0x00100000u /* C */
|
||||
#define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */
|
||||
#define PCRE2_ALT_VERBNAMES 0x00400000u /* C */
|
||||
#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */
|
||||
#define PCRE2_EXTENDED_MORE 0x01000000u /* C */
|
||||
#define PCRE2_LITERAL 0x02000000u /* C */
|
||||
#define PCRE2_MATCH_INVALID_UTF 0x04000000u /* J M D */
|
||||
#define PCRE2_ALT_EXTENDED_CLASS 0x08000000u /* C */
|
||||
|
||||
/* An additional compile options word is available in the compile context. */
|
||||
|
||||
#define PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES 0x00000001u /* C */
|
||||
#define PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL 0x00000002u /* C */
|
||||
#define PCRE2_EXTRA_MATCH_WORD 0x00000004u /* C */
|
||||
#define PCRE2_EXTRA_MATCH_LINE 0x00000008u /* C */
|
||||
#define PCRE2_EXTRA_ESCAPED_CR_IS_LF 0x00000010u /* C */
|
||||
#define PCRE2_EXTRA_ALT_BSUX 0x00000020u /* C */
|
||||
#define PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK 0x00000040u /* C */
|
||||
#define PCRE2_EXTRA_CASELESS_RESTRICT 0x00000080u /* C */
|
||||
#define PCRE2_EXTRA_ASCII_BSD 0x00000100u /* C */
|
||||
#define PCRE2_EXTRA_ASCII_BSS 0x00000200u /* C */
|
||||
#define PCRE2_EXTRA_ASCII_BSW 0x00000400u /* C */
|
||||
#define PCRE2_EXTRA_ASCII_POSIX 0x00000800u /* C */
|
||||
#define PCRE2_EXTRA_ASCII_DIGIT 0x00001000u /* C */
|
||||
#define PCRE2_EXTRA_PYTHON_OCTAL 0x00002000u /* C */
|
||||
#define PCRE2_EXTRA_NO_BS0 0x00004000u /* C */
|
||||
#define PCRE2_EXTRA_NEVER_CALLOUT 0x00008000u /* C */
|
||||
#define PCRE2_EXTRA_TURKISH_CASING 0x00010000u /* C */
|
||||
|
||||
/* These are for pcre2_jit_compile(). */
|
||||
|
||||
#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */
|
||||
#define PCRE2_JIT_PARTIAL_SOFT 0x00000002u
|
||||
#define PCRE2_JIT_PARTIAL_HARD 0x00000004u
|
||||
#define PCRE2_JIT_INVALID_UTF 0x00000100u
|
||||
#define PCRE2_JIT_TEST_ALLOC 0x00000200u
|
||||
|
||||
/* These are for pcre2_match(), pcre2_dfa_match(), pcre2_jit_match(), and
|
||||
pcre2_substitute(). Some are allowed only for one of the functions, and in
|
||||
these cases it is noted below. Note that PCRE2_ANCHORED, PCRE2_ENDANCHORED and
|
||||
PCRE2_NO_UTF_CHECK can also be passed to these functions (though
|
||||
pcre2_jit_match() ignores the last of these since it bypasses all sanity checks). */
|
||||
|
||||
#define PCRE2_NOTBOL 0x00000001u
|
||||
#define PCRE2_NOTEOL 0x00000002u
|
||||
#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */
|
||||
#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. */
|
||||
#define PCRE2_PARTIAL_SOFT 0x00000010u
|
||||
#define PCRE2_PARTIAL_HARD 0x00000020u
|
||||
#define PCRE2_DFA_RESTART 0x00000040u /* pcre2_dfa_match() only */
|
||||
#define PCRE2_DFA_SHORTEST 0x00000080u /* pcre2_dfa_match() only */
|
||||
#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u /* pcre2_substitute() only */
|
||||
#define PCRE2_NO_JIT 0x00002000u /* not for pcre2_dfa_match() */
|
||||
#define PCRE2_COPY_MATCHED_SUBJECT 0x00004000u
|
||||
#define PCRE2_SUBSTITUTE_LITERAL 0x00008000u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_MATCHED 0x00010000u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_REPLACEMENT_ONLY 0x00020000u /* pcre2_substitute() only */
|
||||
#define PCRE2_DISABLE_RECURSELOOP_CHECK 0x00040000u /* not for pcre2_dfa_match() or pcre2_jit_match() */
|
||||
|
||||
/* Options for pcre2_pattern_convert(). */
|
||||
|
||||
/* Bit flags. Note that the two PCRE2_CONVERT_GLOB_NO_* values are not single
bits: each one also contains the PCRE2_CONVERT_GLOB bit (0x10). */
#define PCRE2_CONVERT_UTF 0x00000001u
|
||||
#define PCRE2_CONVERT_NO_UTF_CHECK 0x00000002u
|
||||
#define PCRE2_CONVERT_POSIX_BASIC 0x00000004u
|
||||
#define PCRE2_CONVERT_POSIX_EXTENDED 0x00000008u
|
||||
#define PCRE2_CONVERT_GLOB 0x00000010u
|
||||
#define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000030u /* 0x10 (GLOB) | 0x20 */
|
||||
#define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000050u /* 0x10 (GLOB) | 0x40 */
|
||||
|
||||
/* Newline and \R settings, for use in compile contexts. The newline values
|
||||
must be kept in step with the values set in config.h, and every value in both sets must be
|
||||
greater than zero. */
|
||||
|
||||
#define PCRE2_NEWLINE_CR 1
|
||||
#define PCRE2_NEWLINE_LF 2
|
||||
#define PCRE2_NEWLINE_CRLF 3
|
||||
#define PCRE2_NEWLINE_ANY 4
|
||||
#define PCRE2_NEWLINE_ANYCRLF 5
|
||||
#define PCRE2_NEWLINE_NUL 6
|
||||
|
||||
#define PCRE2_BSR_UNICODE 1
|
||||
#define PCRE2_BSR_ANYCRLF 2
|
||||
|
||||
/* Error codes for pcre2_compile(). Some of these are also used by
|
||||
pcre2_pattern_convert(). */
|
||||
|
||||
#define PCRE2_ERROR_END_BACKSLASH 101
|
||||
#define PCRE2_ERROR_END_BACKSLASH_C 102
|
||||
#define PCRE2_ERROR_UNKNOWN_ESCAPE 103
|
||||
#define PCRE2_ERROR_QUANTIFIER_OUT_OF_ORDER 104
|
||||
#define PCRE2_ERROR_QUANTIFIER_TOO_BIG 105
|
||||
#define PCRE2_ERROR_MISSING_SQUARE_BRACKET 106
|
||||
#define PCRE2_ERROR_ESCAPE_INVALID_IN_CLASS 107
|
||||
#define PCRE2_ERROR_CLASS_RANGE_ORDER 108
|
||||
#define PCRE2_ERROR_QUANTIFIER_INVALID 109
|
||||
#define PCRE2_ERROR_INTERNAL_UNEXPECTED_REPEAT 110
|
||||
#define PCRE2_ERROR_INVALID_AFTER_PARENS_QUERY 111
|
||||
#define PCRE2_ERROR_POSIX_CLASS_NOT_IN_CLASS 112
|
||||
#define PCRE2_ERROR_POSIX_NO_SUPPORT_COLLATING 113
|
||||
#define PCRE2_ERROR_MISSING_CLOSING_PARENTHESIS 114
|
||||
#define PCRE2_ERROR_BAD_SUBPATTERN_REFERENCE 115
|
||||
#define PCRE2_ERROR_NULL_PATTERN 116
|
||||
#define PCRE2_ERROR_BAD_OPTIONS 117
|
||||
#define PCRE2_ERROR_MISSING_COMMENT_CLOSING 118
|
||||
#define PCRE2_ERROR_PARENTHESES_NEST_TOO_DEEP 119
|
||||
#define PCRE2_ERROR_PATTERN_TOO_LARGE 120
|
||||
#define PCRE2_ERROR_HEAP_FAILED 121
|
||||
#define PCRE2_ERROR_UNMATCHED_CLOSING_PARENTHESIS 122
|
||||
#define PCRE2_ERROR_INTERNAL_CODE_OVERFLOW 123
|
||||
#define PCRE2_ERROR_MISSING_CONDITION_CLOSING 124
|
||||
#define PCRE2_ERROR_LOOKBEHIND_NOT_FIXED_LENGTH 125
|
||||
#define PCRE2_ERROR_ZERO_RELATIVE_REFERENCE 126
|
||||
#define PCRE2_ERROR_TOO_MANY_CONDITION_BRANCHES 127
|
||||
#define PCRE2_ERROR_CONDITION_ASSERTION_EXPECTED 128
|
||||
#define PCRE2_ERROR_BAD_RELATIVE_REFERENCE 129
|
||||
#define PCRE2_ERROR_UNKNOWN_POSIX_CLASS 130
|
||||
#define PCRE2_ERROR_INTERNAL_STUDY_ERROR 131
|
||||
#define PCRE2_ERROR_UNICODE_NOT_SUPPORTED 132
|
||||
#define PCRE2_ERROR_PARENTHESES_STACK_CHECK 133
|
||||
#define PCRE2_ERROR_CODE_POINT_TOO_BIG 134
|
||||
#define PCRE2_ERROR_LOOKBEHIND_TOO_COMPLICATED 135
|
||||
#define PCRE2_ERROR_LOOKBEHIND_INVALID_BACKSLASH_C 136
|
||||
#define PCRE2_ERROR_UNSUPPORTED_ESCAPE_SEQUENCE 137
|
||||
#define PCRE2_ERROR_CALLOUT_NUMBER_TOO_BIG 138
|
||||
#define PCRE2_ERROR_MISSING_CALLOUT_CLOSING 139
|
||||
#define PCRE2_ERROR_ESCAPE_INVALID_IN_VERB 140
|
||||
#define PCRE2_ERROR_UNRECOGNIZED_AFTER_QUERY_P 141
|
||||
#define PCRE2_ERROR_MISSING_NAME_TERMINATOR 142
|
||||
#define PCRE2_ERROR_DUPLICATE_SUBPATTERN_NAME 143
|
||||
#define PCRE2_ERROR_INVALID_SUBPATTERN_NAME 144
|
||||
#define PCRE2_ERROR_UNICODE_PROPERTIES_UNAVAILABLE 145
|
||||
#define PCRE2_ERROR_MALFORMED_UNICODE_PROPERTY 146
|
||||
#define PCRE2_ERROR_UNKNOWN_UNICODE_PROPERTY 147
|
||||
#define PCRE2_ERROR_SUBPATTERN_NAME_TOO_LONG 148
|
||||
#define PCRE2_ERROR_TOO_MANY_NAMED_SUBPATTERNS 149
|
||||
#define PCRE2_ERROR_CLASS_INVALID_RANGE 150
|
||||
#define PCRE2_ERROR_OCTAL_BYTE_TOO_BIG 151
|
||||
#define PCRE2_ERROR_INTERNAL_OVERRAN_WORKSPACE 152
|
||||
#define PCRE2_ERROR_INTERNAL_MISSING_SUBPATTERN 153
|
||||
#define PCRE2_ERROR_DEFINE_TOO_MANY_BRANCHES 154
|
||||
#define PCRE2_ERROR_BACKSLASH_O_MISSING_BRACE 155
|
||||
#define PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE 156
|
||||
#define PCRE2_ERROR_BACKSLASH_G_SYNTAX 157
|
||||
#define PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING 158
|
||||
/* Error 159 is obsolete and should now never occur */
|
||||
#define PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED 159
|
||||
#define PCRE2_ERROR_VERB_UNKNOWN 160
|
||||
#define PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG 161
|
||||
#define PCRE2_ERROR_SUBPATTERN_NAME_EXPECTED 162
|
||||
#define PCRE2_ERROR_INTERNAL_PARSED_OVERFLOW 163
|
||||
#define PCRE2_ERROR_INVALID_OCTAL 164
|
||||
#define PCRE2_ERROR_SUBPATTERN_NAMES_MISMATCH 165
|
||||
#define PCRE2_ERROR_MARK_MISSING_ARGUMENT 166
|
||||
#define PCRE2_ERROR_INVALID_HEXADECIMAL 167
|
||||
#define PCRE2_ERROR_BACKSLASH_C_SYNTAX 168
|
||||
#define PCRE2_ERROR_BACKSLASH_K_SYNTAX 169
|
||||
#define PCRE2_ERROR_INTERNAL_BAD_CODE_LOOKBEHINDS 170
|
||||
#define PCRE2_ERROR_BACKSLASH_N_IN_CLASS 171
|
||||
#define PCRE2_ERROR_CALLOUT_STRING_TOO_LONG 172
|
||||
#define PCRE2_ERROR_UNICODE_DISALLOWED_CODE_POINT 173
|
||||
#define PCRE2_ERROR_UTF_IS_DISABLED 174
|
||||
#define PCRE2_ERROR_UCP_IS_DISABLED 175
|
||||
#define PCRE2_ERROR_VERB_NAME_TOO_LONG 176
|
||||
#define PCRE2_ERROR_BACKSLASH_U_CODE_POINT_TOO_BIG 177
|
||||
#define PCRE2_ERROR_MISSING_OCTAL_OR_HEX_DIGITS 178
|
||||
#define PCRE2_ERROR_VERSION_CONDITION_SYNTAX 179
|
||||
#define PCRE2_ERROR_INTERNAL_BAD_CODE_AUTO_POSSESS 180
|
||||
#define PCRE2_ERROR_CALLOUT_NO_STRING_DELIMITER 181
|
||||
#define PCRE2_ERROR_CALLOUT_BAD_STRING_DELIMITER 182
|
||||
#define PCRE2_ERROR_BACKSLASH_C_CALLER_DISABLED 183
|
||||
#define PCRE2_ERROR_QUERY_BARJX_NEST_TOO_DEEP 184
|
||||
#define PCRE2_ERROR_BACKSLASH_C_LIBRARY_DISABLED 185
|
||||
#define PCRE2_ERROR_PATTERN_TOO_COMPLICATED 186
|
||||
#define PCRE2_ERROR_LOOKBEHIND_TOO_LONG 187
|
||||
#define PCRE2_ERROR_PATTERN_STRING_TOO_LONG 188
|
||||
#define PCRE2_ERROR_INTERNAL_BAD_CODE 189
|
||||
#define PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP 190
|
||||
#define PCRE2_ERROR_NO_SURROGATES_IN_UTF16 191
|
||||
#define PCRE2_ERROR_BAD_LITERAL_OPTIONS 192
|
||||
#define PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE 193
|
||||
#define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS 194
|
||||
#define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN 195
|
||||
#define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE 196
|
||||
#define PCRE2_ERROR_TOO_MANY_CAPTURES 197
|
||||
#define PCRE2_ERROR_MISSING_OCTAL_DIGIT 198
|
||||
#define PCRE2_ERROR_BACKSLASH_K_IN_LOOKAROUND 199
|
||||
#define PCRE2_ERROR_MAX_VAR_LOOKBEHIND_EXCEEDED 200
|
||||
#define PCRE2_ERROR_PATTERN_COMPILED_SIZE_TOO_BIG 201
|
||||
#define PCRE2_ERROR_OVERSIZE_PYTHON_OCTAL 202
|
||||
#define PCRE2_ERROR_CALLOUT_CALLER_DISABLED 203
|
||||
#define PCRE2_ERROR_EXTRA_CASING_REQUIRES_UNICODE 204
|
||||
#define PCRE2_ERROR_TURKISH_CASING_REQUIRES_UTF 205
|
||||
#define PCRE2_ERROR_EXTRA_CASING_INCOMPATIBLE 206
|
||||
#define PCRE2_ERROR_ECLASS_NEST_TOO_DEEP 207
|
||||
#define PCRE2_ERROR_ECLASS_INVALID_OPERATOR 208
|
||||
#define PCRE2_ERROR_ECLASS_UNEXPECTED_OPERATOR 209
|
||||
#define PCRE2_ERROR_ECLASS_EXPECTED_OPERAND 210
|
||||
#define PCRE2_ERROR_ECLASS_MIXED_OPERATORS 211
|
||||
#define PCRE2_ERROR_ECLASS_HINT_SQUARE_BRACKET 212
|
||||
#define PCRE2_ERROR_PERL_ECLASS_UNEXPECTED_EXPR 213
|
||||
#define PCRE2_ERROR_PERL_ECLASS_EMPTY_EXPR 214
|
||||
#define PCRE2_ERROR_PERL_ECLASS_MISSING_CLOSE 215
|
||||
#define PCRE2_ERROR_PERL_ECLASS_UNEXPECTED_CHAR 216
|
||||
|
||||
/* "Expected" matching error codes: no match and partial match. */
|
||||
|
||||
#define PCRE2_ERROR_NOMATCH (-1)
|
||||
#define PCRE2_ERROR_PARTIAL (-2)
|
||||
|
||||
/* Error codes for UTF-8 validity checks */
|
||||
|
||||
#define PCRE2_ERROR_UTF8_ERR1 (-3)
|
||||
#define PCRE2_ERROR_UTF8_ERR2 (-4)
|
||||
#define PCRE2_ERROR_UTF8_ERR3 (-5)
|
||||
#define PCRE2_ERROR_UTF8_ERR4 (-6)
|
||||
#define PCRE2_ERROR_UTF8_ERR5 (-7)
|
||||
#define PCRE2_ERROR_UTF8_ERR6 (-8)
|
||||
#define PCRE2_ERROR_UTF8_ERR7 (-9)
|
||||
#define PCRE2_ERROR_UTF8_ERR8 (-10)
|
||||
#define PCRE2_ERROR_UTF8_ERR9 (-11)
|
||||
#define PCRE2_ERROR_UTF8_ERR10 (-12)
|
||||
#define PCRE2_ERROR_UTF8_ERR11 (-13)
|
||||
#define PCRE2_ERROR_UTF8_ERR12 (-14)
|
||||
#define PCRE2_ERROR_UTF8_ERR13 (-15)
|
||||
#define PCRE2_ERROR_UTF8_ERR14 (-16)
|
||||
#define PCRE2_ERROR_UTF8_ERR15 (-17)
|
||||
#define PCRE2_ERROR_UTF8_ERR16 (-18)
|
||||
#define PCRE2_ERROR_UTF8_ERR17 (-19)
|
||||
#define PCRE2_ERROR_UTF8_ERR18 (-20)
|
||||
#define PCRE2_ERROR_UTF8_ERR19 (-21)
|
||||
#define PCRE2_ERROR_UTF8_ERR20 (-22)
|
||||
#define PCRE2_ERROR_UTF8_ERR21 (-23)
|
||||
|
||||
/* Error codes for UTF-16 validity checks */
|
||||
|
||||
#define PCRE2_ERROR_UTF16_ERR1 (-24)
|
||||
#define PCRE2_ERROR_UTF16_ERR2 (-25)
|
||||
#define PCRE2_ERROR_UTF16_ERR3 (-26)
|
||||
|
||||
/* Error codes for UTF-32 validity checks */
|
||||
|
||||
#define PCRE2_ERROR_UTF32_ERR1 (-27)
|
||||
#define PCRE2_ERROR_UTF32_ERR2 (-28)
|
||||
|
||||
/* Miscellaneous error codes for pcre2[_dfa]_match(), substring extraction
|
||||
functions, context functions, and serializing functions. They are in numerical
|
||||
order. Originally they were in alphabetical order too, but now that PCRE2 is
|
||||
released, the numbers must not be changed. */
|
||||
|
||||
#define PCRE2_ERROR_BADDATA (-29)
|
||||
#define PCRE2_ERROR_MIXEDTABLES (-30) /* Name was changed */
|
||||
#define PCRE2_ERROR_BADMAGIC (-31)
|
||||
#define PCRE2_ERROR_BADMODE (-32)
|
||||
#define PCRE2_ERROR_BADOFFSET (-33)
|
||||
#define PCRE2_ERROR_BADOPTION (-34)
|
||||
#define PCRE2_ERROR_BADREPLACEMENT (-35)
|
||||
#define PCRE2_ERROR_BADUTFOFFSET (-36)
|
||||
#define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */
|
||||
#define PCRE2_ERROR_DFA_BADRESTART (-38)
|
||||
#define PCRE2_ERROR_DFA_RECURSE (-39)
|
||||
#define PCRE2_ERROR_DFA_UCOND (-40)
|
||||
#define PCRE2_ERROR_DFA_UFUNC (-41)
|
||||
#define PCRE2_ERROR_DFA_UITEM (-42)
|
||||
#define PCRE2_ERROR_DFA_WSSIZE (-43)
|
||||
#define PCRE2_ERROR_INTERNAL (-44)
|
||||
#define PCRE2_ERROR_JIT_BADOPTION (-45)
|
||||
#define PCRE2_ERROR_JIT_STACKLIMIT (-46)
|
||||
#define PCRE2_ERROR_MATCHLIMIT (-47)
|
||||
#define PCRE2_ERROR_NOMEMORY (-48)
|
||||
#define PCRE2_ERROR_NOSUBSTRING (-49)
|
||||
#define PCRE2_ERROR_NOUNIQUESUBSTRING (-50)
|
||||
#define PCRE2_ERROR_NULL (-51)
|
||||
#define PCRE2_ERROR_RECURSELOOP (-52)
|
||||
#define PCRE2_ERROR_DEPTHLIMIT (-53)
|
||||
#define PCRE2_ERROR_RECURSIONLIMIT (-53) /* Obsolete synonym */
|
||||
#define PCRE2_ERROR_UNAVAILABLE (-54)
|
||||
#define PCRE2_ERROR_UNSET (-55)
|
||||
#define PCRE2_ERROR_BADOFFSETLIMIT (-56)
|
||||
#define PCRE2_ERROR_BADREPESCAPE (-57)
|
||||
#define PCRE2_ERROR_REPMISSINGBRACE (-58)
|
||||
#define PCRE2_ERROR_BADSUBSTITUTION (-59)
|
||||
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
|
||||
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
|
||||
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
|
||||
#define PCRE2_ERROR_HEAPLIMIT (-63)
|
||||
#define PCRE2_ERROR_CONVERT_SYNTAX (-64)
|
||||
#define PCRE2_ERROR_INTERNAL_DUPMATCH (-65)
|
||||
#define PCRE2_ERROR_DFA_UINVALID_UTF (-66)
|
||||
#define PCRE2_ERROR_INVALIDOFFSET (-67)
|
||||
#define PCRE2_ERROR_JIT_UNSUPPORTED (-68)
|
||||
#define PCRE2_ERROR_REPLACECASE (-69)
|
||||
#define PCRE2_ERROR_TOOLARGEREPLACE (-70)
|
||||
|
||||
|
||||
/* Request types for pcre2_pattern_info() */
|
||||
|
||||
#define PCRE2_INFO_ALLOPTIONS 0
|
||||
#define PCRE2_INFO_ARGOPTIONS 1
|
||||
#define PCRE2_INFO_BACKREFMAX 2
|
||||
#define PCRE2_INFO_BSR 3
|
||||
#define PCRE2_INFO_CAPTURECOUNT 4
|
||||
#define PCRE2_INFO_FIRSTCODEUNIT 5
|
||||
#define PCRE2_INFO_FIRSTCODETYPE 6
|
||||
#define PCRE2_INFO_FIRSTBITMAP 7
|
||||
#define PCRE2_INFO_HASCRORLF 8
|
||||
#define PCRE2_INFO_JCHANGED 9
|
||||
#define PCRE2_INFO_JITSIZE 10
|
||||
#define PCRE2_INFO_LASTCODEUNIT 11
|
||||
#define PCRE2_INFO_LASTCODETYPE 12
|
||||
#define PCRE2_INFO_MATCHEMPTY 13
|
||||
#define PCRE2_INFO_MATCHLIMIT 14
|
||||
#define PCRE2_INFO_MAXLOOKBEHIND 15
|
||||
#define PCRE2_INFO_MINLENGTH 16
|
||||
#define PCRE2_INFO_NAMECOUNT 17
|
||||
#define PCRE2_INFO_NAMEENTRYSIZE 18
|
||||
#define PCRE2_INFO_NAMETABLE 19
|
||||
#define PCRE2_INFO_NEWLINE 20
|
||||
#define PCRE2_INFO_DEPTHLIMIT 21
|
||||
#define PCRE2_INFO_RECURSIONLIMIT 21 /* Obsolete synonym */
|
||||
#define PCRE2_INFO_SIZE 22
|
||||
#define PCRE2_INFO_HASBACKSLASHC 23
|
||||
#define PCRE2_INFO_FRAMESIZE 24
|
||||
#define PCRE2_INFO_HEAPLIMIT 25
|
||||
#define PCRE2_INFO_EXTRAOPTIONS 26
|
||||
|
||||
/* Request types for pcre2_config(). */
|
||||
|
||||
#define PCRE2_CONFIG_BSR 0
|
||||
#define PCRE2_CONFIG_JIT 1
|
||||
#define PCRE2_CONFIG_JITTARGET 2
|
||||
#define PCRE2_CONFIG_LINKSIZE 3
|
||||
#define PCRE2_CONFIG_MATCHLIMIT 4
|
||||
#define PCRE2_CONFIG_NEWLINE 5
|
||||
#define PCRE2_CONFIG_PARENSLIMIT 6
|
||||
#define PCRE2_CONFIG_DEPTHLIMIT 7
|
||||
#define PCRE2_CONFIG_RECURSIONLIMIT 7 /* Obsolete synonym */
|
||||
#define PCRE2_CONFIG_STACKRECURSE 8 /* Obsolete */
|
||||
#define PCRE2_CONFIG_UNICODE 9
|
||||
#define PCRE2_CONFIG_UNICODE_VERSION 10
|
||||
#define PCRE2_CONFIG_VERSION 11
|
||||
#define PCRE2_CONFIG_HEAPLIMIT 12
|
||||
#define PCRE2_CONFIG_NEVER_BACKSLASH_C 13
|
||||
#define PCRE2_CONFIG_COMPILED_WIDTHS 14
|
||||
#define PCRE2_CONFIG_TABLES_LENGTH 15
|
||||
|
||||
/* Optimization directives for pcre2_set_optimize().
|
||||
For binary compatibility, only add to this list; do not renumber. */
|
||||
|
||||
/* Directives 0 and 1 are the NONE/FULL values. */
#define PCRE2_OPTIMIZATION_NONE 0
|
||||
#define PCRE2_OPTIMIZATION_FULL 1
|
||||
|
||||
/* From 64 upward the directives come in adjacent pairs: value N names one
optimization and N+1 is its _OFF counterpart. NOTE(review): presumably N turns
the optimization on and N+1 turns it off - confirm against pcre2_set_optimize(). */
#define PCRE2_AUTO_POSSESS 64
|
||||
#define PCRE2_AUTO_POSSESS_OFF 65
|
||||
#define PCRE2_DOTSTAR_ANCHOR 66
|
||||
#define PCRE2_DOTSTAR_ANCHOR_OFF 67
|
||||
#define PCRE2_START_OPTIMIZE 68
|
||||
#define PCRE2_START_OPTIMIZE_OFF 69
|
||||
|
||||
/* Types used in pcre2_set_substitute_case_callout().
|
||||
|
||||
PCRE2_SUBSTITUTE_CASE_LOWER and PCRE2_SUBSTITUTE_CASE_UPPER are passed to the
|
||||
callout to indicate that the entire callout input should be
|
||||
case-transformed. PCRE2_SUBSTITUTE_CASE_TITLE_FIRST is passed to indicate that
|
||||
only the first character or glyph should be transformed to Unicode titlecase,
|
||||
and the rest to lowercase. */
|
||||
|
||||
#define PCRE2_SUBSTITUTE_CASE_LOWER 1
|
||||
#define PCRE2_SUBSTITUTE_CASE_UPPER 2
|
||||
#define PCRE2_SUBSTITUTE_CASE_TITLE_FIRST 3
|
||||
|
||||
/* Types for code units in patterns and subject strings. */
|
||||
|
||||
/* One unsigned code unit type per supported width. */
typedef uint8_t PCRE2_UCHAR8;
|
||||
typedef uint16_t PCRE2_UCHAR16;
|
||||
typedef uint32_t PCRE2_UCHAR32;
|
||||
|
||||
/* Matching "string pointer" types: pointers to const code units of the
corresponding width, so data reached through them is read-only. */
typedef const PCRE2_UCHAR8 *PCRE2_SPTR8;
|
||||
typedef const PCRE2_UCHAR16 *PCRE2_SPTR16;
|
||||
typedef const PCRE2_UCHAR32 *PCRE2_SPTR32;
|
||||
|
||||
/* The PCRE2_SIZE type is used for all string lengths and offsets in PCRE2,
|
||||
including pattern offsets for errors and subject offsets after a match. We
|
||||
define special values to indicate zero-terminated strings and unset offsets in
|
||||
the offset vector (ovector). */
|
||||
|
||||
#define PCRE2_SIZE size_t /* PCRE2_SIZE is a macro alias for size_t, not a typedef */
|
||||
#define PCRE2_SIZE_MAX SIZE_MAX /* largest value a PCRE2_SIZE can hold */
|
||||
#define PCRE2_ZERO_TERMINATED (~(PCRE2_SIZE)0) /* all bits set */
|
||||
#define PCRE2_UNSET (~(PCRE2_SIZE)0) /* all bits set; same value as PCRE2_ZERO_TERMINATED */
|
||||
|
||||
/* Generic types for opaque structures and JIT callback functions. These
|
||||
declarations are defined in a macro that is expanded for each width later. */
|
||||
|
||||
#define PCRE2_TYPES_LIST \
|
||||
struct pcre2_real_general_context; \
|
||||
typedef struct pcre2_real_general_context pcre2_general_context; \
|
||||
\
|
||||
struct pcre2_real_compile_context; \
|
||||
typedef struct pcre2_real_compile_context pcre2_compile_context; \
|
||||
\
|
||||
struct pcre2_real_match_context; \
|
||||
typedef struct pcre2_real_match_context pcre2_match_context; \
|
||||
\
|
||||
struct pcre2_real_convert_context; \
|
||||
typedef struct pcre2_real_convert_context pcre2_convert_context; \
|
||||
\
|
||||
struct pcre2_real_code; \
|
||||
typedef struct pcre2_real_code pcre2_code; \
|
||||
\
|
||||
struct pcre2_real_match_data; \
|
||||
typedef struct pcre2_real_match_data pcre2_match_data; \
|
||||
\
|
||||
struct pcre2_real_jit_stack; \
|
||||
typedef struct pcre2_real_jit_stack pcre2_jit_stack; \
|
||||
\
|
||||
typedef pcre2_jit_stack *(*pcre2_jit_callback)(void *);
|
||||
|
||||
|
||||
/* The structures for passing out data via callout functions. We use structures
|
||||
so that new fields can be added on the end in future versions, without changing
|
||||
the API of the function, thereby allowing old clients to work without
|
||||
modification. Define the generic versions in a macro; the width-specific
|
||||
versions are generated from this macro below. */
|
||||
|
||||
/* Flags for the callout_flags field. These are cleared after a callout. */
|
||||
|
||||
#define PCRE2_CALLOUT_STARTMATCH 0x00000001u /* Set for each bumpalong */
|
||||
#define PCRE2_CALLOUT_BACKTRACK 0x00000002u /* Set after a backtrack */
|
||||
|
||||
#define PCRE2_STRUCTURE_LIST \
|
||||
typedef struct pcre2_callout_block { \
|
||||
uint32_t version; /* Identifies version of block */ \
|
||||
/* ------------------------ Version 0 ------------------------------- */ \
|
||||
uint32_t callout_number; /* Number compiled into pattern */ \
|
||||
uint32_t capture_top; /* Max current capture */ \
|
||||
uint32_t capture_last; /* Most recently closed capture */ \
|
||||
PCRE2_SIZE *offset_vector; /* The offset vector */ \
|
||||
PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \
|
||||
PCRE2_SPTR subject; /* The subject being matched */ \
|
||||
PCRE2_SIZE subject_length; /* The length of the subject */ \
|
||||
PCRE2_SIZE start_match; /* Offset to start of this match attempt */ \
|
||||
PCRE2_SIZE current_position; /* Where we currently are in the subject */ \
|
||||
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
|
||||
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
|
||||
/* ------------------- Added for Version 1 -------------------------- */ \
|
||||
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
|
||||
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
|
||||
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
|
||||
/* ------------------- Added for Version 2 -------------------------- */ \
|
||||
uint32_t callout_flags; /* See above for list */ \
|
||||
/* ------------------------------------------------------------------ */ \
|
||||
} pcre2_callout_block; \
|
||||
\
|
||||
typedef struct pcre2_callout_enumerate_block { \
|
||||
uint32_t version; /* Identifies version of block */ \
|
||||
/* ------------------------ Version 0 ------------------------------- */ \
|
||||
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
|
||||
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
|
||||
uint32_t callout_number; /* Number compiled into pattern */ \
|
||||
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
|
||||
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
|
||||
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
|
||||
/* ------------------------------------------------------------------ */ \
|
||||
} pcre2_callout_enumerate_block; \
|
||||
\
|
||||
typedef struct pcre2_substitute_callout_block { \
|
||||
uint32_t version; /* Identifies version of block */ \
|
||||
/* ------------------------ Version 0 ------------------------------- */ \
|
||||
PCRE2_SPTR input; /* Pointer to input subject string */ \
|
||||
PCRE2_SPTR output; /* Pointer to output buffer */ \
|
||||
PCRE2_SIZE output_offsets[2]; /* Changed portion of the output */ \
|
||||
PCRE2_SIZE *ovector; /* Pointer to current ovector */ \
|
||||
uint32_t oveccount; /* Count of pairs set in ovector */ \
|
||||
uint32_t subscount; /* Substitution number */ \
|
||||
/* ------------------------------------------------------------------ */ \
|
||||
} pcre2_substitute_callout_block;
|
||||
|
||||
|
||||
/* List the generic forms of all other functions in macros, which will be
|
||||
expanded for each width below. Start with functions that give general
|
||||
information. */
|
||||
|
||||
#define PCRE2_GENERAL_INFO_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_config(uint32_t, void *);
|
||||
|
||||
|
||||
/* Functions for manipulating contexts. */
|
||||
|
||||
#define PCRE2_GENERAL_CONTEXT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_general_context *PCRE2_CALL_CONVENTION \
|
||||
pcre2_general_context_copy(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL pcre2_general_context *PCRE2_CALL_CONVENTION \
|
||||
pcre2_general_context_create(void *(*)(size_t, void *), \
|
||||
void (*)(void *, void *), void *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_general_context_free(pcre2_general_context *);
|
||||
|
||||
#define PCRE2_COMPILE_CONTEXT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_compile_context *PCRE2_CALL_CONVENTION \
|
||||
pcre2_compile_context_copy(pcre2_compile_context *); \
|
||||
PCRE2_EXP_DECL pcre2_compile_context *PCRE2_CALL_CONVENTION \
|
||||
pcre2_compile_context_create(pcre2_general_context *);\
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_compile_context_free(pcre2_compile_context *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_character_tables(pcre2_compile_context *, const uint8_t *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_max_pattern_compiled_length(pcre2_compile_context *, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_max_varlookbehind(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_newline(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_parens_nest_limit(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_compile_recursion_guard(pcre2_compile_context *, \
|
||||
int (*)(uint32_t, void *), void *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_optimize(pcre2_compile_context *, uint32_t);
|
||||
|
||||
#define PCRE2_MATCH_CONTEXT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \
|
||||
pcre2_match_context_copy(pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \
|
||||
pcre2_match_context_create(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_match_context_free(pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_callout(pcre2_match_context *, \
|
||||
int (*)(pcre2_callout_block *, void *), void *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_substitute_callout(pcre2_match_context *, \
|
||||
int (*)(pcre2_substitute_callout_block *, void *), void *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_substitute_case_callout(pcre2_match_context *, \
|
||||
PCRE2_SIZE (*)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE, int, \
|
||||
void *), \
|
||||
void *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_heap_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_match_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_offset_limit(pcre2_match_context *, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_recursion_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_recursion_memory_management(pcre2_match_context *, \
|
||||
void *(*)(size_t, void *), void (*)(void *, void *), void *);
|
||||
|
||||
#define PCRE2_CONVERT_CONTEXT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_convert_context *PCRE2_CALL_CONVENTION \
|
||||
pcre2_convert_context_copy(pcre2_convert_context *); \
|
||||
PCRE2_EXP_DECL pcre2_convert_context *PCRE2_CALL_CONVENTION \
|
||||
pcre2_convert_context_create(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_convert_context_free(pcre2_convert_context *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_glob_escape(pcre2_convert_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_glob_separator(pcre2_convert_context *, uint32_t);
|
||||
|
||||
|
||||
/* Functions concerned with compiling a pattern to PCRE internal code. */
|
||||
|
||||
#define PCRE2_COMPILE_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \
|
||||
pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_SIZE *, \
|
||||
pcre2_compile_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_code_free(pcre2_code *); \
|
||||
PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \
|
||||
pcre2_code_copy(const pcre2_code *); \
|
||||
PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \
|
||||
pcre2_code_copy_with_tables(const pcre2_code *);
|
||||
|
||||
|
||||
/* Functions that give information about a compiled pattern. */
|
||||
|
||||
#define PCRE2_PATTERN_INFO_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_pattern_info(const pcre2_code *, uint32_t, void *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_callout_enumerate(const pcre2_code *, \
|
||||
int (*)(pcre2_callout_enumerate_block *, void *), void *);
|
||||
|
||||
|
||||
/* Functions for running a match and inspecting the result. */
|
||||
|
||||
#define PCRE2_MATCH_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_match_data *PCRE2_CALL_CONVENTION \
|
||||
pcre2_match_data_create(uint32_t, pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL pcre2_match_data *PCRE2_CALL_CONVENTION \
|
||||
pcre2_match_data_create_from_pattern(const pcre2_code *, \
|
||||
pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
|
||||
uint32_t, pcre2_match_data *, pcre2_match_context *, int *, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
|
||||
uint32_t, pcre2_match_data *, pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_match_data_free(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SPTR PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_mark(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_match_data_size(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_match_data_heapframes_size(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL uint32_t PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_ovector_count(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE *PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_ovector_pointer(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_startchar(pcre2_match_data *);
|
||||
|
||||
|
||||
/* Convenience functions for handling matched substrings. */
|
||||
|
||||
#define PCRE2_SUBSTRING_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_copy_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR *, \
|
||||
PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_copy_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR *, \
|
||||
PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_free(PCRE2_UCHAR *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_get_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR **, \
|
||||
PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_get_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR **, \
|
||||
PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_length_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_length_bynumber(pcre2_match_data *, uint32_t, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_nametable_scan(const pcre2_code *, PCRE2_SPTR, PCRE2_SPTR *, \
|
||||
PCRE2_SPTR *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_number_from_name(const pcre2_code *, PCRE2_SPTR); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_list_free(PCRE2_UCHAR **); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_list_get(pcre2_match_data *, PCRE2_UCHAR ***, PCRE2_SIZE **);
|
||||
|
||||
|
||||
/* Functions for serializing / deserializing compiled patterns. */
|
||||
|
||||
#define PCRE2_SERIALIZE_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
|
||||
pcre2_serialize_encode(const pcre2_code **, int32_t, uint8_t **, \
|
||||
PCRE2_SIZE *, pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
|
||||
pcre2_serialize_decode(pcre2_code **, int32_t, const uint8_t *, \
|
||||
pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
|
||||
pcre2_serialize_get_number_of_codes(const uint8_t *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_serialize_free(uint8_t *);
|
||||
|
||||
|
||||
/* Convenience function for match + substitute. */
|
||||
|
||||
#define PCRE2_SUBSTITUTE_FUNCTION \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substitute(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
|
||||
uint32_t, pcre2_match_data *, pcre2_match_context *, PCRE2_SPTR, \
|
||||
PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE *);
|
||||
|
||||
|
||||
/* Functions for converting pattern source strings. */
|
||||
|
||||
#define PCRE2_CONVERT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_pattern_convert(PCRE2_SPTR, PCRE2_SIZE, uint32_t, PCRE2_UCHAR **, \
|
||||
PCRE2_SIZE *, pcre2_convert_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_converted_pattern_free(PCRE2_UCHAR *);
|
||||
|
||||
|
||||
/* Functions for JIT processing */
|
||||
|
||||
#define PCRE2_JIT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_jit_compile(pcre2_code *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_jit_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
|
||||
uint32_t, pcre2_match_data *, pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_jit_free_unused_memory(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL pcre2_jit_stack *PCRE2_CALL_CONVENTION \
|
||||
pcre2_jit_stack_create(size_t, size_t, pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_jit_stack_assign(pcre2_match_context *, pcre2_jit_callback, void *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_jit_stack_free(pcre2_jit_stack *);
|
||||
|
||||
|
||||
/* Other miscellaneous functions. */
|
||||
|
||||
#define PCRE2_OTHER_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL const uint8_t *PCRE2_CALL_CONVENTION \
|
||||
pcre2_maketables(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_maketables_free(pcre2_general_context *, const uint8_t *);
|
||||
|
||||
/* Define macros that generate width-specific names from generic versions. The
|
||||
three-level macro scheme is necessary to get the macros expanded when we want
|
||||
them to be. First we get the width from PCRE2_LOCAL_WIDTH, which is used for
|
||||
generating three versions of everything below. After that, PCRE2_SUFFIX will be
|
||||
re-defined to use PCRE2_CODE_UNIT_WIDTH, for use when macros such as
|
||||
pcre2_compile are called by application code. */
|
||||
|
||||
#define PCRE2_JOIN(a,b) a ## b
|
||||
#define PCRE2_GLUE(a,b) PCRE2_JOIN(a,b)
|
||||
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a,PCRE2_LOCAL_WIDTH)
|
||||
|
||||
|
||||
/* Data types */
|
||||
|
||||
#define PCRE2_UCHAR PCRE2_SUFFIX(PCRE2_UCHAR)
|
||||
#define PCRE2_SPTR PCRE2_SUFFIX(PCRE2_SPTR)
|
||||
|
||||
#define pcre2_code PCRE2_SUFFIX(pcre2_code_)
|
||||
#define pcre2_jit_callback PCRE2_SUFFIX(pcre2_jit_callback_)
|
||||
#define pcre2_jit_stack PCRE2_SUFFIX(pcre2_jit_stack_)
|
||||
|
||||
#define pcre2_real_code PCRE2_SUFFIX(pcre2_real_code_)
|
||||
#define pcre2_real_general_context PCRE2_SUFFIX(pcre2_real_general_context_)
|
||||
#define pcre2_real_compile_context PCRE2_SUFFIX(pcre2_real_compile_context_)
|
||||
#define pcre2_real_convert_context PCRE2_SUFFIX(pcre2_real_convert_context_)
|
||||
#define pcre2_real_match_context PCRE2_SUFFIX(pcre2_real_match_context_)
|
||||
#define pcre2_real_jit_stack PCRE2_SUFFIX(pcre2_real_jit_stack_)
|
||||
#define pcre2_real_match_data PCRE2_SUFFIX(pcre2_real_match_data_)
|
||||
|
||||
|
||||
/* Data blocks */
|
||||
|
||||
#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_)
|
||||
#define pcre2_callout_enumerate_block PCRE2_SUFFIX(pcre2_callout_enumerate_block_)
|
||||
#define pcre2_substitute_callout_block PCRE2_SUFFIX(pcre2_substitute_callout_block_)
|
||||
#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_)
|
||||
#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_)
|
||||
#define pcre2_convert_context PCRE2_SUFFIX(pcre2_convert_context_)
|
||||
#define pcre2_match_context PCRE2_SUFFIX(pcre2_match_context_)
|
||||
#define pcre2_match_data PCRE2_SUFFIX(pcre2_match_data_)
|
||||
|
||||
|
||||
/* Functions: the complete list in alphabetical order */
|
||||
|
||||
#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_)
|
||||
#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_)
|
||||
#define pcre2_code_copy_with_tables PCRE2_SUFFIX(pcre2_code_copy_with_tables_)
|
||||
#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_)
|
||||
#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_)
|
||||
#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_)
|
||||
#define pcre2_compile_context_create PCRE2_SUFFIX(pcre2_compile_context_create_)
|
||||
#define pcre2_compile_context_free PCRE2_SUFFIX(pcre2_compile_context_free_)
|
||||
#define pcre2_config PCRE2_SUFFIX(pcre2_config_)
|
||||
#define pcre2_convert_context_copy PCRE2_SUFFIX(pcre2_convert_context_copy_)
|
||||
#define pcre2_convert_context_create PCRE2_SUFFIX(pcre2_convert_context_create_)
|
||||
#define pcre2_convert_context_free PCRE2_SUFFIX(pcre2_convert_context_free_)
|
||||
#define pcre2_converted_pattern_free PCRE2_SUFFIX(pcre2_converted_pattern_free_)
|
||||
#define pcre2_dfa_match PCRE2_SUFFIX(pcre2_dfa_match_)
|
||||
#define pcre2_general_context_copy PCRE2_SUFFIX(pcre2_general_context_copy_)
|
||||
#define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_)
|
||||
#define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_)
|
||||
#define pcre2_get_error_message PCRE2_SUFFIX(pcre2_get_error_message_)
|
||||
#define pcre2_get_mark PCRE2_SUFFIX(pcre2_get_mark_)
|
||||
#define pcre2_get_match_data_heapframes_size PCRE2_SUFFIX(pcre2_get_match_data_heapframes_size_)
|
||||
#define pcre2_get_match_data_size PCRE2_SUFFIX(pcre2_get_match_data_size_)
|
||||
#define pcre2_get_ovector_pointer PCRE2_SUFFIX(pcre2_get_ovector_pointer_)
|
||||
#define pcre2_get_ovector_count PCRE2_SUFFIX(pcre2_get_ovector_count_)
|
||||
#define pcre2_get_startchar PCRE2_SUFFIX(pcre2_get_startchar_)
|
||||
#define pcre2_jit_compile PCRE2_SUFFIX(pcre2_jit_compile_)
|
||||
#define pcre2_jit_match PCRE2_SUFFIX(pcre2_jit_match_)
|
||||
#define pcre2_jit_free_unused_memory PCRE2_SUFFIX(pcre2_jit_free_unused_memory_)
|
||||
#define pcre2_jit_stack_assign PCRE2_SUFFIX(pcre2_jit_stack_assign_)
|
||||
#define pcre2_jit_stack_create PCRE2_SUFFIX(pcre2_jit_stack_create_)
|
||||
#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_)
|
||||
#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_)
|
||||
#define pcre2_maketables_free PCRE2_SUFFIX(pcre2_maketables_free_)
|
||||
#define pcre2_match PCRE2_SUFFIX(pcre2_match_)
|
||||
#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_)
|
||||
#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_)
|
||||
#define pcre2_match_context_free PCRE2_SUFFIX(pcre2_match_context_free_)
|
||||
#define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_)
|
||||
#define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_)
|
||||
#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_)
|
||||
#define pcre2_pattern_convert PCRE2_SUFFIX(pcre2_pattern_convert_)
|
||||
#define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_)
|
||||
#define pcre2_serialize_decode PCRE2_SUFFIX(pcre2_serialize_decode_)
|
||||
#define pcre2_serialize_encode PCRE2_SUFFIX(pcre2_serialize_encode_)
|
||||
#define pcre2_serialize_free PCRE2_SUFFIX(pcre2_serialize_free_)
|
||||
#define pcre2_serialize_get_number_of_codes PCRE2_SUFFIX(pcre2_serialize_get_number_of_codes_)
|
||||
#define pcre2_set_bsr PCRE2_SUFFIX(pcre2_set_bsr_)
|
||||
#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_)
|
||||
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
||||
#define pcre2_set_compile_extra_options PCRE2_SUFFIX(pcre2_set_compile_extra_options_)
|
||||
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
||||
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
|
||||
#define pcre2_set_glob_escape PCRE2_SUFFIX(pcre2_set_glob_escape_)
|
||||
#define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_)
|
||||
#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
|
||||
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
||||
#define pcre2_set_max_varlookbehind PCRE2_SUFFIX(pcre2_set_max_varlookbehind_)
|
||||
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
|
||||
#define pcre2_set_max_pattern_compiled_length PCRE2_SUFFIX(pcre2_set_max_pattern_compiled_length_)
|
||||
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
||||
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
|
||||
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
|
||||
#define pcre2_set_optimize PCRE2_SUFFIX(pcre2_set_optimize_)
|
||||
#define pcre2_set_substitute_callout PCRE2_SUFFIX(pcre2_set_substitute_callout_)
|
||||
#define pcre2_set_substitute_case_callout PCRE2_SUFFIX(pcre2_set_substitute_case_callout_)
|
||||
#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_)
|
||||
#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_)
|
||||
#define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_)
|
||||
#define pcre2_substring_free PCRE2_SUFFIX(pcre2_substring_free_)
|
||||
#define pcre2_substring_get_byname PCRE2_SUFFIX(pcre2_substring_get_byname_)
|
||||
#define pcre2_substring_get_bynumber PCRE2_SUFFIX(pcre2_substring_get_bynumber_)
|
||||
#define pcre2_substring_length_byname PCRE2_SUFFIX(pcre2_substring_length_byname_)
|
||||
#define pcre2_substring_length_bynumber PCRE2_SUFFIX(pcre2_substring_length_bynumber_)
|
||||
#define pcre2_substring_list_get PCRE2_SUFFIX(pcre2_substring_list_get_)
|
||||
#define pcre2_substring_list_free PCRE2_SUFFIX(pcre2_substring_list_free_)
|
||||
#define pcre2_substring_nametable_scan PCRE2_SUFFIX(pcre2_substring_nametable_scan_)
|
||||
#define pcre2_substring_number_from_name PCRE2_SUFFIX(pcre2_substring_number_from_name_)
|
||||
|
||||
/* Keep this old function name for backwards compatibility */
|
||||
#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)
|
||||
|
||||
/* Keep this obsolete function for backwards compatibility: it is now a noop. */
|
||||
#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)
|
||||
|
||||
/* Now generate all three sets of width-specific structures and function
|
||||
prototypes. */
|
||||
|
||||
#define PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS \
|
||||
PCRE2_TYPES_LIST \
|
||||
PCRE2_STRUCTURE_LIST \
|
||||
PCRE2_GENERAL_INFO_FUNCTIONS \
|
||||
PCRE2_GENERAL_CONTEXT_FUNCTIONS \
|
||||
PCRE2_COMPILE_CONTEXT_FUNCTIONS \
|
||||
PCRE2_CONVERT_CONTEXT_FUNCTIONS \
|
||||
PCRE2_CONVERT_FUNCTIONS \
|
||||
PCRE2_MATCH_CONTEXT_FUNCTIONS \
|
||||
PCRE2_COMPILE_FUNCTIONS \
|
||||
PCRE2_PATTERN_INFO_FUNCTIONS \
|
||||
PCRE2_MATCH_FUNCTIONS \
|
||||
PCRE2_SUBSTRING_FUNCTIONS \
|
||||
PCRE2_SERIALIZE_FUNCTIONS \
|
||||
PCRE2_SUBSTITUTE_FUNCTION \
|
||||
PCRE2_JIT_FUNCTIONS \
|
||||
PCRE2_OTHER_FUNCTIONS
|
||||
|
||||
#define PCRE2_LOCAL_WIDTH 8
|
||||
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
#undef PCRE2_LOCAL_WIDTH
|
||||
|
||||
#define PCRE2_LOCAL_WIDTH 16
|
||||
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
#undef PCRE2_LOCAL_WIDTH
|
||||
|
||||
#define PCRE2_LOCAL_WIDTH 32
|
||||
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
#undef PCRE2_LOCAL_WIDTH
|
||||
|
||||
/* Undefine the list macros; they are no longer needed. */
|
||||
|
||||
#undef PCRE2_TYPES_LIST
|
||||
#undef PCRE2_STRUCTURE_LIST
|
||||
#undef PCRE2_GENERAL_INFO_FUNCTIONS
|
||||
#undef PCRE2_GENERAL_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_COMPILE_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_CONVERT_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_MATCH_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_COMPILE_FUNCTIONS
|
||||
#undef PCRE2_PATTERN_INFO_FUNCTIONS
|
||||
#undef PCRE2_MATCH_FUNCTIONS
|
||||
#undef PCRE2_SUBSTRING_FUNCTIONS
|
||||
#undef PCRE2_SERIALIZE_FUNCTIONS
|
||||
#undef PCRE2_SUBSTITUTE_FUNCTION
|
||||
#undef PCRE2_JIT_FUNCTIONS
|
||||
#undef PCRE2_OTHER_FUNCTIONS
|
||||
#undef PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
|
||||
/* PCRE2_CODE_UNIT_WIDTH must be defined. If it is 8, 16, or 32, redefine
|
||||
PCRE2_SUFFIX to use it. If it is 0, undefine the other macros and make
|
||||
PCRE2_SUFFIX a no-op. Otherwise, generate an error. */
|
||||
|
||||
#undef PCRE2_SUFFIX
|
||||
#ifndef PCRE2_CODE_UNIT_WIDTH
|
||||
#error PCRE2_CODE_UNIT_WIDTH must be defined before including pcre2.h.
|
||||
#error Use 8, 16, or 32; or 0 for a multi-width application.
|
||||
#else /* PCRE2_CODE_UNIT_WIDTH is defined */
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8 || \
|
||||
PCRE2_CODE_UNIT_WIDTH == 16 || \
|
||||
PCRE2_CODE_UNIT_WIDTH == 32
|
||||
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a, PCRE2_CODE_UNIT_WIDTH)
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 0
|
||||
#undef PCRE2_JOIN
|
||||
#undef PCRE2_GLUE
|
||||
#define PCRE2_SUFFIX(a) a
|
||||
#else
|
||||
#error PCRE2_CODE_UNIT_WIDTH must be 0, 8, 16, or 32.
|
||||
#endif
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH is defined */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* PCRE2_H_IDEMPOTENT_GUARD */
|
||||
|
||||
/* End of pcre2.h */
|
||||
1412
3rd/pcre2/src/pcre2_auto_possess.c
Normal file
1412
3rd/pcre2/src/pcre2_auto_possess.c
Normal file
@@ -0,0 +1,1412 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This module contains functions that scan a compiled pattern and change
|
||||
repeats into possessive repeats where possible. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
/* This macro represents the max size of list[] and that is used to keep
|
||||
track of UCD info in several places; it should be kept in sync with the
|
||||
value used by GenerateUcd.py */
|
||||
#define MAX_LIST 8
|
||||
|
||||
/*************************************************
|
||||
* Tables for auto-possessification *
|
||||
*************************************************/
|
||||
|
||||
/* This table is used to check whether auto-possessification is possible
|
||||
between adjacent character-type opcodes. The left-hand (repeated) opcode is
|
||||
used to select the row, and the right-hand opcode is used to select the column.
|
||||
A value of 1 means that auto-possessification is OK. For example, the second
|
||||
value in the first row means that \D+\d can be turned into \D++\d.
|
||||
|
||||
The Unicode property types (\P and \p) have to be present to fill out the table
|
||||
because of what their opcode values are, but the table values should always be
|
||||
zero because property types are handled separately in the code. The last four
|
||||
columns apply to items that cannot be repeated, so there is no need to have
|
||||
rows for them. Note that OP_DIGIT etc. are generated only when PCRE2_UCP is
|
||||
*not* set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
|
||||
|
||||
#define APTROWS (LAST_AUTOTAB_LEFT_OP - FIRST_AUTOTAB_OP + 1)
|
||||
#define APTCOLS (LAST_AUTOTAB_RIGHT_OP - FIRST_AUTOTAB_OP + 1)
|
||||
|
||||
static const uint8_t autoposstab[APTROWS][APTCOLS] = {
|
||||
/* \D \d \S \s \W \w . .+ \C \P \p \R \H \h \V \v \X \Z \z $ $M */
|
||||
{ 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \D */
|
||||
{ 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \d */
|
||||
{ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \S */
|
||||
{ 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \s */
|
||||
{ 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \W */
|
||||
{ 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \w */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* . */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* .+ */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \C */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* \P */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* \p */
|
||||
{ 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 }, /* \R */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 }, /* \H */
|
||||
{ 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0 }, /* \h */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0 }, /* \V */
|
||||
{ 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0 }, /* \v */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 } /* \X */
|
||||
};
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
/* This table is used to check whether auto-possessification is possible
|
||||
between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP). The
|
||||
left-hand (repeated) opcode is used to select the row, and the right-hand
|
||||
opcode is used to select the column. The values are as follows:
|
||||
|
||||
0 Always return FALSE (never auto-possessify)
|
||||
1 Character groups are distinct (possessify if both are OP_PROP)
|
||||
2 Check character categories in the same group (general or particular)
|
||||
3 TRUE if the two opcodes are not the same (PROP vs NOTPROP)
|
||||
|
||||
4 Check left general category vs right particular category
|
||||
5 Check right general category vs left particular category
|
||||
|
||||
6 Left alphanum vs right general category
|
||||
7 Left space vs right general category
|
||||
8 Left word vs right general category
|
||||
|
||||
9 Right alphanum vs left general category
|
||||
10 Right space vs left general category
|
||||
11 Right word vs left general category
|
||||
|
||||
12 Left alphanum vs right particular category
|
||||
13 Left space vs right particular category
|
||||
14 Left word vs right particular category
|
||||
|
||||
15 Right alphanum vs left particular category
|
||||
16 Right space vs left particular category
|
||||
17 Right word vs left particular category
|
||||
*/
|
||||
|
||||
static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = {
|
||||
/* LAMP GC PC SC SCX ALNUM SPACE PXSPACE WORD CLIST UCNC BIDICL BOOL */
|
||||
{ 3, 0, 0, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_LAMP */
|
||||
{ 0, 2, 4, 0, 0, 9, 10, 10, 11, 0, 0, 0, 0 }, /* PT_GC */
|
||||
{ 0, 5, 2, 0, 0, 15, 16, 16, 17, 0, 0, 0, 0 }, /* PT_PC */
|
||||
{ 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SC */
|
||||
{ 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SCX */
|
||||
{ 3, 6, 12, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_ALNUM */
|
||||
{ 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_SPACE */
|
||||
{ 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_PXSPACE */
|
||||
{ 0, 8, 14, 0, 0, 0, 1, 1, 3, 0, 0, 0, 0 }, /* PT_WORD */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0 }, /* PT_UCNC */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_BIDICL */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } /* PT_BOOL */
|
||||
/* PT_ANY does not need a record. */
|
||||
};
|
||||
|
||||
/* This table is used to check whether auto-possessification is possible
|
||||
between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP) when one
|
||||
specifies a general category and the other specifies a particular category. The
|
||||
row is selected by the general category and the column by the particular
|
||||
category. The value is 1 if the particular category is not part of the general
|
||||
category. */
|
||||
|
||||
static const uint8_t catposstab[7][30] = {
|
||||
/* Cc Cf Cn Co Cs Ll Lm Lo Lt Lu Mc Me Mn Nd Nl No Pc Pd Pe Pf Pi Po Ps Sc Sk Sm So Zl Zp Zs */
|
||||
{ 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* C */
|
||||
{ 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* L */
|
||||
{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* M */
|
||||
{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* N */
|
||||
{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 }, /* P */
|
||||
{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1 }, /* S */
|
||||
{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 } /* Z */
|
||||
};
|
||||
|
||||
/* This table is used when checking ALNUM, (PX)SPACE, SPACE, and WORD against
|
||||
a general or particular category. The properties in each row are those
|
||||
that apply to the character set in question. Duplication means that a little
|
||||
unnecessary work is done when checking, but this keeps things much simpler
|
||||
because they can all use the same code. For more details see the comment where
|
||||
this table is used.
|
||||
|
||||
Note: SPACE and PXSPACE used to be different because Perl excluded VT from
|
||||
"space", but from Perl 5.18 it's included, so both categories are treated the
|
||||
same here. */
|
||||
|
||||
static const uint8_t posspropstab[3][4] = {
|
||||
{ ucp_L, ucp_N, ucp_N, ucp_Nl }, /* ALNUM, 3rd and 4th values redundant */
|
||||
{ ucp_Z, ucp_Z, ucp_C, ucp_Cc }, /* SPACE and PXSPACE, 2nd value redundant */
|
||||
{ ucp_L, ucp_N, ucp_P, ucp_Po } /* WORD */
|
||||
};
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
/*************************************************
|
||||
* Check a character and a property *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called by compare_opcodes() when a property item is
|
||||
adjacent to a fixed character.
|
||||
|
||||
Arguments:
|
||||
c the character
|
||||
ptype the property type
|
||||
pdata the data for the type
|
||||
negated TRUE if it's a negated property (\P or \p{^)
|
||||
|
||||
Returns: TRUE if auto-possessifying is OK
|
||||
*/
|
||||
|
||||
static BOOL
|
||||
check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata,
|
||||
BOOL negated)
|
||||
{
|
||||
BOOL ok, rc;
|
||||
const uint32_t *p;
|
||||
const ucd_record *prop = GET_UCD(c);
|
||||
|
||||
switch(ptype)
|
||||
{
|
||||
case PT_LAMP:
|
||||
return (prop->chartype == ucp_Lu ||
|
||||
prop->chartype == ucp_Ll ||
|
||||
prop->chartype == ucp_Lt) == negated;
|
||||
|
||||
case PT_GC:
|
||||
return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
|
||||
|
||||
case PT_PC:
|
||||
return (pdata == prop->chartype) == negated;
|
||||
|
||||
case PT_SC:
|
||||
return (pdata == prop->script) == negated;
|
||||
|
||||
case PT_SCX:
|
||||
ok = (pdata == prop->script
|
||||
|| MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), pdata) != 0);
|
||||
return ok == negated;
|
||||
|
||||
/* These are specials */
|
||||
|
||||
case PT_ALNUM:
|
||||
return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
|
||||
|
||||
/* Perl space used to exclude VT, but from Perl 5.18 it is included, which
|
||||
means that Perl space and POSIX space are now identical. PCRE was changed
|
||||
at release 8.34. */
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
switch(c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
VSPACE_CASES:
|
||||
rc = negated;
|
||||
break;
|
||||
|
||||
default:
|
||||
rc = (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated;
|
||||
}
|
||||
return rc;
|
||||
|
||||
case PT_WORD:
|
||||
return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
|
||||
c == CHAR_UNDERSCORE) == negated;
|
||||
|
||||
case PT_CLIST:
|
||||
p = PRIV(ucd_caseless_sets) + prop->caseset;
|
||||
for (;;)
|
||||
{
|
||||
if (c < *p) return !negated;
|
||||
if (c == *p++) return negated;
|
||||
}
|
||||
PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
|
||||
break;
|
||||
|
||||
/* Haven't yet thought these through. */
|
||||
|
||||
case PT_BIDICL:
|
||||
return FALSE;
|
||||
|
||||
case PT_BOOL:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Base opcode of repeated opcodes *
|
||||
*************************************************/
|
||||
|
||||
/* Returns the base opcode for repeated single character type opcodes. If the
|
||||
opcode is not a repeated character type, it returns with the original value.
|
||||
|
||||
Arguments: c opcode
|
||||
Returns: base opcode for the type
|
||||
*/
|
||||
|
||||
static PCRE2_UCHAR
|
||||
get_repeat_base(PCRE2_UCHAR c)
|
||||
{
|
||||
return (c > OP_TYPEPOSUPTO)? c :
|
||||
(c >= OP_TYPESTAR)? OP_TYPESTAR :
|
||||
(c >= OP_NOTSTARI)? OP_NOTSTARI :
|
||||
(c >= OP_NOTSTAR)? OP_NOTSTAR :
|
||||
(c >= OP_STARI)? OP_STARI :
|
||||
OP_STAR;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Fill the character property list *
|
||||
*************************************************/
|
||||
|
||||
/* Checks whether the code points to an opcode that can take part in auto-
|
||||
possessification, and if so, fills a list with its properties.
|
||||
|
||||
Arguments:
|
||||
code points to start of expression
|
||||
utf TRUE if in UTF mode
|
||||
ucp TRUE if in UCP mode
|
||||
fcc points to the case-flipping table
|
||||
list points to output list
|
||||
list[0] will be filled with the opcode
|
||||
list[1] will be non-zero if this opcode
|
||||
can match an empty character string
|
||||
list[2..7] depends on the opcode
|
||||
|
||||
Returns: points to the start of the next opcode if *code is accepted
|
||||
NULL if *code is not accepted
|
||||
*/
|
||||
|
||||
static PCRE2_SPTR
|
||||
get_chr_property_list(PCRE2_SPTR code, BOOL utf, BOOL ucp, const uint8_t *fcc,
|
||||
uint32_t *list)
|
||||
{
|
||||
PCRE2_UCHAR c = *code;
|
||||
PCRE2_UCHAR base;
|
||||
PCRE2_SPTR end;
|
||||
PCRE2_SPTR class_end;
|
||||
uint32_t chr;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
uint32_t *clist_dest;
|
||||
const uint32_t *clist_src;
|
||||
#else
|
||||
(void)utf; /* Suppress "unused parameter" compiler warnings */
|
||||
(void)ucp;
|
||||
#endif
|
||||
|
||||
list[0] = c;
|
||||
list[1] = FALSE;
|
||||
code++;
|
||||
|
||||
if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
|
||||
{
|
||||
base = get_repeat_base(c);
|
||||
c -= (base - OP_STAR);
|
||||
|
||||
if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO)
|
||||
code += IMM2_SIZE;
|
||||
|
||||
list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT &&
|
||||
c != OP_POSPLUS);
|
||||
|
||||
switch(base)
|
||||
{
|
||||
case OP_STAR:
|
||||
list[0] = OP_CHAR;
|
||||
break;
|
||||
|
||||
case OP_STARI:
|
||||
list[0] = OP_CHARI;
|
||||
break;
|
||||
|
||||
case OP_NOTSTAR:
|
||||
list[0] = OP_NOT;
|
||||
break;
|
||||
|
||||
case OP_NOTSTARI:
|
||||
list[0] = OP_NOTI;
|
||||
break;
|
||||
|
||||
case OP_TYPESTAR:
|
||||
list[0] = *code;
|
||||
code++;
|
||||
break;
|
||||
}
|
||||
c = list[0];
|
||||
}
|
||||
|
||||
switch(c)
|
||||
{
|
||||
case OP_NOT_DIGIT:
|
||||
case OP_DIGIT:
|
||||
case OP_NOT_WHITESPACE:
|
||||
case OP_WHITESPACE:
|
||||
case OP_NOT_WORDCHAR:
|
||||
case OP_WORDCHAR:
|
||||
case OP_ANY:
|
||||
case OP_ALLANY:
|
||||
case OP_ANYNL:
|
||||
case OP_NOT_HSPACE:
|
||||
case OP_HSPACE:
|
||||
case OP_NOT_VSPACE:
|
||||
case OP_VSPACE:
|
||||
case OP_EXTUNI:
|
||||
case OP_EODN:
|
||||
case OP_EOD:
|
||||
case OP_DOLL:
|
||||
case OP_DOLLM:
|
||||
return code;
|
||||
|
||||
case OP_CHAR:
|
||||
case OP_NOT:
|
||||
GETCHARINCTEST(chr, code);
|
||||
list[2] = chr;
|
||||
list[3] = NOTACHAR;
|
||||
return code;
|
||||
|
||||
case OP_CHARI:
|
||||
case OP_NOTI:
|
||||
list[0] = (c == OP_CHARI) ? OP_CHAR : OP_NOT;
|
||||
GETCHARINCTEST(chr, code);
|
||||
list[2] = chr;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (chr < 128 || (chr < 256 && !utf && !ucp))
|
||||
list[3] = fcc[chr];
|
||||
else
|
||||
list[3] = UCD_OTHERCASE(chr);
|
||||
#elif defined SUPPORT_WIDE_CHARS
|
||||
list[3] = (chr < 256) ? fcc[chr] : chr;
|
||||
#else
|
||||
list[3] = fcc[chr];
|
||||
#endif
|
||||
|
||||
/* The othercase might be the same value. */
|
||||
|
||||
if (chr == list[3])
|
||||
list[3] = NOTACHAR;
|
||||
else
|
||||
list[4] = NOTACHAR;
|
||||
return code;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
case OP_PROP:
|
||||
case OP_NOTPROP:
|
||||
if (code[0] != PT_CLIST)
|
||||
{
|
||||
list[2] = code[0];
|
||||
list[3] = code[1];
|
||||
return code + 2;
|
||||
}
|
||||
|
||||
/* Convert only if we have enough space. */
|
||||
|
||||
clist_src = PRIV(ucd_caseless_sets) + code[1];
|
||||
clist_dest = list + 2;
|
||||
code += 2;
|
||||
|
||||
do {
|
||||
if (clist_dest >= list + MAX_LIST)
|
||||
{
|
||||
/* Early return if there is not enough space. GenerateUcd.py
|
||||
generated a list with more than 5 characters and something
|
||||
must be done about that going forward. */
|
||||
PCRE2_DEBUG_UNREACHABLE(); /* Remove if it ever triggers */
|
||||
list[2] = code[0];
|
||||
list[3] = code[1];
|
||||
return code;
|
||||
}
|
||||
*clist_dest++ = *clist_src;
|
||||
}
|
||||
while(*clist_src++ != NOTACHAR);
|
||||
|
||||
/* All characters are stored. The terminating NOTACHAR is copied from the
|
||||
clist itself. */
|
||||
|
||||
list[0] = (c == OP_PROP) ? OP_CHAR : OP_NOT;
|
||||
return code;
|
||||
#endif
|
||||
|
||||
case OP_NCLASS:
|
||||
case OP_CLASS:
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
case OP_XCLASS:
|
||||
case OP_ECLASS:
|
||||
if (c == OP_XCLASS || c == OP_ECLASS)
|
||||
end = code + GET(code, 0) - 1;
|
||||
else
|
||||
#endif
|
||||
end = code + 32 / sizeof(PCRE2_UCHAR);
|
||||
class_end = end;
|
||||
|
||||
switch(*end)
|
||||
{
|
||||
case OP_CRSTAR:
|
||||
case OP_CRMINSTAR:
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
case OP_CRPOSSTAR:
|
||||
case OP_CRPOSQUERY:
|
||||
list[1] = TRUE;
|
||||
end++;
|
||||
break;
|
||||
|
||||
case OP_CRPLUS:
|
||||
case OP_CRMINPLUS:
|
||||
case OP_CRPOSPLUS:
|
||||
end++;
|
||||
break;
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
case OP_CRPOSRANGE:
|
||||
list[1] = (GET2(end, 1) == 0);
|
||||
end += 1 + 2 * IMM2_SIZE;
|
||||
break;
|
||||
}
|
||||
list[2] = (uint32_t)(end - code);
|
||||
list[3] = (uint32_t)(end - class_end);
|
||||
return end;
|
||||
}
|
||||
|
||||
return NULL; /* Opcode not accepted */
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Scan further character sets for match *
|
||||
*************************************************/
|
||||
|
||||
/* Checks whether the base and the current opcode have a common character, in
|
||||
which case the base cannot be possessified.
|
||||
|
||||
Arguments:
|
||||
code points to the byte code
|
||||
utf TRUE in UTF mode
|
||||
ucp TRUE in UCP mode
|
||||
cb compile data block
|
||||
base_list the data list of the base opcode
|
||||
base_end the end of the base opcode
|
||||
rec_limit points to recursion depth counter
|
||||
|
||||
Returns: TRUE if the auto-possessification is possible
|
||||
*/
|
||||
|
||||
static BOOL
|
||||
compare_opcodes(PCRE2_SPTR code, BOOL utf, BOOL ucp, const compile_block *cb,
|
||||
const uint32_t *base_list, PCRE2_SPTR base_end, int *rec_limit)
|
||||
{
|
||||
PCRE2_UCHAR c;
|
||||
uint32_t list[MAX_LIST];
|
||||
const uint32_t *chr_ptr;
|
||||
const uint32_t *ochr_ptr;
|
||||
const uint32_t *list_ptr;
|
||||
PCRE2_SPTR next_code;
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
PCRE2_SPTR xclass_flags;
|
||||
#endif
|
||||
const uint8_t *class_bitset;
|
||||
const uint8_t *set1, *set2, *set_end;
|
||||
uint32_t chr;
|
||||
BOOL accepted, invert_bits;
|
||||
BOOL entered_a_group = FALSE;
|
||||
|
||||
if (--(*rec_limit) <= 0) return FALSE; /* Recursion has gone too deep */
|
||||
|
||||
/* Note: the base_list[1] contains whether the current opcode has a greedy
|
||||
(represented by a non-zero value) quantifier. This is a different from
|
||||
other character type lists, which store here that the character iterator
|
||||
matches to an empty string (also represented by a non-zero value). */
|
||||
|
||||
for(;;)
|
||||
{
|
||||
PCRE2_SPTR bracode;
|
||||
|
||||
/* All operations move the code pointer forward.
|
||||
Therefore infinite recursions are not possible. */
|
||||
|
||||
c = *code;
|
||||
|
||||
/* Skip over callouts */
|
||||
|
||||
if (c == OP_CALLOUT)
|
||||
{
|
||||
code += PRIV(OP_lengths)[c];
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c == OP_CALLOUT_STR)
|
||||
{
|
||||
code += GET(code, 1 + 2*LINK_SIZE);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* At the end of a branch, skip to the end of the group and process it. */
|
||||
|
||||
if (c == OP_ALT)
|
||||
{
|
||||
do code += GET(code, 1); while (*code == OP_ALT);
|
||||
c = *code;
|
||||
}
|
||||
|
||||
/* Inspect the next opcode. */
|
||||
|
||||
switch(c)
|
||||
{
|
||||
/* We can always possessify a greedy iterator at the end of the pattern,
|
||||
which is reached after skipping over the final OP_KET. A non-greedy
|
||||
iterator must never be possessified. */
|
||||
|
||||
case OP_END:
|
||||
return base_list[1] != 0;
|
||||
|
||||
/* When an iterator is at the end of certain kinds of group we can inspect
|
||||
what follows the group by skipping over the closing ket. Note that this
|
||||
does not apply to OP_KETRMAX or OP_KETRMIN because what follows any given
|
||||
iteration is variable (could be another iteration or could be the next
|
||||
item). As these two opcodes are not listed in the next switch, they will
|
||||
end up as the next code to inspect, and return FALSE by virtue of being
|
||||
unsupported. */
|
||||
|
||||
case OP_KET:
|
||||
case OP_KETRPOS:
|
||||
/* The non-greedy case cannot be converted to a possessive form. */
|
||||
|
||||
if (base_list[1] == 0) return FALSE;
|
||||
|
||||
/* If the bracket is capturing it might be referenced by an OP_RECURSE
|
||||
so its last iterator can never be possessified if the pattern contains
|
||||
recursions. (This could be improved by keeping a list of group numbers that
|
||||
are called by recursion.) */
|
||||
|
||||
bracode = code - GET(code, 1);
|
||||
switch(*bracode)
|
||||
{
|
||||
case OP_CBRA:
|
||||
case OP_SCBRA:
|
||||
case OP_CBRAPOS:
|
||||
case OP_SCBRAPOS:
|
||||
if (cb->had_recurse) return FALSE;
|
||||
break;
|
||||
|
||||
/* A script run might have to backtrack if the iterated item can match
|
||||
characters from more than one script. So give up unless repeating an
|
||||
explicit character. */
|
||||
|
||||
case OP_SCRIPT_RUN:
|
||||
if (base_list[0] != OP_CHAR && base_list[0] != OP_CHARI)
|
||||
return FALSE;
|
||||
break;
|
||||
|
||||
/* Atomic sub-patterns and forward assertions can always auto-possessify
|
||||
their last iterator. However, if the group was entered as a result of
|
||||
checking a previous iterator, this is not possible. */
|
||||
|
||||
case OP_ASSERT:
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ONCE:
|
||||
return !entered_a_group;
|
||||
|
||||
/* Fixed-length lookbehinds can be treated the same way, but variable
|
||||
length lookbehinds must not auto-possessify their last iterator. Note
|
||||
that in order to identify a variable length lookbehind we must check
|
||||
through all branches, because some may be of fixed length. */
|
||||
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
do
|
||||
{
|
||||
if (bracode[1+LINK_SIZE] == OP_VREVERSE) return FALSE; /* Variable */
|
||||
bracode += GET(bracode, 1);
|
||||
}
|
||||
while (*bracode == OP_ALT);
|
||||
return !entered_a_group; /* Not variable length */
|
||||
|
||||
/* Non-atomic assertions - don't possessify last iterator. This needs
|
||||
more thought. */
|
||||
|
||||
case OP_ASSERT_NA:
|
||||
case OP_ASSERTBACK_NA:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* Skip over the bracket and inspect what comes next. */
|
||||
|
||||
code += PRIV(OP_lengths)[c];
|
||||
continue;
|
||||
|
||||
/* Handle cases where the next item is a group. */
|
||||
|
||||
case OP_ONCE:
|
||||
case OP_BRA:
|
||||
case OP_CBRA:
|
||||
next_code = code + GET(code, 1);
|
||||
code += PRIV(OP_lengths)[c];
|
||||
|
||||
/* Check each branch. We have to recurse a level for all but the last
|
||||
branch. */
|
||||
|
||||
while (*next_code == OP_ALT)
|
||||
{
|
||||
if (!compare_opcodes(code, utf, ucp, cb, base_list, base_end, rec_limit))
|
||||
return FALSE;
|
||||
code = next_code + 1 + LINK_SIZE;
|
||||
next_code += GET(next_code, 1);
|
||||
}
|
||||
|
||||
entered_a_group = TRUE;
|
||||
continue;
|
||||
|
||||
case OP_BRAZERO:
|
||||
case OP_BRAMINZERO:
|
||||
|
||||
next_code = code + 1;
|
||||
if (*next_code != OP_BRA && *next_code != OP_CBRA &&
|
||||
*next_code != OP_ONCE) return FALSE;
|
||||
|
||||
do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
|
||||
|
||||
/* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
|
||||
|
||||
next_code += 1 + LINK_SIZE;
|
||||
if (!compare_opcodes(next_code, utf, ucp, cb, base_list, base_end,
|
||||
rec_limit))
|
||||
return FALSE;
|
||||
|
||||
code += PRIV(OP_lengths)[c];
|
||||
continue;
|
||||
|
||||
/* The next opcode does not need special handling; fall through and use it
|
||||
to see if the base can be possessified. */
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* We now have the next appropriate opcode to compare with the base. Check
|
||||
for a supported opcode, and load its properties. */
|
||||
|
||||
code = get_chr_property_list(code, utf, ucp, cb->fcc, list);
|
||||
if (code == NULL) return FALSE; /* Unsupported */
|
||||
|
||||
/* If either opcode is a small character list, set pointers for comparing
|
||||
characters from that list with another list, or with a property. */
|
||||
|
||||
if (base_list[0] == OP_CHAR)
|
||||
{
|
||||
chr_ptr = base_list + 2;
|
||||
list_ptr = list;
|
||||
}
|
||||
else if (list[0] == OP_CHAR)
|
||||
{
|
||||
chr_ptr = list + 2;
|
||||
list_ptr = base_list;
|
||||
}
|
||||
|
||||
/* Character bitsets can also be compared to certain opcodes. */
|
||||
|
||||
else if (base_list[0] == OP_CLASS || list[0] == OP_CLASS
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
/* In 8 bit, non-UTF mode, OP_CLASS and OP_NCLASS are the same. */
|
||||
|| (!utf && (base_list[0] == OP_NCLASS || list[0] == OP_NCLASS))
|
||||
#endif
|
||||
)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (base_list[0] == OP_CLASS || (!utf && base_list[0] == OP_NCLASS))
|
||||
#else
|
||||
if (base_list[0] == OP_CLASS)
|
||||
#endif
|
||||
{
|
||||
set1 = (const uint8_t *)(base_end - base_list[2]);
|
||||
list_ptr = list;
|
||||
}
|
||||
else
|
||||
{
|
||||
set1 = (const uint8_t *)(code - list[2]);
|
||||
list_ptr = base_list;
|
||||
}
|
||||
|
||||
invert_bits = FALSE;
|
||||
switch(list_ptr[0])
|
||||
{
|
||||
case OP_CLASS:
|
||||
case OP_NCLASS:
|
||||
set2 = (const uint8_t *)
|
||||
((list_ptr == list ? code : base_end) - list_ptr[2]);
|
||||
break;
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
case OP_XCLASS:
|
||||
xclass_flags = (list_ptr == list ? code : base_end) -
|
||||
list_ptr[2] + LINK_SIZE;
|
||||
if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE;
|
||||
if ((*xclass_flags & XCL_MAP) == 0)
|
||||
{
|
||||
/* No bits are set for characters < 256. */
|
||||
if (list[1] == 0) return (*xclass_flags & XCL_NOT) == 0;
|
||||
/* Might be an empty repeat. */
|
||||
continue;
|
||||
}
|
||||
set2 = (const uint8_t *)(xclass_flags + 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
case OP_NOT_DIGIT:
|
||||
invert_bits = TRUE;
|
||||
/* Fall through */
|
||||
case OP_DIGIT:
|
||||
set2 = (const uint8_t *)(cb->cbits + cbit_digit);
|
||||
break;
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
invert_bits = TRUE;
|
||||
/* Fall through */
|
||||
case OP_WHITESPACE:
|
||||
set2 = (const uint8_t *)(cb->cbits + cbit_space);
|
||||
break;
|
||||
|
||||
case OP_NOT_WORDCHAR:
|
||||
invert_bits = TRUE;
|
||||
/* Fall through */
|
||||
case OP_WORDCHAR:
|
||||
set2 = (const uint8_t *)(cb->cbits + cbit_word);
|
||||
break;
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* Because the bit sets are unaligned bytes, we need to perform byte
|
||||
comparison here. */
|
||||
|
||||
set_end = set1 + 32;
|
||||
if (invert_bits)
|
||||
{
|
||||
do
|
||||
{
|
||||
if ((*set1++ & ~(*set2++)) != 0) return FALSE;
|
||||
}
|
||||
while (set1 < set_end);
|
||||
}
|
||||
else
|
||||
{
|
||||
do
|
||||
{
|
||||
if ((*set1++ & *set2++) != 0) return FALSE;
|
||||
}
|
||||
while (set1 < set_end);
|
||||
}
|
||||
|
||||
if (list[1] == 0) return TRUE;
|
||||
/* Might be an empty repeat. */
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Some property combinations also acceptable. Unicode property opcodes are
|
||||
processed specially; the rest can be handled with a lookup table. */
|
||||
|
||||
else
|
||||
{
|
||||
uint32_t leftop, rightop;
|
||||
|
||||
leftop = base_list[0];
|
||||
rightop = list[0];
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
accepted = FALSE; /* Always set in non-unicode case. */
|
||||
if (leftop == OP_PROP || leftop == OP_NOTPROP)
|
||||
{
|
||||
if (rightop == OP_EOD)
|
||||
accepted = TRUE;
|
||||
else if (rightop == OP_PROP || rightop == OP_NOTPROP)
|
||||
{
|
||||
int n;
|
||||
const uint8_t *p;
|
||||
BOOL same = leftop == rightop;
|
||||
BOOL lisprop = leftop == OP_PROP;
|
||||
BOOL risprop = rightop == OP_PROP;
|
||||
BOOL bothprop = lisprop && risprop;
|
||||
|
||||
/* There's a table that specifies how each combination is to be
|
||||
processed:
|
||||
0 Always return FALSE (never auto-possessify)
|
||||
1 Character groups are distinct (possessify if both are OP_PROP)
|
||||
2 Check character categories in the same group (general or particular)
|
||||
3 Return TRUE if the two opcodes are not the same
|
||||
... see comments below
|
||||
*/
|
||||
|
||||
n = propposstab[base_list[2]][list[2]];
|
||||
switch(n)
|
||||
{
|
||||
case 0: break;
|
||||
case 1: accepted = bothprop; break;
|
||||
case 2: accepted = (base_list[3] == list[3]) != same; break;
|
||||
case 3: accepted = !same; break;
|
||||
|
||||
case 4: /* Left general category, right particular category */
|
||||
accepted = risprop && catposstab[base_list[3]][list[3]] == same;
|
||||
break;
|
||||
|
||||
case 5: /* Right general category, left particular category */
|
||||
accepted = lisprop && catposstab[list[3]][base_list[3]] == same;
|
||||
break;
|
||||
|
||||
/* This code is logically tricky. Think hard before fiddling with it.
|
||||
The posspropstab table has four entries per row. Each row relates to
|
||||
one of PCRE's special properties such as ALNUM or SPACE or WORD.
|
||||
Only WORD actually needs all four entries, but using repeats for the
|
||||
others means they can all use the same code below.
|
||||
|
||||
The first two entries in each row are Unicode general categories, and
|
||||
apply always, because all the characters they include are part of the
|
||||
PCRE character set. The third and fourth entries are a general and a
|
||||
particular category, respectively, that include one or more relevant
|
||||
characters. One or the other is used, depending on whether the check
|
||||
is for a general or a particular category. However, in both cases the
|
||||
category contains more characters than the specials that are defined
|
||||
for the property being tested against. Therefore, it cannot be used
|
||||
in a NOTPROP case.
|
||||
|
||||
Example: the row for WORD contains ucp_L, ucp_N, ucp_P, ucp_Po.
|
||||
Underscore is covered by ucp_P or ucp_Po. */
|
||||
|
||||
case 6: /* Left alphanum vs right general category */
|
||||
case 7: /* Left space vs right general category */
|
||||
case 8: /* Left word vs right general category */
|
||||
p = posspropstab[n-6];
|
||||
accepted = risprop && lisprop ==
|
||||
(list[3] != p[0] &&
|
||||
list[3] != p[1] &&
|
||||
(list[3] != p[2] || !lisprop));
|
||||
break;
|
||||
|
||||
case 9: /* Right alphanum vs left general category */
|
||||
case 10: /* Right space vs left general category */
|
||||
case 11: /* Right word vs left general category */
|
||||
p = posspropstab[n-9];
|
||||
accepted = lisprop && risprop ==
|
||||
(base_list[3] != p[0] &&
|
||||
base_list[3] != p[1] &&
|
||||
(base_list[3] != p[2] || !risprop));
|
||||
break;
|
||||
|
||||
case 12: /* Left alphanum vs right particular category */
|
||||
case 13: /* Left space vs right particular category */
|
||||
case 14: /* Left word vs right particular category */
|
||||
p = posspropstab[n-12];
|
||||
accepted = risprop && lisprop ==
|
||||
(catposstab[p[0]][list[3]] &&
|
||||
catposstab[p[1]][list[3]] &&
|
||||
(list[3] != p[3] || !lisprop));
|
||||
break;
|
||||
|
||||
case 15: /* Right alphanum vs left particular category */
|
||||
case 16: /* Right space vs left particular category */
|
||||
case 17: /* Right word vs left particular category */
|
||||
p = posspropstab[n-15];
|
||||
accepted = lisprop && risprop ==
|
||||
(catposstab[p[0]][base_list[3]] &&
|
||||
catposstab[p[1]][base_list[3]] &&
|
||||
(base_list[3] != p[3] || !risprop));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
|
||||
rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
|
||||
autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
|
||||
|
||||
if (!accepted) return FALSE;
|
||||
|
||||
if (list[1] == 0) return TRUE;
|
||||
/* Might be an empty repeat. */
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Control reaches here only if one of the items is a small character list.
|
||||
All characters are checked against the other side. */
|
||||
|
||||
do
|
||||
{
|
||||
chr = *chr_ptr;
|
||||
|
||||
switch(list_ptr[0])
|
||||
{
|
||||
case OP_CHAR:
|
||||
ochr_ptr = list_ptr + 2;
|
||||
do
|
||||
{
|
||||
if (chr == *ochr_ptr) return FALSE;
|
||||
ochr_ptr++;
|
||||
}
|
||||
while(*ochr_ptr != NOTACHAR);
|
||||
break;
|
||||
|
||||
case OP_NOT:
|
||||
ochr_ptr = list_ptr + 2;
|
||||
do
|
||||
{
|
||||
if (chr == *ochr_ptr)
|
||||
break;
|
||||
ochr_ptr++;
|
||||
}
|
||||
while(*ochr_ptr != NOTACHAR);
|
||||
if (*ochr_ptr == NOTACHAR) return FALSE; /* Not found */
|
||||
break;
|
||||
|
||||
/* Note that OP_DIGIT etc. are generated only when PCRE2_UCP is *not*
|
||||
set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
|
||||
|
||||
case OP_DIGIT:
|
||||
if (chr < 256 && (cb->ctypes[chr] & ctype_digit) != 0) return FALSE;
|
||||
break;
|
||||
|
||||
case OP_NOT_DIGIT:
|
||||
if (chr > 255 || (cb->ctypes[chr] & ctype_digit) == 0) return FALSE;
|
||||
break;
|
||||
|
||||
case OP_WHITESPACE:
|
||||
if (chr < 256 && (cb->ctypes[chr] & ctype_space) != 0) return FALSE;
|
||||
break;
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
if (chr > 255 || (cb->ctypes[chr] & ctype_space) == 0) return FALSE;
|
||||
break;
|
||||
|
||||
case OP_WORDCHAR:
|
||||
if (chr < 255 && (cb->ctypes[chr] & ctype_word) != 0) return FALSE;
|
||||
break;
|
||||
|
||||
case OP_NOT_WORDCHAR:
|
||||
if (chr > 255 || (cb->ctypes[chr] & ctype_word) == 0) return FALSE;
|
||||
break;
|
||||
|
||||
case OP_HSPACE:
|
||||
switch(chr)
|
||||
{
|
||||
HSPACE_CASES: return FALSE;
|
||||
default: break;
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_NOT_HSPACE:
|
||||
switch(chr)
|
||||
{
|
||||
HSPACE_CASES: break;
|
||||
default: return FALSE;
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_ANYNL:
|
||||
case OP_VSPACE:
|
||||
switch(chr)
|
||||
{
|
||||
VSPACE_CASES: return FALSE;
|
||||
default: break;
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_NOT_VSPACE:
|
||||
switch(chr)
|
||||
{
|
||||
VSPACE_CASES: break;
|
||||
default: return FALSE;
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_DOLL:
|
||||
case OP_EODN:
|
||||
switch (chr)
|
||||
{
|
||||
case CHAR_CR:
|
||||
case CHAR_LF:
|
||||
case CHAR_VT:
|
||||
case CHAR_FF:
|
||||
case CHAR_NEL:
|
||||
#ifndef EBCDIC
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
#endif /* Not EBCDIC */
|
||||
return FALSE;
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_EOD: /* Can always possessify before \z */
|
||||
break;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
case OP_PROP:
|
||||
case OP_NOTPROP:
|
||||
if (!check_char_prop(chr, list_ptr[2], list_ptr[3],
|
||||
list_ptr[0] == OP_NOTPROP))
|
||||
return FALSE;
|
||||
break;
|
||||
#endif
|
||||
|
||||
case OP_NCLASS:
|
||||
if (chr > 255) return FALSE;
|
||||
/* Fall through */
|
||||
|
||||
case OP_CLASS:
|
||||
if (chr > 255) break;
|
||||
class_bitset = (const uint8_t *)
|
||||
((list_ptr == list ? code : base_end) - list_ptr[2]);
|
||||
if ((class_bitset[chr >> 3] & (1u << (chr & 7))) != 0) return FALSE;
|
||||
break;
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
case OP_XCLASS:
|
||||
if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) -
|
||||
list_ptr[2] + LINK_SIZE, (const uint8_t*)cb->start_code, utf))
|
||||
return FALSE;
|
||||
break;
|
||||
|
||||
case OP_ECLASS:
|
||||
if (PRIV(eclass)(chr,
|
||||
(list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE,
|
||||
(list_ptr == list ? code : base_end) - list_ptr[3],
|
||||
(const uint8_t*)cb->start_code, utf))
|
||||
return FALSE;
|
||||
break;
|
||||
#endif /* SUPPORT_WIDE_CHARS */
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
chr_ptr++;
|
||||
}
|
||||
while(*chr_ptr != NOTACHAR);
|
||||
|
||||
/* At least one character must be matched from this opcode. */
|
||||
|
||||
if (list[1] == 0) return TRUE;
|
||||
}
|
||||
|
||||
PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
|
||||
return FALSE; /* Avoid compiler warnings */
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Scan compiled regex for auto-possession *
|
||||
*************************************************/
|
||||
|
||||
/* Replaces single character iterations with their possessive alternatives
|
||||
if appropriate. This function modifies the compiled opcode! Hitting a
|
||||
non-existent opcode may indicate a bug in PCRE2, but it can also be caused if a
|
||||
bad UTF string was compiled with PCRE2_NO_UTF_CHECK. The rec_limit catches
|
||||
overly complicated or large patterns. In these cases, the check just stops,
|
||||
leaving the remainder of the pattern unpossessified.
|
||||
|
||||
Arguments:
|
||||
code points to start of the byte code
|
||||
cb compile data block
|
||||
|
||||
Returns: 0 for success
|
||||
-1 if a non-existent opcode is encountered
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(auto_possessify)(PCRE2_UCHAR *code, const compile_block *cb)
|
||||
{
|
||||
PCRE2_UCHAR c;
|
||||
PCRE2_SPTR end;
|
||||
PCRE2_UCHAR *repeat_opcode;
|
||||
uint32_t list[MAX_LIST];
|
||||
int rec_limit = 1000; /* Was 10,000 but clang+ASAN uses a lot of stack. */
|
||||
BOOL utf = (cb->external_options & PCRE2_UTF) != 0;
|
||||
BOOL ucp = (cb->external_options & PCRE2_UCP) != 0;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
c = *code;
|
||||
|
||||
if (c >= OP_TABLE_LENGTH)
|
||||
{
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
return -1; /* Something gone wrong */
|
||||
}
|
||||
|
||||
if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
|
||||
{
|
||||
c -= get_repeat_base(c) - OP_STAR;
|
||||
end = (c <= OP_MINUPTO) ?
|
||||
get_chr_property_list(code, utf, ucp, cb->fcc, list) : NULL;
|
||||
list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
|
||||
|
||||
if (end != NULL && compare_opcodes(end, utf, ucp, cb, list, end,
|
||||
&rec_limit))
|
||||
{
|
||||
switch(c)
|
||||
{
|
||||
case OP_STAR:
|
||||
*code += OP_POSSTAR - OP_STAR;
|
||||
break;
|
||||
|
||||
case OP_MINSTAR:
|
||||
*code += OP_POSSTAR - OP_MINSTAR;
|
||||
break;
|
||||
|
||||
case OP_PLUS:
|
||||
*code += OP_POSPLUS - OP_PLUS;
|
||||
break;
|
||||
|
||||
case OP_MINPLUS:
|
||||
*code += OP_POSPLUS - OP_MINPLUS;
|
||||
break;
|
||||
|
||||
case OP_QUERY:
|
||||
*code += OP_POSQUERY - OP_QUERY;
|
||||
break;
|
||||
|
||||
case OP_MINQUERY:
|
||||
*code += OP_POSQUERY - OP_MINQUERY;
|
||||
break;
|
||||
|
||||
case OP_UPTO:
|
||||
*code += OP_POSUPTO - OP_UPTO;
|
||||
break;
|
||||
|
||||
case OP_MINUPTO:
|
||||
*code += OP_POSUPTO - OP_MINUPTO;
|
||||
break;
|
||||
}
|
||||
}
|
||||
c = *code;
|
||||
}
|
||||
else if (c == OP_CLASS || c == OP_NCLASS
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
|| c == OP_XCLASS || c == OP_ECLASS
|
||||
#endif
|
||||
)
|
||||
{
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
if (c == OP_XCLASS || c == OP_ECLASS)
|
||||
repeat_opcode = code + GET(code, 1);
|
||||
else
|
||||
#endif
|
||||
repeat_opcode = code + 1 + (32 / sizeof(PCRE2_UCHAR));
|
||||
|
||||
c = *repeat_opcode;
|
||||
if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
|
||||
{
|
||||
/* The return from get_chr_property_list() will never be NULL when
|
||||
*code (aka c) is one of the four class opcodes. However, gcc with
|
||||
-fanalyzer notes that a NULL return is possible, and grumbles. Hence we
|
||||
put in a check. */
|
||||
|
||||
end = get_chr_property_list(code, utf, ucp, cb->fcc, list);
|
||||
list[1] = (c & 1) == 0;
|
||||
|
||||
if (end != NULL &&
|
||||
compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit))
|
||||
{
|
||||
switch (c)
|
||||
{
|
||||
case OP_CRSTAR:
|
||||
case OP_CRMINSTAR:
|
||||
*repeat_opcode = OP_CRPOSSTAR;
|
||||
break;
|
||||
|
||||
case OP_CRPLUS:
|
||||
case OP_CRMINPLUS:
|
||||
*repeat_opcode = OP_CRPOSPLUS;
|
||||
break;
|
||||
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
*repeat_opcode = OP_CRPOSQUERY;
|
||||
break;
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
*repeat_opcode = OP_CRPOSRANGE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
c = *code;
|
||||
}
|
||||
|
||||
switch(c)
|
||||
{
|
||||
case OP_END:
|
||||
return 0;
|
||||
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEPOSPLUS:
|
||||
case OP_TYPEPOSQUERY:
|
||||
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
|
||||
break;
|
||||
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEEXACT:
|
||||
case OP_TYPEPOSUPTO:
|
||||
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
|
||||
code += 2;
|
||||
break;
|
||||
|
||||
case OP_CALLOUT_STR:
|
||||
code += GET(code, 1 + 2*LINK_SIZE);
|
||||
break;
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
case OP_XCLASS:
|
||||
case OP_ECLASS:
|
||||
code += GET(code, 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
code += code[1];
|
||||
break;
|
||||
}
|
||||
|
||||
/* Add in the fixed length from the table */
|
||||
|
||||
code += PRIV(OP_lengths)[c];
|
||||
|
||||
/* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
|
||||
followed by a multi-byte character. The length in the table is a minimum, so
|
||||
we have to arrange to skip the extra code units. */
|
||||
|
||||
#ifdef MAYBE_UTF_MULTI
|
||||
if (utf) switch(c)
|
||||
{
|
||||
case OP_CHAR:
|
||||
case OP_CHARI:
|
||||
case OP_NOT:
|
||||
case OP_NOTI:
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
case OP_EXACT:
|
||||
case OP_POSSTAR:
|
||||
case OP_POSPLUS:
|
||||
case OP_POSQUERY:
|
||||
case OP_POSUPTO:
|
||||
case OP_STARI:
|
||||
case OP_MINSTARI:
|
||||
case OP_PLUSI:
|
||||
case OP_MINPLUSI:
|
||||
case OP_QUERYI:
|
||||
case OP_MINQUERYI:
|
||||
case OP_UPTOI:
|
||||
case OP_MINUPTOI:
|
||||
case OP_EXACTI:
|
||||
case OP_POSSTARI:
|
||||
case OP_POSPLUSI:
|
||||
case OP_POSQUERYI:
|
||||
case OP_POSUPTOI:
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTPOSSTAR:
|
||||
case OP_NOTPOSPLUS:
|
||||
case OP_NOTPOSQUERY:
|
||||
case OP_NOTPOSUPTO:
|
||||
case OP_NOTSTARI:
|
||||
case OP_NOTMINSTARI:
|
||||
case OP_NOTPLUSI:
|
||||
case OP_NOTMINPLUSI:
|
||||
case OP_NOTQUERYI:
|
||||
case OP_NOTMINQUERYI:
|
||||
case OP_NOTUPTOI:
|
||||
case OP_NOTMINUPTOI:
|
||||
case OP_NOTEXACTI:
|
||||
case OP_NOTPOSSTARI:
|
||||
case OP_NOTPOSPLUSI:
|
||||
case OP_NOTPOSQUERYI:
|
||||
case OP_NOTPOSUPTOI:
|
||||
if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
|
||||
break;
|
||||
}
|
||||
#else
|
||||
(void)(utf); /* Keep compiler happy by referencing function argument */
|
||||
#endif /* SUPPORT_WIDE_CHARS */
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_auto_possess.c */
|
||||
196
3rd/pcre2/src/pcre2_chartables.c.dist
Normal file
196
3rd/pcre2/src/pcre2_chartables.c.dist
Normal file
@@ -0,0 +1,196 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This file was automatically written by the pcre2_dftables auxiliary
|
||||
program. It contains character tables that are used when no external
|
||||
tables are passed to PCRE2 by the application that calls it. The tables
|
||||
are used only for characters whose code values are less than 256, and
|
||||
only relevant if not in UCP mode. */
|
||||
|
||||
/* This set of tables was written in the C locale. */
|
||||
|
||||
/* The pcre2_dftables program (which is distributed with PCRE2) can be used
|
||||
to build alternative versions of this file. This is necessary if you are
|
||||
running in an EBCDIC environment, or if you want to default to a different
|
||||
encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates
|
||||
these tables in the "C" locale by default. This happens automatically if
|
||||
PCRE2 is configured with --enable-rebuild-chartables. However, you can run
|
||||
pcre2_dftables manually with the -L option to build tables using the LC_ALL
|
||||
locale. */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
const uint8_t PRIV(default_tables)[] = {
|
||||
|
||||
/* This table is a lower casing table. */
|
||||
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39,
|
||||
40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55,
|
||||
56, 57, 58, 59, 60, 61, 62, 63,
|
||||
64, 97, 98, 99,100,101,102,103,
|
||||
104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,
|
||||
120,121,122, 91, 92, 93, 94, 95,
|
||||
96, 97, 98, 99,100,101,102,103,
|
||||
104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,
|
||||
120,121,122,123,124,125,126,127,
|
||||
128,129,130,131,132,133,134,135,
|
||||
136,137,138,139,140,141,142,143,
|
||||
144,145,146,147,148,149,150,151,
|
||||
152,153,154,155,156,157,158,159,
|
||||
160,161,162,163,164,165,166,167,
|
||||
168,169,170,171,172,173,174,175,
|
||||
176,177,178,179,180,181,182,183,
|
||||
184,185,186,187,188,189,190,191,
|
||||
192,193,194,195,196,197,198,199,
|
||||
200,201,202,203,204,205,206,207,
|
||||
208,209,210,211,212,213,214,215,
|
||||
216,217,218,219,220,221,222,223,
|
||||
224,225,226,227,228,229,230,231,
|
||||
232,233,234,235,236,237,238,239,
|
||||
240,241,242,243,244,245,246,247,
|
||||
248,249,250,251,252,253,254,255,
|
||||
|
||||
/* This table is a case flipping table. */
|
||||
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39,
|
||||
40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55,
|
||||
56, 57, 58, 59, 60, 61, 62, 63,
|
||||
64, 97, 98, 99,100,101,102,103,
|
||||
104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,
|
||||
120,121,122, 91, 92, 93, 94, 95,
|
||||
96, 65, 66, 67, 68, 69, 70, 71,
|
||||
72, 73, 74, 75, 76, 77, 78, 79,
|
||||
80, 81, 82, 83, 84, 85, 86, 87,
|
||||
88, 89, 90,123,124,125,126,127,
|
||||
128,129,130,131,132,133,134,135,
|
||||
136,137,138,139,140,141,142,143,
|
||||
144,145,146,147,148,149,150,151,
|
||||
152,153,154,155,156,157,158,159,
|
||||
160,161,162,163,164,165,166,167,
|
||||
168,169,170,171,172,173,174,175,
|
||||
176,177,178,179,180,181,182,183,
|
||||
184,185,186,187,188,189,190,191,
|
||||
192,193,194,195,196,197,198,199,
|
||||
200,201,202,203,204,205,206,207,
|
||||
208,209,210,211,212,213,214,215,
|
||||
216,217,218,219,220,221,222,223,
|
||||
224,225,226,227,228,229,230,231,
|
||||
232,233,234,235,236,237,238,239,
|
||||
240,241,242,243,244,245,246,247,
|
||||
248,249,250,251,252,253,254,255,
|
||||
|
||||
/* This table contains bit maps for various character classes. Each map is 32
|
||||
bytes long and the bits run from the least significant end of each byte. The
|
||||
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
|
||||
graph, print, punct, and cntrl. Other classes are built from combinations. */
|
||||
|
||||
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, /* space */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* xdigit */
|
||||
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* digit */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* upper */
|
||||
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* lower */
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* word */
|
||||
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, /* graph */
|
||||
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, /* print */
|
||||
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, /* punct */
|
||||
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, /* cntrl */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
/* This table identifies various classes of character by individual bits:
|
||||
0x01 white space character
|
||||
0x02 letter
|
||||
0x04 lower case letter
|
||||
0x08 decimal digit
|
||||
0x10 word (alphanumeric or '_')
|
||||
*/
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
|
||||
0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
|
||||
0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - ' */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ( - / */
|
||||
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, /* 0 - 7 */
|
||||
0x18,0x18,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? */
|
||||
0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* @ - G */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
|
||||
0x12,0x12,0x12,0x00,0x00,0x00,0x00,0x10, /* X - _ */
|
||||
0x00,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /* ` - g */
|
||||
0x16,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /* h - o */
|
||||
0x16,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /* p - w */
|
||||
0x16,0x16,0x16,0x00,0x00,0x00,0x00,0x00, /* x -127 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
|
||||
|
||||
/* End of pcre2_chartables.c */
|
||||
94
3rd/pcre2/src/pcre2_chkdint.c
Normal file
94
3rd/pcre2/src/pcre2_chkdint.c
Normal file
@@ -0,0 +1,94 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 2023 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This file contains functions to implement checked integer operations */
|
||||
|
||||
#ifndef PCRE2_PCRE2TEST
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
#endif
|
||||
|
||||
/*************************************************
|
||||
* Checked Integer Multiplication *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
r A pointer to PCRE2_SIZE to store the answer
|
||||
a, b Two integers
|
||||
|
||||
Returns: Bool indicating if the operation overflows
|
||||
|
||||
It is modeled after C23's <stdckdint.h> interface
|
||||
The INT64_OR_DOUBLE type is a 64-bit integer type when available,
|
||||
otherwise double. */
|
||||
|
||||
BOOL
|
||||
PRIV(ckd_smul)(PCRE2_SIZE *r, int a, int b)
|
||||
{
|
||||
#ifdef HAVE_BUILTIN_MUL_OVERFLOW
|
||||
PCRE2_SIZE m;
|
||||
|
||||
if (__builtin_mul_overflow(a, b, &m)) return TRUE;
|
||||
|
||||
*r = m;
|
||||
#else
|
||||
INT64_OR_DOUBLE m;
|
||||
|
||||
PCRE2_ASSERT(a >= 0 && b >= 0);
|
||||
|
||||
m = (INT64_OR_DOUBLE)a * (INT64_OR_DOUBLE)b;
|
||||
|
||||
#if defined INT64_MAX || defined int64_t
|
||||
if (sizeof(m) > sizeof(*r) && m > (INT64_OR_DOUBLE)PCRE2_SIZE_MAX) return TRUE;
|
||||
*r = (PCRE2_SIZE)m;
|
||||
#else
|
||||
if (m > PCRE2_SIZE_MAX) return TRUE;
|
||||
*r = m;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* End of pcre2_chkdint.c */
|
||||
11101
3rd/pcre2/src/pcre2_compile.c
Normal file
11101
3rd/pcre2/src/pcre2_compile.c
Normal file
File diff suppressed because it is too large
Load Diff
280
3rd/pcre2/src/pcre2_compile.h
Normal file
280
3rd/pcre2/src/pcre2_compile.h
Normal file
@@ -0,0 +1,280 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE2 is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef PCRE2_COMPILE_H_IDEMPOTENT_GUARD
|
||||
#define PCRE2_COMPILE_H_IDEMPOTENT_GUARD
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
/* Compile time error code numbers. They are given names so that they can more
|
||||
easily be tracked. When a new number is added, the tables called eint1 and
|
||||
eint2 in pcre2posix.c may need to be updated, and a new error text must be
|
||||
added to compile_error_texts in pcre2_error.c. Also, the error codes in
|
||||
pcre2.h.in must be updated - their values are exactly 100 greater than these
|
||||
values. */
|
||||
|
||||
enum { ERR0 = COMPILE_ERROR_BASE,
|
||||
ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR10,
|
||||
ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20,
|
||||
ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29, ERR30,
|
||||
ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40,
|
||||
ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50,
|
||||
ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
|
||||
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
|
||||
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
|
||||
ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90,
|
||||
ERR91, ERR92, ERR93, ERR94, ERR95, ERR96, ERR97, ERR98, ERR99, ERR100,
|
||||
ERR101,ERR102,ERR103,ERR104,ERR105,ERR106,ERR107,ERR108,ERR109,ERR110,
|
||||
ERR111,ERR112,ERR113,ERR114,ERR115,ERR116 };
|
||||
|
||||
/* Code values for parsed patterns, which are stored in a vector of 32-bit
|
||||
unsigned ints. Values less than META_END are literal data values. The coding
|
||||
for identifying the item is in the top 16-bits, leaving 16 bits for the
|
||||
additional data that some of them need. The META_CODE, META_DATA, and META_DIFF
|
||||
macros are used to manipulate parsed pattern elements.
|
||||
|
||||
NOTE: When these definitions are changed, the table of extra lengths for each
|
||||
code (meta_extra_lengths) must be updated to remain in step. */
|
||||
|
||||
#define META_END 0x80000000u /* End of pattern */
|
||||
|
||||
#define META_ALT 0x80010000u /* alternation */
|
||||
#define META_ATOMIC 0x80020000u /* atomic group */
|
||||
#define META_BACKREF 0x80030000u /* Back ref */
|
||||
#define META_BACKREF_BYNAME 0x80040000u /* \k'name' */
|
||||
#define META_BIGVALUE 0x80050000u /* Next is a literal > META_END */
|
||||
#define META_CALLOUT_NUMBER 0x80060000u /* (?C with numerical argument */
|
||||
#define META_CALLOUT_STRING 0x80070000u /* (?C with string argument */
|
||||
#define META_CAPTURE 0x80080000u /* Capturing parenthesis */
|
||||
#define META_CIRCUMFLEX 0x80090000u /* ^ metacharacter */
|
||||
#define META_CLASS 0x800a0000u /* start non-empty class */
|
||||
#define META_CLASS_EMPTY 0x800b0000u /* empty class */
|
||||
#define META_CLASS_EMPTY_NOT 0x800c0000u /* negative empty class */
|
||||
#define META_CLASS_END 0x800d0000u /* end of non-empty class */
|
||||
#define META_CLASS_NOT 0x800e0000u /* start non-empty negative class */
|
||||
#define META_COND_ASSERT 0x800f0000u /* (?(?assertion)... */
|
||||
#define META_COND_DEFINE 0x80100000u /* (?(DEFINE)... */
|
||||
#define META_COND_NAME 0x80110000u /* (?(<name>)... */
|
||||
#define META_COND_NUMBER 0x80120000u /* (?(digits)... */
|
||||
#define META_COND_RNAME 0x80130000u /* (?(R&name)... */
|
||||
#define META_COND_RNUMBER 0x80140000u /* (?(Rdigits)... */
|
||||
#define META_COND_VERSION 0x80150000u /* (?(VERSION<op>x.y)... */
|
||||
#define META_OFFSET 0x80160000u /* Setting offset for various
|
||||
META codes (e.g. META_SCS_NAME) */
|
||||
#define META_SCS 0x80170000u /* (*scan_substring:... */
|
||||
#define META_SCS_NAME 0x80180000u /* Next <name> of scan_substring */
|
||||
#define META_SCS_NUMBER 0x80190000u /* Next digits of scan_substring */
|
||||
#define META_DOLLAR 0x801a0000u /* $ metacharacter */
|
||||
#define META_DOT 0x801b0000u /* . metacharacter */
|
||||
#define META_ESCAPE 0x801c0000u /* \d and friends */
|
||||
#define META_KET 0x801d0000u /* closing parenthesis */
|
||||
#define META_NOCAPTURE 0x801e0000u /* no capture parens */
|
||||
#define META_OPTIONS 0x801f0000u /* (?i) and friends */
|
||||
#define META_POSIX 0x80200000u /* POSIX class item */
|
||||
#define META_POSIX_NEG 0x80210000u /* negative POSIX class item */
|
||||
#define META_RANGE_ESCAPED 0x80220000u /* range with at least one escape */
|
||||
#define META_RANGE_LITERAL 0x80230000u /* range defined literally */
|
||||
#define META_RECURSE 0x80240000u /* Recursion */
|
||||
#define META_RECURSE_BYNAME 0x80250000u /* (?&name) */
|
||||
#define META_SCRIPT_RUN 0x80260000u /* (*script_run:...) */
|
||||
|
||||
/* These must be kept together to make it easy to check that an assertion
|
||||
is present where expected in a conditional group. */
|
||||
|
||||
#define META_LOOKAHEAD 0x80270000u /* (?= */
|
||||
#define META_LOOKAHEADNOT 0x80280000u /* (?! */
|
||||
#define META_LOOKBEHIND 0x80290000u /* (?<= */
|
||||
#define META_LOOKBEHINDNOT 0x802a0000u /* (?<! */
|
||||
|
||||
/* These cannot be conditions */
|
||||
|
||||
#define META_LOOKAHEAD_NA 0x802b0000u /* (*napla: */
|
||||
#define META_LOOKBEHIND_NA 0x802c0000u /* (*naplb: */
|
||||
|
||||
/* These must be kept in this order, with consecutive values, and the _ARG
|
||||
versions of COMMIT, PRUNE, SKIP, and THEN immediately after their non-argument
|
||||
versions. */
|
||||
|
||||
#define META_MARK 0x802d0000u /* (*MARK) */
|
||||
#define META_ACCEPT 0x802e0000u /* (*ACCEPT) */
|
||||
#define META_FAIL 0x802f0000u /* (*FAIL) */
|
||||
#define META_COMMIT 0x80300000u /* These */
|
||||
#define META_COMMIT_ARG 0x80310000u /* pairs */
|
||||
#define META_PRUNE 0x80320000u /* must */
|
||||
#define META_PRUNE_ARG 0x80330000u /* be */
|
||||
#define META_SKIP 0x80340000u /* kept */
|
||||
#define META_SKIP_ARG 0x80350000u /* in */
|
||||
#define META_THEN 0x80360000u /* this */
|
||||
#define META_THEN_ARG 0x80370000u /* order */
|
||||
|
||||
/* These must be kept in groups of adjacent 3 values, and all together. */
|
||||
|
||||
#define META_ASTERISK 0x80380000u /* * */
|
||||
#define META_ASTERISK_PLUS 0x80390000u /* *+ */
|
||||
#define META_ASTERISK_QUERY 0x803a0000u /* *? */
|
||||
#define META_PLUS 0x803b0000u /* + */
|
||||
#define META_PLUS_PLUS 0x803c0000u /* ++ */
|
||||
#define META_PLUS_QUERY 0x803d0000u /* +? */
|
||||
#define META_QUERY 0x803e0000u /* ? */
|
||||
#define META_QUERY_PLUS 0x803f0000u /* ?+ */
|
||||
#define META_QUERY_QUERY 0x80400000u /* ?? */
|
||||
#define META_MINMAX 0x80410000u /* {n,m} repeat */
|
||||
#define META_MINMAX_PLUS 0x80420000u /* {n,m}+ repeat */
|
||||
#define META_MINMAX_QUERY 0x80430000u /* {n,m}? repeat */
|
||||
|
||||
/* These meta codes must be kept in a group, with the OR/SUB/XOR in
|
||||
this order, and AND/NOT at the start/end. */
|
||||
|
||||
#define META_ECLASS_AND 0x80440000u /* && (or &) in a class */
|
||||
#define META_ECLASS_OR 0x80450000u /* || (or |, +) in a class */
|
||||
#define META_ECLASS_SUB 0x80460000u /* -- (or -) in a class */
|
||||
#define META_ECLASS_XOR 0x80470000u /* ~~ (or ^) in a class */
|
||||
#define META_ECLASS_NOT 0x80480000u /* ! in a class */
|
||||
|
||||
/* Convenience aliases. */
|
||||
|
||||
#define META_FIRST_QUANTIFIER META_ASTERISK
|
||||
#define META_LAST_QUANTIFIER META_MINMAX_QUERY
|
||||
|
||||
/* This is a special "meta code" that is used only to distinguish (*asr: from
|
||||
(*sr: in the table of alphabetic assertions. It is never stored in the parsed
|
||||
pattern because (*asr: is turned into (*sr:(*atomic: at that stage. There is
|
||||
therefore no need for it to have a length entry, so use a high value. */
|
||||
|
||||
#define META_ATOMIC_SCRIPT_RUN 0x8fff0000u
|
||||
|
||||
/* Macros for manipulating elements of the parsed pattern vector. */
|
||||
|
||||
/* META_CODE() keeps the top 16 bits of a parsed pattern element (the item
code), META_DATA() keeps the low 16 bits (the additional data), and
META_DIFF() is the difference between two elements shifted down by 16 bits.
Every argument is parenthesized so that an arbitrary expression (for example
"a | b" or "x - y") can be passed without operator precedence changing the
result. */

#define META_CODE(x)   ((x) & 0xffff0000u)
#define META_DATA(x)   ((x) & 0x0000ffffu)
#define META_DIFF(x,y) (((x)-(y))>>16)
|
||||
|
||||
/* Extended class management flags. */
|
||||
|
||||
#define CLASS_IS_ECLASS 0x1
|
||||
|
||||
/* Macro for the highest character value. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define MAX_UCHAR_VALUE 0xffu
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
#define MAX_UCHAR_VALUE 0xffffu
|
||||
#else
|
||||
#define MAX_UCHAR_VALUE 0xffffffffu
|
||||
#endif
|
||||
|
||||
#define GET_MAX_CHAR_VALUE(utf) \
|
||||
((utf) ? MAX_UTF_CODE_POINT : MAX_UCHAR_VALUE)
|
||||
|
||||
/* Macro for setting individual bits in class bitmaps: it sets bit (b & 7) of
byte (b >> 3) in the bitmap a. Both arguments are parenthesized, so a pointer
expression such as "map + offset" may be passed as the bitmap, and the whole
expansion is a single parenthesized expression. */

#define SETBIT(a,b) ((a)[(b) >> 3] |= (uint8_t)(1u << ((b) & 0x7)))
|
||||
|
||||
/* Macro for 8 bit specific checks. */
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define SELECT_VALUE8(value8, value) (value8)
|
||||
#else
|
||||
#define SELECT_VALUE8(value8, value) (value)
|
||||
#endif
|
||||
|
||||
/* Macro for aligning data: adds (align - 1) to base and clears the low bits
selected by (align - 1). The mask arithmetic assumes that align is a power of
two. Both arguments are parenthesized so that expressions can be passed. */

#define CLIST_ALIGN_TO(base, align) \
  (((base) + ((size_t)(align) - 1)) & ~((size_t)(align) - 1))
|
||||
|
||||
/* Structure for holding information about an OP_ECLASS internal operand.
|
||||
An "operand" here could be just a single OP_[X]CLASS, or it could be some
|
||||
complex expression; but it's some sequence of ECL_* codes which pushes one
|
||||
value to the stack. */
|
||||
typedef struct {
|
||||
/* The position of the operand - or NULL if (lengthptr != NULL). */
|
||||
PCRE2_UCHAR *code_start;
|
||||
PCRE2_SIZE length;
|
||||
/* The operand's type if it is a single code (ECL_XCLASS, ECL_ANY, ECL_NONE);
|
||||
otherwise zero if the operand is not atomic. */
|
||||
uint8_t op_single_type;
|
||||
/* Regardless of whether it's a single code or not, we fully constant-fold
|
||||
the bitmap for code points < 256. */
|
||||
class_bits_storage bits;
|
||||
} eclass_op_info;
|
||||
|
||||
/* Macros for the definitions below, to prevent name collisions. */
|
||||
|
||||
#define _pcre2_posix_class_maps PCRE2_SUFFIX(_pcre2_posix_class_maps)
|
||||
#define _pcre2_update_classbits PCRE2_SUFFIX(_pcre2_update_classbits_)
|
||||
#define _pcre2_compile_class_nested PCRE2_SUFFIX(_pcre2_compile_class_nested_)
|
||||
#define _pcre2_compile_class_not_nested PCRE2_SUFFIX(_pcre2_compile_class_not_nested_)
|
||||
|
||||
|
||||
/* Indices of the POSIX classes in posix_names, posix_name_lengths,
|
||||
posix_class_maps, and posix_substitutes. They must be kept in sync. */
|
||||
|
||||
#define PC_DIGIT 7
|
||||
#define PC_GRAPH 8
|
||||
#define PC_PRINT 9
|
||||
#define PC_PUNCT 10
|
||||
#define PC_XDIGIT 13
|
||||
|
||||
extern const int PRIV(posix_class_maps)[];
|
||||
|
||||
|
||||
/* Set bits in classbits according to the property type */
|
||||
|
||||
void PRIV(update_classbits)(uint32_t ptype, uint32_t pdata, BOOL negated,
|
||||
uint8_t *classbits);
|
||||
|
||||
/* Compile the META codes from start_ptr...end_ptr, writing a single
|
||||
OP_CLASS, OP_NCLASS, OP_XCLASS, or OP_ALLANY into pcode. */
|
||||
|
||||
uint32_t *PRIV(compile_class_not_nested)(uint32_t options, uint32_t xoptions,
|
||||
uint32_t *start_ptr, PCRE2_UCHAR **pcode, BOOL negate_class, BOOL* has_bitmap,
|
||||
int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr);
|
||||
|
||||
/* Compile the META codes in pptr into opcodes written to pcode. The pptr must
|
||||
start at a META_CLASS or META_CLASS_NOT.
|
||||
|
||||
The pptr will be left pointing at the matching META_CLASS_END. */
|
||||
|
||||
BOOL PRIV(compile_class_nested)(uint32_t options, uint32_t xoptions,
|
||||
uint32_t **pptr, PCRE2_UCHAR **pcode, int *errorcodeptr,
|
||||
compile_block *cb, PCRE2_SIZE *lengthptr);
|
||||
|
||||
#endif /* PCRE2_COMPILE_H_IDEMPOTENT_GUARD */
|
||||
|
||||
/* End of pcre2_compile.h */
|
||||
2737
3rd/pcre2/src/pcre2_compile_class.c
Normal file
2737
3rd/pcre2/src/pcre2_compile_class.c
Normal file
@@ -0,0 +1,2737 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_compile.h"
|
||||
|
||||
typedef struct {
|
||||
/* Option bits for eclass. */
|
||||
uint32_t options;
|
||||
uint32_t xoptions;
|
||||
/* Rarely used members. */
|
||||
int *errorcodeptr;
|
||||
compile_block *cb;
|
||||
/* Bitmap is needed. */
|
||||
BOOL needs_bitmap;
|
||||
} eclass_context;
|
||||
|
||||
/* Checks the allowed tokens at the end of a class structure in debug mode.
|
||||
When a new token is not processed by all loops, and the token is equal to
|
||||
a) one of the cases here:
|
||||
the compiler will complain about a duplicated case value.
|
||||
b) none of the cases here:
|
||||
the loop without the handler will stop with an assertion failure. */
|
||||
|
||||
#ifdef PCRE2_DEBUG
|
||||
#define CLASS_END_CASES(meta) \
|
||||
default: \
|
||||
PCRE2_ASSERT((meta) <= META_END); \
|
||||
/* Fall through */ \
|
||||
case META_CLASS: \
|
||||
case META_CLASS_NOT: \
|
||||
case META_CLASS_EMPTY: \
|
||||
case META_CLASS_EMPTY_NOT: \
|
||||
case META_CLASS_END: \
|
||||
case META_ECLASS_AND: \
|
||||
case META_ECLASS_OR: \
|
||||
case META_ECLASS_SUB: \
|
||||
case META_ECLASS_XOR: \
|
||||
case META_ECLASS_NOT:
|
||||
#else
|
||||
#define CLASS_END_CASES(meta) \
|
||||
default:
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
|
||||
/* Heapsort algorithm. */
|
||||
|
||||
static void do_heapify(uint32_t *buffer, size_t size, size_t i)
{
/* Every heap item occupies two consecutive slots of the buffer and is ordered
by its first slot only. The children of the item at index i are the items at
indices 2*i + 2 and 2*i + 4. The item at i is moved down until neither child
has a larger first slot. */

for (;;)
  {
  size_t largest = i;
  size_t child = (i << 1) + 2;
  uint32_t first, second;

  /* Left child, then right child. */
  if (child < size && buffer[child] > buffer[largest]) largest = child;
  child += 2;
  if (child < size && buffer[child] > buffer[largest]) largest = child;

  if (largest == i) break;

  /* Exchange both slots of the two items and continue from the child. */
  first = buffer[i];
  second = buffer[i + 1];
  buffer[i] = buffer[largest];
  buffer[i + 1] = buffer[largest + 1];
  buffer[largest] = first;
  buffer[largest + 1] = second;

  i = largest;
  }
}
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
|
||||
#define PARSE_CLASS_UTF 0x1
|
||||
#define PARSE_CLASS_CASELESS_UTF 0x2
|
||||
#define PARSE_CLASS_RESTRICTED_UTF 0x4
|
||||
#define PARSE_CLASS_TURKISH_UTF 0x8
|
||||
|
||||
/* Get the range of nocase characters which includes the
|
||||
'c' character passed as argument, or directly follows 'c'. */
|
||||
|
||||
static const uint32_t*
|
||||
get_nocase_range(uint32_t c)
|
||||
{
|
||||
uint32_t left = 0;
|
||||
uint32_t right = PRIV(ucd_nocase_ranges_size);
|
||||
uint32_t middle;
|
||||
|
||||
if (c > MAX_UTF_CODE_POINT) return PRIV(ucd_nocase_ranges) + right;
|
||||
|
||||
while (TRUE)
|
||||
{
|
||||
/* Range end of the middle element. */
|
||||
middle = ((left + right) >> 1) | 0x1;
|
||||
|
||||
if (PRIV(ucd_nocase_ranges)[middle] <= c)
|
||||
left = middle + 1;
|
||||
else if (middle > 1 && PRIV(ucd_nocase_ranges)[middle - 2] > c)
|
||||
right = middle - 1;
|
||||
else
|
||||
return PRIV(ucd_nocase_ranges) + (middle - 1);
|
||||
}
|
||||
}
|
||||
|
||||
/* Get the list of othercase characters, which belong to the passed range.
|
||||
Create ranges from these characters, and append them to the buffer argument. */
|
||||
|
||||
static size_t
|
||||
utf_caseless_extend(uint32_t start, uint32_t end, uint32_t options,
|
||||
uint32_t *buffer)
|
||||
{
|
||||
uint32_t new_start = start;
|
||||
uint32_t new_end = end;
|
||||
uint32_t c = start;
|
||||
const uint32_t *list;
|
||||
uint32_t tmp[3];
|
||||
size_t result = 2;
|
||||
const uint32_t *skip_range = get_nocase_range(c);
|
||||
uint32_t skip_start = skip_range[0];
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
PCRE2_ASSERT(options & PARSE_CLASS_UTF);
|
||||
#endif
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
if (end > MAX_UTF_CODE_POINT) end = MAX_UTF_CODE_POINT;
|
||||
#endif
|
||||
|
||||
while (c <= end)
|
||||
{
|
||||
uint32_t co;
|
||||
|
||||
if (c > skip_start)
|
||||
{
|
||||
c = skip_range[1];
|
||||
skip_range += 2;
|
||||
skip_start = skip_range[0];
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Compute caseless set. */
|
||||
|
||||
if ((options & (PARSE_CLASS_TURKISH_UTF|PARSE_CLASS_RESTRICTED_UTF)) ==
|
||||
PARSE_CLASS_TURKISH_UTF &&
|
||||
UCD_ANY_I(c))
|
||||
{
|
||||
co = PRIV(ucd_turkish_dotted_i_caseset) + (UCD_DOTTED_I(c)? 0 : 3);
|
||||
}
|
||||
else if ((co = UCD_CASESET(c)) != 0 &&
|
||||
(options & PARSE_CLASS_RESTRICTED_UTF) != 0 &&
|
||||
PRIV(ucd_caseless_sets)[co] < 128)
|
||||
{
|
||||
co = 0; /* Ignore the caseless set if it's restricted. */
|
||||
}
|
||||
|
||||
if (co != 0)
|
||||
list = PRIV(ucd_caseless_sets) + co;
|
||||
else
|
||||
{
|
||||
co = UCD_OTHERCASE(c);
|
||||
list = tmp;
|
||||
tmp[0] = c;
|
||||
tmp[1] = NOTACHAR;
|
||||
|
||||
if (co != c)
|
||||
{
|
||||
tmp[1] = co;
|
||||
tmp[2] = NOTACHAR;
|
||||
}
|
||||
}
|
||||
c++;
|
||||
|
||||
/* Add characters. */
|
||||
do
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 16
|
||||
if (!(options & PARSE_CLASS_UTF) && *list > 0xffff) continue;
|
||||
#endif
|
||||
|
||||
if (*list < new_start)
|
||||
{
|
||||
if (*list + 1 == new_start)
|
||||
{
|
||||
new_start--;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else if (*list > new_end)
|
||||
{
|
||||
if (*list - 1 == new_end)
|
||||
{
|
||||
new_end++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else continue;
|
||||
|
||||
result += 2;
|
||||
if (buffer != NULL)
|
||||
{
|
||||
buffer[0] = *list;
|
||||
buffer[1] = *list;
|
||||
buffer += 2;
|
||||
}
|
||||
}
|
||||
while (*(++list) != NOTACHAR);
|
||||
}
|
||||
|
||||
if (buffer != NULL)
|
||||
{
|
||||
buffer[0] = new_start;
|
||||
buffer[1] = new_end;
|
||||
buffer += 2;
|
||||
(void)buffer;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* Add a character list to a buffer. */
|
||||
|
||||
static size_t
|
||||
append_char_list(const uint32_t *p, uint32_t *buffer)
|
||||
{
|
||||
const uint32_t *n;
|
||||
size_t result = 0;
|
||||
|
||||
while (*p != NOTACHAR)
|
||||
{
|
||||
n = p;
|
||||
while (n[0] == n[1] - 1) n++;
|
||||
|
||||
PCRE2_ASSERT(*p < 0xffff);
|
||||
|
||||
if (buffer != NULL)
|
||||
{
|
||||
buffer[0] = *p;
|
||||
buffer[1] = *n;
|
||||
buffer += 2;
|
||||
}
|
||||
|
||||
result += 2;
|
||||
p = n + 1;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
get_highest_char(uint32_t options)
|
||||
{
|
||||
(void)options; /* Avoid compiler warning. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
return MAX_UTF_CODE_POINT;
|
||||
#else
|
||||
#ifdef SUPPORT_UNICODE
|
||||
return GET_MAX_CHAR_VALUE((options & PARSE_CLASS_UTF) != 0);
|
||||
#else
|
||||
return MAX_UCHAR_VALUE;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Add a negated character list to a buffer. */
|
||||
static size_t
|
||||
append_negated_char_list(const uint32_t *p, uint32_t options, uint32_t *buffer)
|
||||
{
|
||||
const uint32_t *n;
|
||||
uint32_t start = 0;
|
||||
size_t result = 2;
|
||||
|
||||
PCRE2_ASSERT(*p > 0);
|
||||
|
||||
while (*p != NOTACHAR)
|
||||
{
|
||||
n = p;
|
||||
while (n[0] == n[1] - 1) n++;
|
||||
|
||||
PCRE2_ASSERT(*p < 0xffff);
|
||||
|
||||
if (buffer != NULL)
|
||||
{
|
||||
buffer[0] = start;
|
||||
buffer[1] = *p - 1;
|
||||
buffer += 2;
|
||||
}
|
||||
|
||||
result += 2;
|
||||
start = *n + 1;
|
||||
p = n + 1;
|
||||
}
|
||||
|
||||
if (buffer != NULL)
|
||||
{
|
||||
buffer[0] = start;
|
||||
buffer[1] = get_highest_char(options);
|
||||
buffer += 2;
|
||||
(void)buffer;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Store the pair (0x100, get_highest_char(options)) at 'buffer' and return
the position following it. A NULL buffer is passed through unchanged, which
lets the caller use the same code for the size-only pass. */

static uint32_t *
append_non_ascii_range(uint32_t options, uint32_t *buffer)
{
if (buffer != NULL)
  {
  *buffer++ = 0x100;
  *buffer++ = get_highest_char(options);
  }

return buffer;
}
|
||||
|
||||
static size_t
|
||||
parse_class(uint32_t *ptr, uint32_t options, uint32_t *buffer)
|
||||
{
|
||||
size_t total_size = 0;
|
||||
size_t size;
|
||||
uint32_t meta_arg;
|
||||
uint32_t start_char;
|
||||
|
||||
while (TRUE)
|
||||
{
|
||||
switch (META_CODE(*ptr))
|
||||
{
|
||||
case META_ESCAPE:
|
||||
meta_arg = META_DATA(*ptr);
|
||||
switch (meta_arg)
|
||||
{
|
||||
case ESC_D:
|
||||
case ESC_W:
|
||||
case ESC_S:
|
||||
buffer = append_non_ascii_range(options, buffer);
|
||||
total_size += 2;
|
||||
break;
|
||||
|
||||
case ESC_h:
|
||||
size = append_char_list(PRIV(hspace_list), buffer);
|
||||
total_size += size;
|
||||
if (buffer != NULL) buffer += size;
|
||||
break;
|
||||
|
||||
case ESC_H:
|
||||
size = append_negated_char_list(PRIV(hspace_list), options, buffer);
|
||||
total_size += size;
|
||||
if (buffer != NULL) buffer += size;
|
||||
break;
|
||||
|
||||
case ESC_v:
|
||||
size = append_char_list(PRIV(vspace_list), buffer);
|
||||
total_size += size;
|
||||
if (buffer != NULL) buffer += size;
|
||||
break;
|
||||
|
||||
case ESC_V:
|
||||
size = append_negated_char_list(PRIV(vspace_list), options, buffer);
|
||||
total_size += size;
|
||||
if (buffer != NULL) buffer += size;
|
||||
break;
|
||||
|
||||
case ESC_p:
|
||||
case ESC_P:
|
||||
ptr++;
|
||||
if (meta_arg == ESC_p && (*ptr >> 16) == PT_ANY)
|
||||
{
|
||||
if (buffer != NULL)
|
||||
{
|
||||
buffer[0] = 0;
|
||||
buffer[1] = get_highest_char(options);
|
||||
buffer += 2;
|
||||
}
|
||||
total_size += 2;
|
||||
}
|
||||
break;
|
||||
}
|
||||
ptr++;
|
||||
continue;
|
||||
case META_POSIX_NEG:
|
||||
buffer = append_non_ascii_range(options, buffer);
|
||||
total_size += 2;
|
||||
ptr += 2;
|
||||
continue;
|
||||
case META_POSIX:
|
||||
ptr += 2;
|
||||
continue;
|
||||
case META_BIGVALUE:
|
||||
/* Character literal */
|
||||
ptr++;
|
||||
break;
|
||||
CLASS_END_CASES(*ptr)
|
||||
if (*ptr >= META_END) return total_size;
|
||||
break;
|
||||
}
|
||||
|
||||
start_char = *ptr;
|
||||
|
||||
if (ptr[1] == META_RANGE_LITERAL || ptr[1] == META_RANGE_ESCAPED)
|
||||
{
|
||||
ptr += 2;
|
||||
PCRE2_ASSERT(*ptr < META_END || *ptr == META_BIGVALUE);
|
||||
|
||||
if (*ptr == META_BIGVALUE) ptr++;
|
||||
|
||||
#ifdef EBCDIC
|
||||
#error "Missing EBCDIC support"
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (options & PARSE_CLASS_CASELESS_UTF)
|
||||
{
|
||||
size = utf_caseless_extend(start_char, *ptr++, options, buffer);
|
||||
if (buffer != NULL) buffer += size;
|
||||
total_size += size;
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (buffer != NULL)
|
||||
{
|
||||
buffer[0] = start_char;
|
||||
buffer[1] = *ptr;
|
||||
buffer += 2;
|
||||
}
|
||||
|
||||
ptr++;
|
||||
total_size += 2;
|
||||
}
|
||||
|
||||
return total_size;
|
||||
}
|
||||
|
||||
/* Extra uint32_t values for storing the lengths of range lists in
|
||||
the worst case. Two uint32_t lengths and a range end for a range
|
||||
starting before 255 */
|
||||
#define CHAR_LIST_EXTRA_SIZE 3
|
||||
|
||||
/* Starting character values for each character list. compile_optimize_class()
reads this table front to back, one entry per character list, while it walks
the sorted ranges from the last one towards the first. Which entries exist
depends on the code unit width and on Unicode support; the number of entries
minus one also determines the initial shift used for the list type bits. */
|
||||
|
||||
static const uint32_t char_list_starts[] = {
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
XCL_CHAR_LIST_HIGH_32_START,
|
||||
#endif
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32 || defined SUPPORT_UNICODE
|
||||
XCL_CHAR_LIST_LOW_32_START,
|
||||
#endif
|
||||
XCL_CHAR_LIST_HIGH_16_START,
|
||||
/* Must be terminated by XCL_CHAR_LIST_LOW_16_START,
|
||||
which also represents the end of the bitset. */
|
||||
XCL_CHAR_LIST_LOW_16_START,
|
||||
};
|
||||
|
||||
static class_ranges *
|
||||
compile_optimize_class(uint32_t *start_ptr, uint32_t options,
|
||||
uint32_t xoptions, compile_block *cb)
|
||||
{
|
||||
class_ranges* cranges;
|
||||
uint32_t *ptr;
|
||||
uint32_t *buffer;
|
||||
uint32_t *dst;
|
||||
uint32_t class_options = 0;
|
||||
size_t range_list_size = 0, total_size, i;
|
||||
uint32_t tmp1, tmp2;
|
||||
const uint32_t *char_list_next;
|
||||
uint16_t *next_char;
|
||||
uint32_t char_list_start, char_list_end;
|
||||
uint32_t range_start, range_end;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (options & PCRE2_UTF)
|
||||
class_options |= PARSE_CLASS_UTF;
|
||||
|
||||
if ((options & PCRE2_CASELESS) && (options & (PCRE2_UTF|PCRE2_UCP)))
|
||||
class_options |= PARSE_CLASS_CASELESS_UTF;
|
||||
|
||||
if (xoptions & PCRE2_EXTRA_CASELESS_RESTRICT)
|
||||
class_options |= PARSE_CLASS_RESTRICTED_UTF;
|
||||
|
||||
if (xoptions & PCRE2_EXTRA_TURKISH_CASING)
|
||||
class_options |= PARSE_CLASS_TURKISH_UTF;
|
||||
#endif
|
||||
|
||||
/* Compute required space for the range. */
|
||||
|
||||
range_list_size = parse_class(start_ptr, class_options, NULL);
|
||||
PCRE2_ASSERT((range_list_size & 0x1) == 0);
|
||||
|
||||
/* Allocate buffer. The total_size also represents the end of the buffer. */
|
||||
|
||||
total_size = range_list_size +
|
||||
((range_list_size >= 2) ? CHAR_LIST_EXTRA_SIZE : 0);
|
||||
|
||||
cranges = cb->cx->memctl.malloc(
|
||||
sizeof(class_ranges) + total_size * sizeof(uint32_t),
|
||||
cb->cx->memctl.memory_data);
|
||||
|
||||
if (cranges == NULL) return NULL;
|
||||
|
||||
cranges->next = NULL;
|
||||
cranges->range_list_size = (uint16_t)range_list_size;
|
||||
cranges->char_lists_types = 0;
|
||||
cranges->char_lists_size = 0;
|
||||
cranges->char_lists_start = 0;
|
||||
|
||||
if (range_list_size == 0) return cranges;
|
||||
|
||||
buffer = (uint32_t*)(cranges + 1);
|
||||
parse_class(start_ptr, class_options, buffer);
|
||||
|
||||
/* Using <= instead of == to help static analysis. */
|
||||
if (range_list_size <= 2) return cranges;
|
||||
|
||||
/* In-place sorting of ranges. */
|
||||
|
||||
i = (((range_list_size >> 2) - 1) << 1);
|
||||
while (TRUE)
|
||||
{
|
||||
do_heapify(buffer, range_list_size, i);
|
||||
if (i == 0) break;
|
||||
i -= 2;
|
||||
}
|
||||
|
||||
i = range_list_size - 2;
|
||||
while (TRUE)
|
||||
{
|
||||
tmp1 = buffer[i];
|
||||
tmp2 = buffer[i + 1];
|
||||
buffer[i] = buffer[0];
|
||||
buffer[i + 1] = buffer[1];
|
||||
buffer[0] = tmp1;
|
||||
buffer[1] = tmp2;
|
||||
|
||||
do_heapify(buffer, i, 0);
|
||||
if (i == 0) break;
|
||||
i -= 2;
|
||||
}
|
||||
|
||||
/* Merge ranges whenever possible. */
|
||||
dst = buffer;
|
||||
ptr = buffer + 2;
|
||||
range_list_size -= 2;
|
||||
|
||||
/* The second condition is a very rare corner case, where the end of the last
|
||||
range is the maximum character. This range cannot be extended further. */
|
||||
|
||||
while (range_list_size > 0 && dst[1] != ~(uint32_t)0)
|
||||
{
|
||||
if (dst[1] + 1 < ptr[0])
|
||||
{
|
||||
dst += 2;
|
||||
dst[0] = ptr[0];
|
||||
dst[1] = ptr[1];
|
||||
}
|
||||
else if (dst[1] < ptr[1]) dst[1] = ptr[1];
|
||||
|
||||
ptr += 2;
|
||||
range_list_size -= 2;
|
||||
}
|
||||
|
||||
PCRE2_ASSERT(dst[1] <= get_highest_char(class_options));
|
||||
|
||||
/* When the number of ranges are less than six,
|
||||
they are not converted to range lists. */
|
||||
|
||||
ptr = buffer;
|
||||
while (ptr < dst && ptr[1] < 0x100) ptr += 2;
|
||||
if (dst - ptr < (2 * (6 - 1)))
|
||||
{
|
||||
cranges->range_list_size = (uint16_t)(dst + 2 - buffer);
|
||||
return cranges;
|
||||
}
|
||||
|
||||
/* Compute character lists structures. */
|
||||
|
||||
char_list_next = char_list_starts;
|
||||
char_list_start = *char_list_next++;
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
char_list_end = XCL_CHAR_LIST_HIGH_32_END;
|
||||
#elif defined SUPPORT_UNICODE
|
||||
char_list_end = XCL_CHAR_LIST_LOW_32_END;
|
||||
#else
|
||||
char_list_end = XCL_CHAR_LIST_HIGH_16_END;
|
||||
#endif
|
||||
next_char = (uint16_t*)(buffer + total_size);
|
||||
|
||||
tmp1 = 0;
|
||||
tmp2 = ((sizeof(char_list_starts) / sizeof(uint32_t)) - 1) * XCL_TYPE_BIT_LEN;
|
||||
PCRE2_ASSERT(tmp2 <= 3 * XCL_TYPE_BIT_LEN && tmp2 >= XCL_TYPE_BIT_LEN);
|
||||
range_start = dst[0];
|
||||
range_end = dst[1];
|
||||
|
||||
while (TRUE)
|
||||
{
|
||||
if (range_start >= char_list_start)
|
||||
{
|
||||
if (range_start == range_end || range_end < char_list_end)
|
||||
{
|
||||
tmp1++;
|
||||
next_char--;
|
||||
|
||||
if (char_list_start < XCL_CHAR_LIST_LOW_32_START)
|
||||
*next_char = (uint16_t)((range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END);
|
||||
else
|
||||
*(uint32_t*)(--next_char) =
|
||||
(range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END;
|
||||
}
|
||||
|
||||
if (range_start < range_end)
|
||||
{
|
||||
if (range_start > char_list_start)
|
||||
{
|
||||
tmp1++;
|
||||
next_char--;
|
||||
|
||||
if (char_list_start < XCL_CHAR_LIST_LOW_32_START)
|
||||
*next_char = (uint16_t)(range_start << XCL_CHAR_SHIFT);
|
||||
else
|
||||
*(uint32_t*)(--next_char) = (range_start << XCL_CHAR_SHIFT);
|
||||
}
|
||||
else
|
||||
cranges->char_lists_types |= XCL_BEGIN_WITH_RANGE << tmp2;
|
||||
}
|
||||
|
||||
PCRE2_ASSERT((uint32_t*)next_char >= dst + 2);
|
||||
|
||||
if (dst > buffer)
|
||||
{
|
||||
dst -= 2;
|
||||
range_start = dst[0];
|
||||
range_end = dst[1];
|
||||
continue;
|
||||
}
|
||||
|
||||
range_start = 0;
|
||||
range_end = 0;
|
||||
}
|
||||
|
||||
if (range_end >= char_list_start)
|
||||
{
|
||||
PCRE2_ASSERT(range_start < char_list_start);
|
||||
|
||||
if (range_end < char_list_end)
|
||||
{
|
||||
tmp1++;
|
||||
next_char--;
|
||||
|
||||
if (char_list_start < XCL_CHAR_LIST_LOW_32_START)
|
||||
*next_char = (uint16_t)((range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END);
|
||||
else
|
||||
*(uint32_t*)(--next_char) =
|
||||
(range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END;
|
||||
|
||||
PCRE2_ASSERT((uint32_t*)next_char >= dst + 2);
|
||||
}
|
||||
|
||||
cranges->char_lists_types |= XCL_BEGIN_WITH_RANGE << tmp2;
|
||||
}
|
||||
|
||||
if (tmp1 >= XCL_ITEM_COUNT_MASK)
|
||||
{
|
||||
cranges->char_lists_types |= XCL_ITEM_COUNT_MASK << tmp2;
|
||||
next_char--;
|
||||
|
||||
if (char_list_start < XCL_CHAR_LIST_LOW_32_START)
|
||||
*next_char = (uint16_t)tmp1;
|
||||
else
|
||||
*(uint32_t*)(--next_char) = tmp1;
|
||||
}
|
||||
else
|
||||
cranges->char_lists_types |= tmp1 << tmp2;
|
||||
|
||||
if (range_start < XCL_CHAR_LIST_LOW_16_START) break;
|
||||
|
||||
PCRE2_ASSERT(tmp2 >= XCL_TYPE_BIT_LEN);
|
||||
char_list_end = char_list_start - 1;
|
||||
char_list_start = *char_list_next++;
|
||||
tmp1 = 0;
|
||||
tmp2 -= XCL_TYPE_BIT_LEN;
|
||||
}
|
||||
|
||||
if (dst[0] < XCL_CHAR_LIST_LOW_16_START) dst += 2;
|
||||
PCRE2_ASSERT((uint16_t*)dst <= next_char);
|
||||
|
||||
cranges->char_lists_size =
|
||||
(size_t)((uint8_t*)(buffer + total_size) - (uint8_t*)next_char);
|
||||
cranges->char_lists_start = (size_t)((uint8_t*)next_char - (uint8_t*)buffer);
|
||||
cranges->range_list_size = (uint16_t)(dst - buffer);
|
||||
return cranges;
|
||||
}
|
||||
|
||||
#endif /* SUPPORT_WIDE_CHARS */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
|
||||
/* Add to a 256-bit class bitmap every character below 256 that has (or, when
'negated' is set, does not have) the given Unicode property.

Arguments:
  ptype      property type (PT_xxx)
  pdata      property data, its meaning depends on ptype
  negated    TRUE if the characters without the property are wanted
  classbits  the 32-byte bitmap to update; bits are only ever set

For PT_ANY the whole bitmap is set when not negated, and nothing is changed
when negated. */

void PRIV(update_classbits)(uint32_t ptype, uint32_t pdata, BOOL negated,
  uint8_t *classbits)
{
/* Update PRIV(xclass) when this function is changed. */
const ucd_record *prop;
uint32_t gentype;
int c, chartype;
BOOL matched;

if (ptype == PT_ANY)
  {
  if (!negated) memset(classbits, 0xff, 32);
  return;
  }

for (c = 0; c < 256; c++)
  {
  prop = GET_UCD(c);
  matched = FALSE;
  (void)matched;

  switch (ptype)
    {
    case PT_LAMP:
    chartype = prop->chartype;
    matched = (chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt);
    break;

    case PT_GC:
    matched = (PRIV(ucp_gentype)[prop->chartype] == pdata);
    break;

    case PT_PC:
    matched = (prop->chartype == pdata);
    break;

    case PT_SC:
    matched = (prop->script == pdata);
    break;

    case PT_SCX:
    matched = (prop->script == pdata ||
      MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), pdata) != 0);
    break;

    case PT_ALNUM:
    gentype = PRIV(ucp_gentype)[prop->chartype];
    matched = (gentype == ucp_L || gentype == ucp_N);
    break;

    case PT_SPACE:    /* Perl space */
    case PT_PXSPACE:  /* POSIX space */
    switch(c)
      {
      HSPACE_BYTE_CASES:
      VSPACE_BYTE_CASES:
      matched = TRUE;
      break;

      default:
      matched = (PRIV(ucp_gentype)[prop->chartype] == ucp_Z);
      break;
      }
    break;

    case PT_WORD:
    chartype = prop->chartype;
    gentype = PRIV(ucp_gentype)[chartype];
    matched = (gentype == ucp_L || gentype == ucp_N ||
               chartype == ucp_Mn || chartype == ucp_Pc);
    break;

    case PT_UCNC:
    matched = (c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
               c == CHAR_GRAVE_ACCENT || c >= 0xa0);
    break;

    case PT_BIDICL:
    matched = (UCD_BIDICLASS_PROP(prop) == pdata);
    break;

    case PT_BOOL:
    matched = MAPBIT(PRIV(ucd_boolprop_sets) +
                     UCD_BPROPS_PROP(prop), pdata) != 0;
    break;

    case PT_PXGRAPH:
    chartype = prop->chartype;
    gentype = PRIV(ucp_gentype)[chartype];
    matched = (gentype != ucp_Z && (gentype != ucp_C || chartype == ucp_Cf));
    break;

    case PT_PXPRINT:
    chartype = prop->chartype;
    matched = (chartype != ucp_Zl && chartype != ucp_Zp &&
       (PRIV(ucp_gentype)[chartype] != ucp_C || chartype == ucp_Cf));
    break;

    case PT_PXPUNCT:
    gentype = PRIV(ucp_gentype)[prop->chartype];
    matched = (gentype == ucp_P || (c < 128 && gentype == ucp_S));
    break;

    default:
    PCRE2_ASSERT(ptype == PT_PXXDIGIT);
    matched = (c >= CHAR_0 && c <= CHAR_9) ||
              (c >= CHAR_A && c <= CHAR_F) ||
              (c >= CHAR_a && c <= CHAR_f);
    break;
    }

  /* Byte c / 8 of the bitmap holds the bit of character c. */

  if (negated? !matched : matched)
    classbits[c >> 3] |= (uint8_t)(1 << (c & 0x7));
  }
}
|
||||
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
|
||||
/*************************************************
|
||||
* XClass related properties *
|
||||
*************************************************/
|
||||
|
||||
/* XClass needs to be generated. */
|
||||
#define XCLASS_REQUIRED 0x1
|
||||
/* XClass contains 8-bit characters (code points < 256). */
|
||||
#define XCLASS_HAS_8BIT_CHARS 0x2
|
||||
/* XClass has properties. */
|
||||
#define XCLASS_HAS_PROPS 0x4
|
||||
/* XClass has character lists. */
|
||||
#define XCLASS_HAS_CHAR_LISTS 0x8
|
||||
/* XClass matches all characters >= 256. */
|
||||
#define XCLASS_HIGH_ANY 0x10
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Internal entry point for add range to class *
|
||||
*************************************************/
|
||||
|
||||
/* This function sets the overall range for characters < 256.
|
||||
It also handles non-utf case folding.
|
||||
|
||||
Arguments:
|
||||
options the options bits
|
||||
xoptions the extra options bits
|
||||
cb compile data
|
||||
start start of range character
|
||||
end end of range character
|
||||
|
||||
Returns: cb->classbits is updated
|
||||
*/
|
||||
|
||||
static void
|
||||
add_to_class(uint32_t options, uint32_t xoptions, compile_block *cb,
|
||||
uint32_t start, uint32_t end)
|
||||
{
|
||||
uint8_t *classbits = cb->classbits.classbits;
|
||||
uint32_t c, byte_start, byte_end;
|
||||
uint32_t classbits_end = (end <= 0xff ? end : 0xff);
|
||||
|
||||
/* If caseless matching is required, scan the range and process alternate
|
||||
cases. In Unicode, there are 8-bit characters that have alternate cases that
|
||||
are greater than 255 and vice-versa (though these may be ignored if caseless
|
||||
restriction is in force). Sometimes we can just extend the original range. */
|
||||
|
||||
if ((options & PCRE2_CASELESS) != 0)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
/* UTF mode. This branch is taken if we don't support wide characters (e.g.
|
||||
8-bit library, without UTF), but we do treat those characters as Unicode
|
||||
(if UCP flag is set). In this case, we only need to expand the character class
|
||||
set to include the case pairs which are in the 0-255 codepoint range. */
|
||||
if ((options & (PCRE2_UTF|PCRE2_UCP)) != 0)
|
||||
{
|
||||
BOOL turkish_i = (xoptions & (PCRE2_EXTRA_TURKISH_CASING|PCRE2_EXTRA_CASELESS_RESTRICT)) ==
|
||||
PCRE2_EXTRA_TURKISH_CASING;
|
||||
if (start < 128)
|
||||
{
|
||||
uint32_t lo_end = (classbits_end < 127 ? classbits_end : 127);
|
||||
for (c = start; c <= lo_end; c++)
|
||||
{
|
||||
if (turkish_i && UCD_ANY_I(c)) continue;
|
||||
SETBIT(classbits, cb->fcc[c]);
|
||||
}
|
||||
}
|
||||
if (classbits_end >= 128)
|
||||
{
|
||||
uint32_t hi_start = (start > 128 ? start : 128);
|
||||
for (c = hi_start; c <= classbits_end; c++)
|
||||
{
|
||||
uint32_t co = UCD_OTHERCASE(c);
|
||||
if (co <= 0xff) SETBIT(classbits, co);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Not UTF mode */
|
||||
{
|
||||
for (c = start; c <= classbits_end; c++)
|
||||
SETBIT(classbits, cb->fcc[c]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Use the bitmap for characters < 256. Otherwise use extra data. */
|
||||
|
||||
byte_start = (start + 7) >> 3;
|
||||
byte_end = (classbits_end + 1) >> 3;
|
||||
|
||||
if (byte_start >= byte_end)
|
||||
{
|
||||
for (c = start; c <= classbits_end; c++)
|
||||
/* Regardless of start, c will always be <= 255. */
|
||||
SETBIT(classbits, c);
|
||||
return;
|
||||
}
|
||||
|
||||
for (c = byte_start; c < byte_end; c++)
|
||||
classbits[c] = 0xff;
|
||||
|
||||
byte_start <<= 3;
|
||||
byte_end <<= 3;
|
||||
|
||||
for (c = start; c < byte_start; c++)
|
||||
SETBIT(classbits, c);
|
||||
|
||||
for (c = byte_end; c <= classbits_end; c++)
|
||||
SETBIT(classbits, c);
|
||||
}
|
||||
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
/*************************************************
|
||||
* Internal entry point for add list to class *
|
||||
*************************************************/
|
||||
|
||||
/* This function is used for adding a list of horizontal or vertical whitespace
|
||||
characters to a class. The list must be in order so that ranges of characters
|
||||
can be detected and handled appropriately. This function sets the overall range
|
||||
so that the internal functions can try to avoid duplication when handling
|
||||
case-independence.
|
||||
|
||||
Arguments:
|
||||
options the options bits
|
||||
xoptions the extra options bits
|
||||
cb contains pointers to tables etc.
|
||||
p points to row of 32-bit values, terminated by NOTACHAR
|
||||
|
||||
Returns: cb->classbits is updated
|
||||
*/
|
||||
|
||||
static void
|
||||
add_list_to_class(uint32_t options, uint32_t xoptions, compile_block *cb,
|
||||
const uint32_t *p)
|
||||
{
|
||||
while (p[0] < 256)
|
||||
{
|
||||
unsigned int n = 0;
|
||||
|
||||
while(p[n+1] == p[0] + n + 1) n++;
|
||||
add_to_class(options, xoptions, cb, p[0], p[n]);
|
||||
|
||||
p += n + 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Add characters not in a list to a class *
|
||||
*************************************************/
|
||||
|
||||
/* This function is used for adding the complement of a list of horizontal or
|
||||
vertical whitespace to a class. The list must be in order.
|
||||
|
||||
Arguments:
|
||||
options the options bits
|
||||
xoptions the extra options bits
|
||||
cb contains pointers to tables etc.
|
||||
p points to row of 32-bit values, terminated by NOTACHAR
|
||||
|
||||
Returns: cb->classbits is updated
|
||||
*/
|
||||
|
||||
static void
|
||||
add_not_list_to_class(uint32_t options, uint32_t xoptions, compile_block *cb,
|
||||
const uint32_t *p)
|
||||
{
|
||||
if (p[0] > 0)
|
||||
add_to_class(options, xoptions, cb, 0, p[0] - 1);
|
||||
while (p[0] < 256)
|
||||
{
|
||||
while (p[1] == p[0] + 1) p++;
|
||||
add_to_class(options, xoptions, cb, p[0] + 1, (p[1] > 255) ? 255 : p[1] - 1);
|
||||
p++;
|
||||
}
|
||||
}
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Main entry-point to compile a character class *
|
||||
*************************************************/
|
||||
|
||||
/* This function consumes a "leaf", which is a set of characters that will
|
||||
become a single OP_CLASS, OP_NCLASS, OP_XCLASS, or OP_ALLANY. */
|
||||
|
||||
uint32_t *
|
||||
PRIV(compile_class_not_nested)(uint32_t options, uint32_t xoptions,
|
||||
uint32_t *start_ptr, PCRE2_UCHAR **pcode, BOOL negate_class, BOOL* has_bitmap,
|
||||
int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr)
|
||||
{
|
||||
uint32_t *pptr = start_ptr;
|
||||
PCRE2_UCHAR *code = *pcode;
|
||||
BOOL should_flip_negation;
|
||||
const uint8_t *cbits = cb->cbits;
|
||||
/* Some functions such as add_to_class() or eclass processing
|
||||
expects that the bitset is stored in cb->classbits.classbits. */
|
||||
uint8_t *const classbits = cb->classbits.classbits;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
BOOL utf = (options & PCRE2_UTF) != 0;
|
||||
#else /* No Unicode support */
|
||||
BOOL utf = FALSE;
|
||||
#endif
|
||||
|
||||
/* Helper variables for OP_XCLASS opcode (for characters > 255). */
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
uint32_t xclass_props;
|
||||
PCRE2_UCHAR *class_uchardata;
|
||||
class_ranges* cranges;
|
||||
#endif
|
||||
|
||||
/* If an XClass contains a negative special such as \S, we need to flip the
|
||||
negation flag at the end, so that support for characters > 255 works correctly
|
||||
(they are all included in the class). An XClass may need to insert specific
|
||||
matching or non-matching code for wide characters.
|
||||
*/
|
||||
|
||||
should_flip_negation = FALSE;
|
||||
|
||||
/* XClass will be used when characters > 255 might match. */
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
xclass_props = 0;
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
cranges = NULL;
|
||||
|
||||
if (utf)
|
||||
#endif
|
||||
{
|
||||
if (lengthptr != NULL)
|
||||
{
|
||||
cranges = compile_optimize_class(pptr, options, xoptions, cb);
|
||||
|
||||
if (cranges == NULL)
|
||||
{
|
||||
*errorcodeptr = ERR21;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Caching the pre-processed character ranges. */
|
||||
if (cb->next_cranges != NULL)
|
||||
cb->next_cranges->next = cranges;
|
||||
else
|
||||
cb->cranges = cranges;
|
||||
|
||||
cb->next_cranges = cranges;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Reuse the pre-processed character ranges. */
|
||||
cranges = cb->cranges;
|
||||
PCRE2_ASSERT(cranges != NULL);
|
||||
cb->cranges = cranges->next;
|
||||
}
|
||||
|
||||
if (cranges->range_list_size > 0)
|
||||
{
|
||||
const uint32_t *ranges = (const uint32_t*)(cranges + 1);
|
||||
|
||||
if (ranges[0] <= 255)
|
||||
xclass_props |= XCLASS_HAS_8BIT_CHARS;
|
||||
|
||||
if (ranges[cranges->range_list_size - 1] == GET_MAX_CHAR_VALUE(utf) &&
|
||||
ranges[cranges->range_list_size - 2] <= 256)
|
||||
xclass_props |= XCLASS_HIGH_ANY;
|
||||
}
|
||||
}
|
||||
|
||||
class_uchardata = code + LINK_SIZE + 2; /* For XCLASS items */
|
||||
#endif /* SUPPORT_WIDE_CHARS */
|
||||
|
||||
/* Initialize the 256-bit (32-byte) bit map to all zeros. We build the map
|
||||
in a temporary bit of memory, in case the class contains fewer than two
|
||||
8-bit characters because in that case the compiled code doesn't use the bit
|
||||
map. */
|
||||
|
||||
memset(classbits, 0, 32);
|
||||
|
||||
/* Process items until end_ptr is reached. */
|
||||
|
||||
while (TRUE)
|
||||
{
|
||||
uint32_t meta = *(pptr++);
|
||||
BOOL local_negate;
|
||||
int posix_class;
|
||||
int taboffset, tabopt;
|
||||
class_bits_storage pbits;
|
||||
uint32_t escape, c;
|
||||
|
||||
/* Handle POSIX classes such as [:alpha:] etc. */
|
||||
switch (META_CODE(meta))
|
||||
{
|
||||
case META_POSIX:
|
||||
case META_POSIX_NEG:
|
||||
|
||||
local_negate = (meta == META_POSIX_NEG);
|
||||
posix_class = *(pptr++);
|
||||
|
||||
if (local_negate) should_flip_negation = TRUE; /* Note negative special */
|
||||
|
||||
/* If matching is caseless, upper and lower are converted to alpha.
|
||||
This relies on the fact that the class table starts with alpha,
|
||||
lower, upper as the first 3 entries. */
|
||||
|
||||
if ((options & PCRE2_CASELESS) != 0 && posix_class <= 2)
|
||||
posix_class = 0;
|
||||
|
||||
/* When PCRE2_UCP is set, some of the POSIX classes are converted to
|
||||
different escape sequences that use Unicode properties \p or \P.
|
||||
Others that are not available via \p or \P have to generate
|
||||
XCL_PROP/XCL_NOTPROP directly, which is done here. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
/* TODO This entire block of code here appears to be unreachable!? I simply
|
||||
can't see how it can be hit, given that the frontend parser doesn't emit
|
||||
META_POSIX for GRAPH/PRINT/PUNCT when UCP is set. */
|
||||
if ((options & PCRE2_UCP) != 0 &&
|
||||
(xoptions & PCRE2_EXTRA_ASCII_POSIX) == 0)
|
||||
{
|
||||
uint32_t ptype;
|
||||
|
||||
switch(posix_class)
|
||||
{
|
||||
case PC_GRAPH:
|
||||
case PC_PRINT:
|
||||
case PC_PUNCT:
|
||||
ptype = (posix_class == PC_GRAPH)? PT_PXGRAPH :
|
||||
(posix_class == PC_PRINT)? PT_PXPRINT : PT_PXPUNCT;
|
||||
|
||||
PRIV(update_classbits)(ptype, 0, local_negate, classbits);
|
||||
|
||||
if ((xclass_props & XCLASS_HIGH_ANY) == 0)
|
||||
{
|
||||
if (lengthptr != NULL)
|
||||
*lengthptr += 3;
|
||||
else
|
||||
{
|
||||
*class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP;
|
||||
*class_uchardata++ = (PCRE2_UCHAR)ptype;
|
||||
*class_uchardata++ = 0;
|
||||
}
|
||||
xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_PROPS;
|
||||
}
|
||||
continue;
|
||||
|
||||
/* For the other POSIX classes (ex: ascii) we are going to
|
||||
fall through to the non-UCP case and build a bit map for
|
||||
characters with code points less than 256. However, if we are in
|
||||
a negated POSIX class, characters with code points greater than
|
||||
255 must either all match or all not match, depending on whether
|
||||
the whole class is not or is negated. For example, for
|
||||
[[:^ascii:]... they must all match, whereas for [^[:^ascii:]...
|
||||
they must not.
|
||||
|
||||
In the special case where there are no xclass items, this is
|
||||
automatically handled by the use of OP_CLASS or OP_NCLASS, but an
|
||||
explicit range is needed for OP_XCLASS. Setting a flag here
|
||||
causes the range to be generated later when it is known that
|
||||
OP_XCLASS is required. In the 8-bit library this is relevant only in
|
||||
utf mode, since no wide characters can exist otherwise. */
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* In the non-UCP case, or when UCP makes no difference, we build the
|
||||
bit map for the POSIX class in a chunk of local store because we may
|
||||
be adding and subtracting from it, and we don't want to subtract bits
|
||||
that may be in the main map already. At the end we or the result into
|
||||
the bit map that is being built. */
|
||||
|
||||
posix_class *= 3;
|
||||
|
||||
/* Copy in the first table (always present) */
|
||||
|
||||
memcpy(pbits.classbits, cbits + PRIV(posix_class_maps)[posix_class], 32);
|
||||
|
||||
/* If there is a second table, add or remove it as required. */
|
||||
|
||||
taboffset = PRIV(posix_class_maps)[posix_class + 1];
|
||||
tabopt = PRIV(posix_class_maps)[posix_class + 2];
|
||||
|
||||
if (taboffset >= 0)
|
||||
{
|
||||
if (tabopt >= 0)
|
||||
for (int i = 0; i < 32; i++)
|
||||
pbits.classbits[i] |= cbits[i + taboffset];
|
||||
else
|
||||
for (int i = 0; i < 32; i++)
|
||||
pbits.classbits[i] &= (uint8_t)(~cbits[i + taboffset]);
|
||||
}
|
||||
|
||||
/* Now see if we need to remove any special characters. An option
|
||||
value of 1 removes vertical space and 2 removes underscore. */
|
||||
|
||||
if (tabopt < 0) tabopt = -tabopt;
|
||||
if (tabopt == 1) pbits.classbits[1] &= ~0x3c;
|
||||
else if (tabopt == 2) pbits.classbits[11] &= 0x7f;
|
||||
|
||||
/* Add the POSIX table or its complement into the main table that is
|
||||
being built and we are done. */
|
||||
|
||||
{
|
||||
uint32_t *classwords = cb->classbits.classwords;
|
||||
|
||||
if (local_negate)
|
||||
for (int i = 0; i < 8; i++)
|
||||
classwords[i] |= (uint32_t)(~pbits.classwords[i]);
|
||||
else
|
||||
for (int i = 0; i < 8; i++)
|
||||
classwords[i] |= pbits.classwords[i];
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
/* Every class contains at least one < 256 character. */
|
||||
xclass_props |= XCLASS_HAS_8BIT_CHARS;
|
||||
#endif
|
||||
continue; /* End of POSIX handling */
|
||||
|
||||
/* Other than POSIX classes, the only items we should encounter are
|
||||
\d-type escapes and literal characters (possibly as ranges). */
|
||||
case META_BIGVALUE:
|
||||
meta = *(pptr++);
|
||||
break;
|
||||
|
||||
case META_ESCAPE:
|
||||
escape = META_DATA(meta);
|
||||
|
||||
switch(escape)
|
||||
{
|
||||
case ESC_d:
|
||||
for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit];
|
||||
break;
|
||||
|
||||
case ESC_D:
|
||||
should_flip_negation = TRUE;
|
||||
for (int i = 0; i < 32; i++)
|
||||
classbits[i] |= (uint8_t)(~cbits[i+cbit_digit]);
|
||||
break;
|
||||
|
||||
case ESC_w:
|
||||
for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word];
|
||||
break;
|
||||
|
||||
case ESC_W:
|
||||
should_flip_negation = TRUE;
|
||||
for (int i = 0; i < 32; i++)
|
||||
classbits[i] |= (uint8_t)(~cbits[i+cbit_word]);
|
||||
break;
|
||||
|
||||
/* Perl 5.004 onwards omitted VT from \s, but restored it at Perl
|
||||
5.18. Before PCRE 8.34, we had to preserve the VT bit if it was
|
||||
previously set by something earlier in the character class.
|
||||
Luckily, the value of CHAR_VT is 0x0b in both ASCII and EBCDIC, so
|
||||
we could just adjust the appropriate bit. From PCRE 8.34 we no
|
||||
longer treat \s and \S specially. */
|
||||
|
||||
case ESC_s:
|
||||
for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space];
|
||||
break;
|
||||
|
||||
case ESC_S:
|
||||
should_flip_negation = TRUE;
|
||||
for (int i = 0; i < 32; i++)
|
||||
classbits[i] |= (uint8_t)(~cbits[i+cbit_space]);
|
||||
break;
|
||||
|
||||
/* When adding the horizontal or vertical space lists to a class, or
|
||||
their complements, disable PCRE2_CASELESS, because it just wastes
|
||||
time, and in the "not-x" UTF cases can create unwanted duplicates in
|
||||
the XCLASS list (provoked by characters that have more than one other
|
||||
case and by both cases being in the same "not-x" sublist). */
|
||||
|
||||
case ESC_h:
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (cranges != NULL) break;
|
||||
#endif
|
||||
add_list_to_class(options & ~PCRE2_CASELESS, xoptions,
|
||||
cb, PRIV(hspace_list));
|
||||
#else
|
||||
PCRE2_ASSERT(cranges != NULL);
|
||||
#endif
|
||||
break;
|
||||
|
||||
case ESC_H:
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (cranges != NULL) break;
|
||||
#endif
|
||||
add_not_list_to_class(options & ~PCRE2_CASELESS, xoptions,
|
||||
cb, PRIV(hspace_list));
|
||||
#else
|
||||
PCRE2_ASSERT(cranges != NULL);
|
||||
#endif
|
||||
break;
|
||||
|
||||
case ESC_v:
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (cranges != NULL) break;
|
||||
#endif
|
||||
add_list_to_class(options & ~PCRE2_CASELESS, xoptions,
|
||||
cb, PRIV(vspace_list));
|
||||
#else
|
||||
PCRE2_ASSERT(cranges != NULL);
|
||||
#endif
|
||||
break;
|
||||
|
||||
case ESC_V:
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (cranges != NULL) break;
|
||||
#endif
|
||||
add_not_list_to_class(options & ~PCRE2_CASELESS, xoptions,
|
||||
cb, PRIV(vspace_list));
|
||||
#else
|
||||
PCRE2_ASSERT(cranges != NULL);
|
||||
#endif
|
||||
break;
|
||||
|
||||
/* If Unicode is not supported, \P and \p are not allowed and are
|
||||
faulted at parse time, so will never appear here. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
case ESC_p:
|
||||
case ESC_P:
|
||||
{
|
||||
uint32_t ptype = *pptr >> 16;
|
||||
uint32_t pdata = *(pptr++) & 0xffff;
|
||||
|
||||
/* The "Any" is processed by PRIV(update_classbits)(). */
|
||||
if (ptype == PT_ANY)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (!utf && escape == ESC_p) memset(classbits, 0xff, 32);
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
|
||||
PRIV(update_classbits)(ptype, pdata, (escape == ESC_P), classbits);
|
||||
|
||||
if ((xclass_props & XCLASS_HIGH_ANY) == 0)
|
||||
{
|
||||
if (lengthptr != NULL)
|
||||
*lengthptr += 3;
|
||||
else
|
||||
{
|
||||
*class_uchardata++ = (escape == ESC_p)? XCL_PROP : XCL_NOTPROP;
|
||||
*class_uchardata++ = ptype;
|
||||
*class_uchardata++ = pdata;
|
||||
}
|
||||
xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_PROPS;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
/* Every non-property class contains at least one < 256 character. */
|
||||
xclass_props |= XCLASS_HAS_8BIT_CHARS;
|
||||
#endif
|
||||
/* End handling \d-type escapes */
|
||||
continue;
|
||||
|
||||
CLASS_END_CASES(meta)
|
||||
/* Literals. */
|
||||
if (meta < META_END) break;
|
||||
/* Non-literals: end of class contents. */
|
||||
goto END_PROCESSING;
|
||||
}
|
||||
|
||||
/* A literal character may be followed by a range meta. At parse time
|
||||
there are checks for out-of-order characters, for ranges where the two
|
||||
characters are equal, and for hyphens that cannot indicate a range. At
|
||||
this point, therefore, no checking is needed. */
|
||||
|
||||
c = meta;
|
||||
|
||||
/* Remember if \r or \n were explicitly used */
|
||||
|
||||
if (c == CHAR_CR || c == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF;
|
||||
|
||||
/* Process a character range */
|
||||
|
||||
if (*pptr == META_RANGE_LITERAL || *pptr == META_RANGE_ESCAPED)
|
||||
{
|
||||
uint32_t d;
|
||||
|
||||
#ifdef EBCDIC
|
||||
BOOL range_is_literal = (*pptr == META_RANGE_LITERAL);
|
||||
#endif
|
||||
++pptr;
|
||||
d = *(pptr++);
|
||||
if (d == META_BIGVALUE) d = *(pptr++);
|
||||
|
||||
/* Remember an explicit \r or \n, and add the range to the class. */
|
||||
|
||||
if (d == CHAR_CR || d == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF;
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (cranges != NULL) continue;
|
||||
xclass_props |= XCLASS_HAS_8BIT_CHARS;
|
||||
#endif
|
||||
|
||||
/* In an EBCDIC environment, Perl treats alphabetic ranges specially
|
||||
because there are holes in the encoding, and simply using the range
|
||||
A-Z (for example) would include the characters in the holes. This
|
||||
applies only to literal ranges; [\xC1-\xE9] is different to [A-Z]. */
|
||||
|
||||
#ifdef EBCDIC
|
||||
if (range_is_literal &&
|
||||
(cb->ctypes[c] & ctype_letter) != 0 &&
|
||||
(cb->ctypes[d] & ctype_letter) != 0 &&
|
||||
(c <= CHAR_z) == (d <= CHAR_z))
|
||||
{
|
||||
uint32_t uc = (d <= CHAR_z)? 0 : 64;
|
||||
uint32_t C = c - uc;
|
||||
uint32_t D = d - uc;
|
||||
|
||||
if (C <= CHAR_i)
|
||||
{
|
||||
add_to_class(options, xoptions, cb, C + uc,
|
||||
((D < CHAR_i)? D : CHAR_i) + uc);
|
||||
C = CHAR_j;
|
||||
}
|
||||
|
||||
if (C <= D && C <= CHAR_r)
|
||||
{
|
||||
add_to_class(options, xoptions, cb, C + uc,
|
||||
((D < CHAR_r)? D : CHAR_r) + uc);
|
||||
C = CHAR_s;
|
||||
}
|
||||
|
||||
if (C <= D)
|
||||
add_to_class(options, xoptions, cb, C + uc, D + uc);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
/* Not an EBCDIC special range */
|
||||
|
||||
add_to_class(options, xoptions, cb, c, d);
|
||||
#else
|
||||
PCRE2_ASSERT(cranges != NULL);
|
||||
#endif
|
||||
continue;
|
||||
} /* End of range handling */
|
||||
|
||||
/* Character ranges are ignored when class_ranges is present. */
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (cranges != NULL) continue;
|
||||
xclass_props |= XCLASS_HAS_8BIT_CHARS;
|
||||
#endif
|
||||
/* Handle a single character. */
|
||||
|
||||
add_to_class(options, xoptions, cb, meta, meta);
|
||||
#else
|
||||
PCRE2_ASSERT(cranges != NULL);
|
||||
#endif
|
||||
} /* End of main class-processing loop */
|
||||
|
||||
END_PROCESSING:
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
PCRE2_ASSERT((xclass_props & XCLASS_HAS_PROPS) == 0 ||
|
||||
(xclass_props & XCLASS_HIGH_ANY) == 0);
|
||||
|
||||
if (cranges != NULL)
|
||||
{
|
||||
uint32_t *range = (uint32_t*)(cranges + 1);
|
||||
uint32_t *end = range + cranges->range_list_size;
|
||||
|
||||
while (range < end && range[0] < 256)
|
||||
{
|
||||
PCRE2_ASSERT((xclass_props & XCLASS_HAS_8BIT_CHARS) != 0);
|
||||
/* Add range to bitset. If we are in UTF or UCP mode, then clear the
|
||||
caseless bit, because the cranges handle caselessness (only) in this
|
||||
condition; see the condition for PARSE_CLASS_CASELESS_UTF in
|
||||
compile_optimize_class(). */
|
||||
add_to_class(((options & (PCRE2_UTF|PCRE2_UCP)) != 0)?
|
||||
(options & ~PCRE2_CASELESS) : options, xoptions, cb, range[0], range[1]);
|
||||
|
||||
if (range[1] > 255) break;
|
||||
range += 2;
|
||||
}
|
||||
|
||||
if (cranges->char_lists_size > 0)
|
||||
{
|
||||
/* The cranges structure is still used and freed later. */
|
||||
PCRE2_ASSERT((xclass_props & XCLASS_HIGH_ANY) == 0);
|
||||
xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_CHAR_LISTS;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((xclass_props & XCLASS_HIGH_ANY) != 0)
|
||||
{
|
||||
PCRE2_ASSERT(range + 2 == end && range[0] <= 256 &&
|
||||
range[1] >= GET_MAX_CHAR_VALUE(utf));
|
||||
should_flip_negation = TRUE;
|
||||
range = end;
|
||||
}
|
||||
|
||||
while (range < end)
|
||||
{
|
||||
uint32_t range_start = range[0];
|
||||
uint32_t range_end = range[1];
|
||||
|
||||
range += 2;
|
||||
xclass_props |= XCLASS_REQUIRED;
|
||||
|
||||
if (range_start < 256) range_start = 256;
|
||||
|
||||
if (lengthptr != NULL)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
*lengthptr += 1;
|
||||
|
||||
if (range_start < range_end)
|
||||
*lengthptr += PRIV(ord2utf)(range_start, class_uchardata);
|
||||
|
||||
*lengthptr += PRIV(ord2utf)(range_end, class_uchardata);
|
||||
continue;
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
*lengthptr += range_start < range_end ? 3 : 2;
|
||||
continue;
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
if (range_start < range_end)
|
||||
{
|
||||
*class_uchardata++ = XCL_RANGE;
|
||||
class_uchardata += PRIV(ord2utf)(range_start, class_uchardata);
|
||||
}
|
||||
else
|
||||
*class_uchardata++ = XCL_SINGLE;
|
||||
|
||||
class_uchardata += PRIV(ord2utf)(range_end, class_uchardata);
|
||||
continue;
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Without UTF support, character values are constrained
|
||||
by the bit length, and can only be > 256 for 16-bit and
|
||||
32-bit libraries. */
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (range_start < range_end)
|
||||
{
|
||||
*class_uchardata++ = XCL_RANGE;
|
||||
*class_uchardata++ = range_start;
|
||||
}
|
||||
else
|
||||
*class_uchardata++ = XCL_SINGLE;
|
||||
|
||||
*class_uchardata++ = range_end;
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||
}
|
||||
|
||||
if (lengthptr == NULL)
|
||||
cb->cx->memctl.free(cranges, cb->cx->memctl.memory_data);
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_WIDE_CHARS */
|
||||
|
||||
/* If there are characters with values > 255, or Unicode property settings
|
||||
(\p or \P), we have to compile an extended class, with its own opcode,
|
||||
unless there were no property settings and there was a negated special such
|
||||
as \S in the class, and PCRE2_UCP is not set, because in that case all
|
||||
characters > 255 are in or not in the class, so any that were explicitly
|
||||
given as well can be ignored.
|
||||
|
||||
In the UCP case, if certain negated POSIX classes (ex: [:^ascii:]) were
|
||||
present in a class, we either have to match or not match all wide
|
||||
characters (depending on whether the whole class is or is not negated).
|
||||
This requirement is indicated by match_all_or_no_wide_chars being true.
|
||||
We do this by including an explicit range, which works in both cases.
|
||||
This applies only in UTF and 16-bit and 32-bit non-UTF modes, since there
|
||||
cannot be any wide characters in 8-bit non-UTF mode.
|
||||
|
||||
When there *are* properties in a positive UTF-8 or any 16-bit or 32-bit
|
||||
class where \S etc is present without PCRE2_UCP, causing an extended class
|
||||
to be compiled, we make sure that all characters > 255 are included by
|
||||
forcing match_all_or_no_wide_chars to be true.
|
||||
|
||||
If, when generating an xclass, there are no characters < 256, we can omit
|
||||
the bitmap in the actual compiled code. */
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS /* Defined for 16/32 bits, or 8-bit with Unicode */
|
||||
if ((xclass_props & XCLASS_REQUIRED) != 0)
|
||||
{
|
||||
PCRE2_UCHAR *previous = code;
|
||||
|
||||
if ((xclass_props & XCLASS_HAS_CHAR_LISTS) == 0)
|
||||
*class_uchardata++ = XCL_END; /* Marks the end of extra data */
|
||||
*code++ = OP_XCLASS;
|
||||
code += LINK_SIZE;
|
||||
*code = negate_class? XCL_NOT:0;
|
||||
if ((xclass_props & XCLASS_HAS_PROPS) != 0) *code |= XCL_HASPROP;
|
||||
|
||||
/* If the map is required, move up the extra data to make room for it;
|
||||
otherwise just move the code pointer to the end of the extra data. */
|
||||
|
||||
if ((xclass_props & XCLASS_HAS_8BIT_CHARS) != 0 || has_bitmap != NULL)
|
||||
{
|
||||
if (negate_class)
|
||||
{
|
||||
uint32_t *classwords = cb->classbits.classwords;
|
||||
for (int i = 0; i < 8; i++) classwords[i] = ~classwords[i];
|
||||
}
|
||||
|
||||
if (has_bitmap == NULL)
|
||||
{
|
||||
*code++ |= XCL_MAP;
|
||||
(void)memmove(code + (32 / sizeof(PCRE2_UCHAR)), code,
|
||||
CU2BYTES(class_uchardata - code));
|
||||
memcpy(code, classbits, 32);
|
||||
code = class_uchardata + (32 / sizeof(PCRE2_UCHAR));
|
||||
}
|
||||
else
|
||||
{
|
||||
code = class_uchardata;
|
||||
if ((xclass_props & XCLASS_HAS_8BIT_CHARS) != 0)
|
||||
*has_bitmap = TRUE;
|
||||
}
|
||||
}
|
||||
else code = class_uchardata;
|
||||
|
||||
if ((xclass_props & XCLASS_HAS_CHAR_LISTS) != 0)
|
||||
{
|
||||
/* Char lists size is an even number, because all items are 16 or 32
|
||||
bit values. The character list data is always aligned to 32 bits. */
|
||||
size_t char_lists_size = cranges->char_lists_size;
|
||||
PCRE2_ASSERT((char_lists_size & 0x1) == 0 &&
|
||||
(cb->char_lists_size & 0x3) == 0);
|
||||
|
||||
if (lengthptr != NULL)
|
||||
{
|
||||
char_lists_size = CLIST_ALIGN_TO(char_lists_size, sizeof(uint32_t));
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
*lengthptr += 2 + LINK_SIZE;
|
||||
#else
|
||||
*lengthptr += 1 + LINK_SIZE;
|
||||
#endif
|
||||
|
||||
cb->char_lists_size += char_lists_size;
|
||||
|
||||
char_lists_size /= sizeof(PCRE2_UCHAR);
|
||||
|
||||
/* Storage space for character lists is included
|
||||
in the maximum pattern size. */
|
||||
if (*lengthptr > MAX_PATTERN_SIZE ||
|
||||
MAX_PATTERN_SIZE - *lengthptr < char_lists_size)
|
||||
{
|
||||
*errorcodeptr = ERR20; /* Pattern is too large */
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
uint8_t *data;
|
||||
|
||||
PCRE2_ASSERT(cranges->char_lists_types <= XCL_TYPE_MASK);
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
/* Encode as high / low bytes. */
|
||||
code[0] = (uint8_t)(XCL_LIST |
|
||||
(cranges->char_lists_types >> 8));
|
||||
code[1] = (uint8_t)cranges->char_lists_types;
|
||||
code += 2;
|
||||
#else
|
||||
*code++ = (PCRE2_UCHAR)(XCL_LIST | cranges->char_lists_types);
|
||||
#endif
|
||||
|
||||
/* Character lists are stored in backwards direction from
|
||||
byte code start. The non-dfa/dfa matchers can access these
|
||||
lists using the byte code start stored in match blocks.
|
||||
Each list is aligned to 32 bit with an optional unused
|
||||
16 bit value at the beginning of the character list. */
|
||||
|
||||
cb->char_lists_size += char_lists_size;
|
||||
data = (uint8_t*)cb->start_code - cb->char_lists_size;
|
||||
|
||||
memcpy(data, (uint8_t*)(cranges + 1) + cranges->char_lists_start,
|
||||
char_lists_size);
|
||||
|
||||
/* Since character lists total size is less than MAX_PATTERN_SIZE,
|
||||
their starting offset fits into a value which size is LINK_SIZE. */
|
||||
|
||||
char_lists_size = cb->char_lists_size;
|
||||
PUT(code, 0, (uint32_t)(char_lists_size >> 1));
|
||||
code += LINK_SIZE;
|
||||
|
||||
#if defined PCRE2_DEBUG || defined SUPPORT_VALGRIND
|
||||
if ((char_lists_size & 0x2) != 0)
|
||||
{
|
||||
/* In debug the unused 16 bit value is set
|
||||
to a fixed value and marked unused. */
|
||||
((uint16_t*)data)[-1] = 0x5555;
|
||||
#ifdef SUPPORT_VALGRIND
|
||||
VALGRIND_MAKE_MEM_NOACCESS(data - 2, 2);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
cb->char_lists_size =
|
||||
CLIST_ALIGN_TO(char_lists_size, sizeof(uint32_t));
|
||||
|
||||
cb->cx->memctl.free(cranges, cb->cx->memctl.memory_data);
|
||||
}
|
||||
}
|
||||
|
||||
/* Now fill in the complete length of the item */
|
||||
|
||||
PUT(previous, 1, (int)(code - previous));
|
||||
goto DONE; /* End of class handling */
|
||||
}
|
||||
#endif /* SUPPORT_WIDE_CHARS */
|
||||
|
||||
/* If there are no characters > 255, or they are all to be included or
|
||||
excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the
|
||||
whole class was negated and whether there were negative specials such as \S
|
||||
(non-UCP) in the class. Then copy the 32-byte map into the code vector,
|
||||
negating it if necessary. */
|
||||
|
||||
if (negate_class)
|
||||
{
|
||||
uint32_t *classwords = cb->classbits.classwords;
|
||||
|
||||
for (int i = 0; i < 8; i++) classwords[i] = ~classwords[i];
|
||||
}
|
||||
|
||||
if ((SELECT_VALUE8(!utf, 0) || negate_class != should_flip_negation) &&
|
||||
cb->classbits.classwords[0] == ~(uint32_t)0)
|
||||
{
|
||||
const uint32_t *classwords = cb->classbits.classwords;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
if (classwords[i] != ~(uint32_t)0) break;
|
||||
|
||||
if (i == 8)
|
||||
{
|
||||
*code++ = OP_ALLANY;
|
||||
goto DONE; /* End of class handling */
|
||||
}
|
||||
}
|
||||
|
||||
*code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
|
||||
memcpy(code, classbits, 32);
|
||||
code += 32 / sizeof(PCRE2_UCHAR);
|
||||
|
||||
DONE:
|
||||
*pcode = code;
|
||||
return pptr - 1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* ===================================================================*/
|
||||
/* Here follows a block of ECLASS-compiling functions. You may well want to
|
||||
read them from top to bottom; they are ordered from leafmost (at the top) to
|
||||
outermost parser (at the bottom of the file). */
|
||||
|
||||
/* This function folds one operand using the negation operator.
|
||||
The new, combined chunk of stack code is written out to *pop_info. */
|
||||
|
||||
static void
|
||||
fold_negation(eclass_op_info *pop_info, PCRE2_SIZE *lengthptr,
|
||||
BOOL preserve_classbits)
|
||||
{
|
||||
/* If the chunk of stack code is already composed of multiple ops, we won't
|
||||
descend in and try and propagate the negation down the tree. (That would lead
|
||||
to O(n^2) compile-time, which could be exploitable with a malicious regex -
|
||||
although maybe that's not really too much of a worry in a library that offers
|
||||
an exponential-time matching function!) */
|
||||
|
||||
if (pop_info->op_single_type == 0)
|
||||
{
|
||||
if (lengthptr != NULL)
|
||||
*lengthptr += 1;
|
||||
else
|
||||
pop_info->code_start[pop_info->length] = ECL_NOT;
|
||||
pop_info->length += 1;
|
||||
}
|
||||
|
||||
/* Otherwise, it's a nice single-op item, so we can easily fold in the negation
|
||||
without needing to produce an ECL_NOT. */
|
||||
|
||||
else if (pop_info->op_single_type == ECL_ANY ||
|
||||
pop_info->op_single_type == ECL_NONE)
|
||||
{
|
||||
pop_info->op_single_type = (pop_info->op_single_type == ECL_NONE)?
|
||||
ECL_ANY : ECL_NONE;
|
||||
if (lengthptr == NULL)
|
||||
*(pop_info->code_start) = pop_info->op_single_type;
|
||||
}
|
||||
else
|
||||
{
|
||||
PCRE2_ASSERT(pop_info->op_single_type == ECL_XCLASS &&
|
||||
pop_info->length >= 1 + LINK_SIZE + 1);
|
||||
if (lengthptr == NULL)
|
||||
pop_info->code_start[1 + LINK_SIZE] ^= XCL_NOT;
|
||||
}
|
||||
|
||||
if (!preserve_classbits)
|
||||
{
|
||||
for (int i = 0; i < 8; i++)
|
||||
pop_info->bits.classwords[i] = ~pop_info->bits.classwords[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* This function folds together two operands using a binary operator.
|
||||
The new, combined chunk of stack code is written out to *lhs_op_info. */
|
||||
|
||||
static void
|
||||
fold_binary(int op, eclass_op_info *lhs_op_info, eclass_op_info *rhs_op_info,
|
||||
PCRE2_SIZE *lengthptr)
|
||||
{
|
||||
switch (op)
|
||||
{
|
||||
/* ECL_AND truth table:
|
||||
|
||||
LHS RHS RESULT
|
||||
----------------
|
||||
ANY * RHS
|
||||
* ANY LHS
|
||||
NONE * NONE
|
||||
* NONE NONE
|
||||
X Y X & Y
|
||||
*/
|
||||
|
||||
case ECL_AND:
|
||||
if (rhs_op_info->op_single_type == ECL_ANY)
|
||||
{
|
||||
/* no-op: drop the RHS */
|
||||
}
|
||||
else if (lhs_op_info->op_single_type == ECL_ANY)
|
||||
{
|
||||
/* no-op: drop the LHS, and memmove the RHS into its place */
|
||||
if (lengthptr == NULL)
|
||||
memmove(lhs_op_info->code_start, rhs_op_info->code_start,
|
||||
CU2BYTES(rhs_op_info->length));
|
||||
lhs_op_info->length = rhs_op_info->length;
|
||||
lhs_op_info->op_single_type = rhs_op_info->op_single_type;
|
||||
}
|
||||
else if (rhs_op_info->op_single_type == ECL_NONE)
|
||||
{
|
||||
/* the result is ECL_NONE: write into the LHS */
|
||||
if (lengthptr == NULL)
|
||||
lhs_op_info->code_start[0] = ECL_NONE;
|
||||
lhs_op_info->length = 1;
|
||||
lhs_op_info->op_single_type = ECL_NONE;
|
||||
}
|
||||
else if (lhs_op_info->op_single_type == ECL_NONE)
|
||||
{
|
||||
/* the result is ECL_NONE: drop the RHS */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Both of LHS & RHS are either ECL_XCLASS, or compound operations. */
|
||||
if (lengthptr != NULL)
|
||||
*lengthptr += 1;
|
||||
else
|
||||
{
|
||||
PCRE2_ASSERT(rhs_op_info->code_start ==
|
||||
lhs_op_info->code_start + lhs_op_info->length);
|
||||
rhs_op_info->code_start[rhs_op_info->length] = ECL_AND;
|
||||
}
|
||||
lhs_op_info->length += rhs_op_info->length + 1;
|
||||
lhs_op_info->op_single_type = 0;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 8; i++)
|
||||
lhs_op_info->bits.classwords[i] &= rhs_op_info->bits.classwords[i];
|
||||
break;
|
||||
|
||||
/* ECL_OR truth table:
|
||||
|
||||
LHS RHS RESULT
|
||||
----------------
|
||||
ANY * ANY
|
||||
* ANY ANY
|
||||
NONE * RHS
|
||||
* NONE LHS
|
||||
X Y X | Y
|
||||
*/
|
||||
|
||||
case ECL_OR:
|
||||
if (rhs_op_info->op_single_type == ECL_NONE)
|
||||
{
|
||||
/* no-op: drop the RHS */
|
||||
}
|
||||
else if (lhs_op_info->op_single_type == ECL_NONE)
|
||||
{
|
||||
/* no-op: drop the LHS, and memmove the RHS into its place */
|
||||
if (lengthptr == NULL)
|
||||
memmove(lhs_op_info->code_start, rhs_op_info->code_start,
|
||||
CU2BYTES(rhs_op_info->length));
|
||||
lhs_op_info->length = rhs_op_info->length;
|
||||
lhs_op_info->op_single_type = rhs_op_info->op_single_type;
|
||||
}
|
||||
else if (rhs_op_info->op_single_type == ECL_ANY)
|
||||
{
|
||||
/* the result is ECL_ANY: write into the LHS */
|
||||
if (lengthptr == NULL)
|
||||
lhs_op_info->code_start[0] = ECL_ANY;
|
||||
lhs_op_info->length = 1;
|
||||
lhs_op_info->op_single_type = ECL_ANY;
|
||||
}
|
||||
else if (lhs_op_info->op_single_type == ECL_ANY)
|
||||
{
|
||||
/* the result is ECL_ANY: drop the RHS */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Both of LHS & RHS are either ECL_XCLASS, or compound operations. */
|
||||
if (lengthptr != NULL)
|
||||
*lengthptr += 1;
|
||||
else
|
||||
{
|
||||
PCRE2_ASSERT(rhs_op_info->code_start ==
|
||||
lhs_op_info->code_start + lhs_op_info->length);
|
||||
rhs_op_info->code_start[rhs_op_info->length] = ECL_OR;
|
||||
}
|
||||
lhs_op_info->length += rhs_op_info->length + 1;
|
||||
lhs_op_info->op_single_type = 0;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 8; i++)
|
||||
lhs_op_info->bits.classwords[i] |= rhs_op_info->bits.classwords[i];
|
||||
break;
|
||||
|
||||
/* ECL_XOR truth table:
|
||||
|
||||
LHS RHS RESULT
|
||||
----------------
|
||||
ANY * !RHS
|
||||
* ANY !LHS
|
||||
NONE * RHS
|
||||
* NONE LHS
|
||||
X Y X ^ Y
|
||||
*/
|
||||
|
||||
case ECL_XOR:
|
||||
if (rhs_op_info->op_single_type == ECL_NONE)
|
||||
{
|
||||
/* no-op: drop the RHS */
|
||||
}
|
||||
else if (lhs_op_info->op_single_type == ECL_NONE)
|
||||
{
|
||||
/* no-op: drop the LHS, and memmove the RHS into its place */
|
||||
if (lengthptr == NULL)
|
||||
memmove(lhs_op_info->code_start, rhs_op_info->code_start,
|
||||
CU2BYTES(rhs_op_info->length));
|
||||
lhs_op_info->length = rhs_op_info->length;
|
||||
lhs_op_info->op_single_type = rhs_op_info->op_single_type;
|
||||
}
|
||||
else if (rhs_op_info->op_single_type == ECL_ANY)
|
||||
{
|
||||
/* the result is !LHS: fold in the negation, and drop the RHS */
|
||||
/* Preserve the classbits, because we promise to deal with them later. */
|
||||
fold_negation(lhs_op_info, lengthptr, TRUE);
|
||||
}
|
||||
else if (lhs_op_info->op_single_type == ECL_ANY)
|
||||
{
|
||||
/* the result is !RHS: drop the LHS, memmove the RHS into its place, and
|
||||
fold in the negation */
|
||||
if (lengthptr == NULL)
|
||||
memmove(lhs_op_info->code_start, rhs_op_info->code_start,
|
||||
CU2BYTES(rhs_op_info->length));
|
||||
lhs_op_info->length = rhs_op_info->length;
|
||||
lhs_op_info->op_single_type = rhs_op_info->op_single_type;
|
||||
|
||||
/* Preserve the classbits, because we promise to deal with them later. */
|
||||
fold_negation(lhs_op_info, lengthptr, TRUE);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Both of LHS & RHS are either ECL_XCLASS, or compound operations. */
|
||||
if (lengthptr != NULL)
|
||||
*lengthptr += 1;
|
||||
else
|
||||
{
|
||||
PCRE2_ASSERT(rhs_op_info->code_start ==
|
||||
lhs_op_info->code_start + lhs_op_info->length);
|
||||
rhs_op_info->code_start[rhs_op_info->length] = ECL_XOR;
|
||||
}
|
||||
lhs_op_info->length += rhs_op_info->length + 1;
|
||||
lhs_op_info->op_single_type = 0;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 8; i++)
|
||||
lhs_op_info->bits.classwords[i] ^= rhs_op_info->bits.classwords[i];
|
||||
break;
|
||||
|
||||
default:
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Forward declaration: compile_class_operand() below calls this function for a
bracketed operand that has CLASS_IS_ECLASS set, before its definition is seen. */
static BOOL
|
||||
compile_eclass_nested(eclass_context *context, BOOL negated,
|
||||
uint32_t **pptr, PCRE2_UCHAR **pcode,
|
||||
eclass_op_info *pop_info, PCRE2_SIZE *lengthptr);
|
||||
|
||||
/* This function consumes a group of implicitly-unioned class elements.
|
||||
These can be characters, ranges, properties, or nested classes, as long
|
||||
as they are all joined by being placed adjacently. */
|
||||
|
||||
static BOOL
|
||||
compile_class_operand(eclass_context *context, BOOL negated,
|
||||
uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
|
||||
PCRE2_SIZE *lengthptr)
|
||||
{
|
||||
uint32_t *ptr = *pptr;
|
||||
uint32_t *prev_ptr;
|
||||
PCRE2_UCHAR *code = *pcode;
|
||||
PCRE2_UCHAR *code_start = code;
|
||||
PCRE2_SIZE prev_length = (lengthptr != NULL)? *lengthptr : 0;
|
||||
PCRE2_SIZE extra_length;
|
||||
uint32_t meta = META_CODE(*ptr);
|
||||
|
||||
switch (meta)
|
||||
{
|
||||
case META_CLASS_EMPTY_NOT:
|
||||
case META_CLASS_EMPTY:
|
||||
++ptr;
|
||||
pop_info->length = 1;
|
||||
if ((meta == META_CLASS_EMPTY) == negated)
|
||||
{
|
||||
*code++ = pop_info->op_single_type = ECL_ANY;
|
||||
memset(pop_info->bits.classbits, 0xff, 32);
|
||||
}
|
||||
else
|
||||
{
|
||||
*code++ = pop_info->op_single_type = ECL_NONE;
|
||||
memset(pop_info->bits.classbits, 0, 32);
|
||||
}
|
||||
break;
|
||||
|
||||
case META_CLASS:
|
||||
case META_CLASS_NOT:
|
||||
if ((*ptr & CLASS_IS_ECLASS) != 0)
|
||||
{
|
||||
if (!compile_eclass_nested(context, negated, &ptr, &code,
|
||||
pop_info, lengthptr))
|
||||
return FALSE;
|
||||
|
||||
PCRE2_ASSERT(*ptr == META_CLASS_END);
|
||||
ptr++;
|
||||
goto DONE;
|
||||
}
|
||||
|
||||
ptr++;
|
||||
/* Fall through */
|
||||
|
||||
default:
|
||||
/* Scan forward characters, ranges, and properties.
|
||||
For example: inside [a-z_ -- m] we don't have brackets around "a-z_" but
|
||||
we still need to collect that fragment up into a "leaf" OP_CLASS. */
|
||||
|
||||
prev_ptr = ptr;
|
||||
ptr = PRIV(compile_class_not_nested)(
|
||||
context->options, context->xoptions, ptr, &code,
|
||||
(meta != META_CLASS_NOT) == negated, &context->needs_bitmap,
|
||||
context->errorcodeptr, context->cb, lengthptr);
|
||||
if (ptr == NULL) return FALSE;
|
||||
|
||||
/* We must have a 100% guarantee that ptr increases when
|
||||
compile_class_operand() returns, even on Release builds, so that we can
|
||||
statically prove our loops terminate. */
|
||||
if (ptr <= prev_ptr)
|
||||
{
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* If we fell through above, consume the closing ']'. */
|
||||
if (meta == META_CLASS || meta == META_CLASS_NOT)
|
||||
{
|
||||
PCRE2_ASSERT(*ptr == META_CLASS_END);
|
||||
ptr++;
|
||||
}
|
||||
|
||||
/* Regardless of whether (lengthptr == NULL), some data will still be written
|
||||
out to *pcode, which we need: we have to peek at it, to transform the opcode
|
||||
into the ECLASS version (since we need to hoist up the bitmaps). */
|
||||
PCRE2_ASSERT(code > code_start);
|
||||
extra_length = (lengthptr != NULL)? *lengthptr - prev_length : 0;
|
||||
|
||||
/* Easiest case: convert OP_ALLANY to ECL_ANY */
|
||||
|
||||
if (*code_start == OP_ALLANY)
|
||||
{
|
||||
PCRE2_ASSERT(code - code_start == 1 && extra_length == 0);
|
||||
pop_info->length = 1;
|
||||
*code_start = pop_info->op_single_type = ECL_ANY;
|
||||
memset(pop_info->bits.classbits, 0xff, 32);
|
||||
}
|
||||
|
||||
/* For OP_CLASS and OP_NCLASS, we hoist out the bitmap and convert to
|
||||
ECL_NONE / ECL_ANY respectively. */
|
||||
|
||||
else if (*code_start == OP_CLASS || *code_start == OP_NCLASS)
|
||||
{
|
||||
PCRE2_ASSERT(code - code_start == 1 + 32 / sizeof(PCRE2_UCHAR) &&
|
||||
extra_length == 0);
|
||||
pop_info->length = 1;
|
||||
*code_start = pop_info->op_single_type =
|
||||
(*code_start == OP_CLASS)? ECL_NONE : ECL_ANY;
|
||||
memcpy(pop_info->bits.classbits, code_start + 1, 32);
|
||||
/* Rewind the code pointer, but make sure we adjust *lengthptr, because we
|
||||
do need to reserve that space (even though we only use it temporarily). */
|
||||
if (lengthptr != NULL)
|
||||
*lengthptr += code - (code_start + 1);
|
||||
code = code_start + 1;
|
||||
|
||||
if (!context->needs_bitmap && *code_start == ECL_NONE)
|
||||
{
|
||||
uint32_t *classwords = pop_info->bits.classwords;
|
||||
|
||||
for (int i = 0; i < 8; i++)
|
||||
if (classwords[i] != 0)
|
||||
{
|
||||
context->needs_bitmap = TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
context->needs_bitmap = TRUE;
|
||||
}
|
||||
|
||||
/* Finally, for OP_XCLASS we hoist out the bitmap (if any), and convert to
|
||||
ECL_XCLASS. */
|
||||
|
||||
else
|
||||
{
|
||||
PCRE2_ASSERT(*code_start == OP_XCLASS);
|
||||
*code_start = pop_info->op_single_type = ECL_XCLASS;
|
||||
|
||||
PCRE2_ASSERT(code - code_start >= 1 + LINK_SIZE + 1);
|
||||
|
||||
memcpy(pop_info->bits.classbits, context->cb->classbits.classbits, 32);
|
||||
pop_info->length = (code - code_start) + extra_length;
|
||||
}
|
||||
|
||||
break;
|
||||
} /* End of switch(meta) */
|
||||
|
||||
pop_info->code_start = (lengthptr == NULL)? code_start : NULL;
|
||||
|
||||
if (lengthptr != NULL)
|
||||
{
|
||||
*lengthptr += code - code_start;
|
||||
code = code_start;
|
||||
}
|
||||
|
||||
DONE:
|
||||
PCRE2_ASSERT(lengthptr == NULL || (code == code_start));
|
||||
|
||||
*pptr = ptr;
|
||||
*pcode = code;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* This function consumes a group of implicitly-unioned class elements.
|
||||
These can be characters, ranges, properties, or nested classes, as long
|
||||
as they are all joined by being placed adjacently. */
|
||||
|
||||
static BOOL
|
||||
compile_class_juxtaposition(eclass_context *context, BOOL negated,
|
||||
uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
|
||||
PCRE2_SIZE *lengthptr)
|
||||
{
|
||||
uint32_t *ptr = *pptr;
|
||||
PCRE2_UCHAR *code = *pcode;
|
||||
#ifdef PCRE2_DEBUG
|
||||
PCRE2_UCHAR *start_code = *pcode;
|
||||
#endif
|
||||
|
||||
/* See compile_class_binary_loose() for comments on compile-time folding of
|
||||
the "negated" flag. */
|
||||
|
||||
/* Because it's a non-empty class, there must be an operand at the start. */
|
||||
if (!compile_class_operand(context, negated, &ptr, &code, pop_info, lengthptr))
|
||||
return FALSE;
|
||||
|
||||
while (*ptr != META_CLASS_END &&
|
||||
!(*ptr >= META_ECLASS_AND && *ptr <= META_ECLASS_NOT))
|
||||
{
|
||||
uint32_t op;
|
||||
BOOL rhs_negated;
|
||||
eclass_op_info rhs_op_info;
|
||||
|
||||
if (negated)
|
||||
{
|
||||
/* !(A juxtapose B) -> !A && !B */
|
||||
op = ECL_AND;
|
||||
rhs_negated = TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* A juxtapose B -> A || B */
|
||||
op = ECL_OR;
|
||||
rhs_negated = FALSE;
|
||||
}
|
||||
|
||||
/* An operand must follow the operator. */
|
||||
if (!compile_class_operand(context, rhs_negated, &ptr, &code,
|
||||
&rhs_op_info, lengthptr))
|
||||
return FALSE;
|
||||
|
||||
/* Convert infix to postfix (RPN). */
|
||||
fold_binary(op, pop_info, &rhs_op_info, lengthptr);
|
||||
if (lengthptr == NULL)
|
||||
code = pop_info->code_start + pop_info->length;
|
||||
}
|
||||
|
||||
PCRE2_ASSERT(lengthptr == NULL || code == start_code);
|
||||
|
||||
*pptr = ptr;
|
||||
*pcode = code;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* This function consumes unary prefix operators. */
|
||||
|
||||
static BOOL
|
||||
compile_class_unary(eclass_context *context, BOOL negated,
|
||||
uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
|
||||
PCRE2_SIZE *lengthptr)
|
||||
{
|
||||
uint32_t *ptr = *pptr;
|
||||
#ifdef PCRE2_DEBUG
|
||||
PCRE2_UCHAR *start_code = *pcode;
|
||||
#endif
|
||||
|
||||
while (*ptr == META_ECLASS_NOT)
|
||||
{
|
||||
++ptr;
|
||||
negated = !negated;
|
||||
}
|
||||
|
||||
*pptr = ptr;
|
||||
/* Because it's a non-empty class, there must be an operand. */
|
||||
if (!compile_class_juxtaposition(context, negated, pptr, pcode,
|
||||
pop_info, lengthptr))
|
||||
return FALSE;
|
||||
|
||||
PCRE2_ASSERT(lengthptr == NULL || *pcode == start_code);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* This function consumes tightly-binding binary operators. */
|
||||
|
||||
static BOOL
|
||||
compile_class_binary_tight(eclass_context *context, BOOL negated,
|
||||
uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
|
||||
PCRE2_SIZE *lengthptr)
|
||||
{
|
||||
uint32_t *ptr = *pptr;
|
||||
PCRE2_UCHAR *code = *pcode;
|
||||
#ifdef PCRE2_DEBUG
|
||||
PCRE2_UCHAR *start_code = *pcode;
|
||||
#endif
|
||||
|
||||
/* See compile_class_binary_loose() for comments on compile-time folding of
|
||||
the "negated" flag. */
|
||||
|
||||
/* Because it's a non-empty class, there must be an operand at the start. */
|
||||
if (!compile_class_unary(context, negated, &ptr, &code, pop_info, lengthptr))
|
||||
return FALSE;
|
||||
|
||||
while (*ptr == META_ECLASS_AND)
|
||||
{
|
||||
uint32_t op;
|
||||
BOOL rhs_negated;
|
||||
eclass_op_info rhs_op_info;
|
||||
|
||||
if (negated)
|
||||
{
|
||||
/* !(A && B) -> !A || !B */
|
||||
op = ECL_OR;
|
||||
rhs_negated = TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* A && B -> A && B */
|
||||
op = ECL_AND;
|
||||
rhs_negated = FALSE;
|
||||
}
|
||||
|
||||
++ptr;
|
||||
|
||||
/* An operand must follow the operator. */
|
||||
if (!compile_class_unary(context, rhs_negated, &ptr, &code,
|
||||
&rhs_op_info, lengthptr))
|
||||
return FALSE;
|
||||
|
||||
/* Convert infix to postfix (RPN). */
|
||||
fold_binary(op, pop_info, &rhs_op_info, lengthptr);
|
||||
if (lengthptr == NULL)
|
||||
code = pop_info->code_start + pop_info->length;
|
||||
}
|
||||
|
||||
PCRE2_ASSERT(lengthptr == NULL || code == start_code);
|
||||
|
||||
*pptr = ptr;
|
||||
*pcode = code;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* This function consumes loosely-binding binary operators. */
|
||||
|
||||
static BOOL
|
||||
compile_class_binary_loose(eclass_context *context, BOOL negated,
|
||||
uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
|
||||
PCRE2_SIZE *lengthptr)
|
||||
{
|
||||
uint32_t *ptr = *pptr;
|
||||
PCRE2_UCHAR *code = *pcode;
|
||||
#ifdef PCRE2_DEBUG
|
||||
PCRE2_UCHAR *start_code = *pcode;
|
||||
#endif
|
||||
|
||||
/* We really want to fold the negation operator, if at all possible, so that
|
||||
simple cases can be reduced down. In particular, in 8-bit no-UTF mode, we want
|
||||
to produce a fully-folded expression, so that we can guarantee not to emit any
|
||||
OP_ECLASS codes (in the same way that we never emit OP_XCLASS in this mode).
|
||||
|
||||
This has the consequence that with a little ingenuity, we can in fact avoid
|
||||
emitting (nearly...) all cases of the "NOT" operator. Imagine that we have:
|
||||
!(A ...
|
||||
We have parsed the preceding "!", and we are about to parse the "A" operand. We
|
||||
don't know yet whether there will even be a following binary operand! Both of
|
||||
these are possibilities for what follows:
|
||||
!(A && B)
|
||||
!(A)
|
||||
However, we can still fold the "!" into the "A" operand, because no matter what
|
||||
the following binary operator will be, we can produce an expression which is
|
||||
equivalent. */
|
||||
|
||||
/* Because it's a non-empty class, there must be an operand at the start. */
|
||||
if (!compile_class_binary_tight(context, negated, &ptr, &code,
|
||||
pop_info, lengthptr))
|
||||
return FALSE;
|
||||
|
||||
while (*ptr >= META_ECLASS_OR && *ptr <= META_ECLASS_XOR)
|
||||
{
|
||||
uint32_t op;
|
||||
BOOL op_neg;
|
||||
BOOL rhs_negated;
|
||||
eclass_op_info rhs_op_info;
|
||||
|
||||
if (negated)
|
||||
{
|
||||
/* The whole expression is being negated; we respond by unconditionally
|
||||
negating the LHS A, before seeing what follows. And hooray! We can recover,
|
||||
no matter what follows. */
|
||||
/* !(A || B) -> !A && !B */
|
||||
/* !(A -- B) -> !(A && !B) -> !A || B */
|
||||
/* !(A XOR B) -> !(!A XOR !B) -> !A XNOR !B */
|
||||
op = (*ptr == META_ECLASS_OR )? ECL_AND :
|
||||
(*ptr == META_ECLASS_SUB)? ECL_OR :
|
||||
/*ptr == META_ECLASS_XOR*/ ECL_XOR;
|
||||
op_neg = (*ptr == META_ECLASS_XOR);
|
||||
rhs_negated = *ptr != META_ECLASS_SUB;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* A || B -> A || B */
|
||||
/* A -- B -> A && !B */
|
||||
/* A XOR B -> A XOR B */
|
||||
op = (*ptr == META_ECLASS_OR )? ECL_OR :
|
||||
(*ptr == META_ECLASS_SUB)? ECL_AND :
|
||||
/*ptr == META_ECLASS_XOR*/ ECL_XOR;
|
||||
op_neg = FALSE;
|
||||
rhs_negated = *ptr == META_ECLASS_SUB;
|
||||
}
|
||||
|
||||
++ptr;
|
||||
|
||||
/* An operand must follow the operator. */
|
||||
if (!compile_class_binary_tight(context, rhs_negated, &ptr, &code,
|
||||
&rhs_op_info, lengthptr))
|
||||
return FALSE;
|
||||
|
||||
/* Convert infix to postfix (RPN). */
|
||||
fold_binary(op, pop_info, &rhs_op_info, lengthptr);
|
||||
if (op_neg) fold_negation(pop_info, lengthptr, FALSE);
|
||||
if (lengthptr == NULL)
|
||||
code = pop_info->code_start + pop_info->length;
|
||||
}
|
||||
|
||||
PCRE2_ASSERT(lengthptr == NULL || code == start_code);
|
||||
|
||||
*pptr = ptr;
|
||||
*pcode = code;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* This function converts the META codes in pptr into opcodes written to
|
||||
pcode. The pptr must start at a META_CLASS or META_CLASS_NOT.
|
||||
|
||||
The class is compiled as a left-associative sequence of operator
|
||||
applications.
|
||||
|
||||
The pptr will be left pointing at the matching META_CLASS_END. */
|
||||
|
||||
static BOOL
|
||||
compile_eclass_nested(eclass_context *context, BOOL negated,
|
||||
uint32_t **pptr, PCRE2_UCHAR **pcode,
|
||||
eclass_op_info *pop_info, PCRE2_SIZE *lengthptr)
|
||||
{
|
||||
uint32_t *ptr = *pptr;
|
||||
#ifdef PCRE2_DEBUG
|
||||
PCRE2_UCHAR *start_code = *pcode;
|
||||
#endif
|
||||
|
||||
/* The CLASS_IS_ECLASS bit must be set since it is a nested class. */
|
||||
PCRE2_ASSERT(*ptr == (META_CLASS | CLASS_IS_ECLASS) ||
|
||||
*ptr == (META_CLASS_NOT | CLASS_IS_ECLASS));
|
||||
|
||||
if (*ptr++ == (META_CLASS_NOT | CLASS_IS_ECLASS))
|
||||
negated = !negated;
|
||||
|
||||
(*pptr)++;
|
||||
|
||||
/* Because it's a non-empty class, there must be an operand at the start. */
|
||||
if (!compile_class_binary_loose(context, negated, pptr, pcode,
|
||||
pop_info, lengthptr))
|
||||
return FALSE;
|
||||
|
||||
PCRE2_ASSERT(**pptr == META_CLASS_END);
|
||||
PCRE2_ASSERT(lengthptr == NULL || *pcode == start_code);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
BOOL
|
||||
PRIV(compile_class_nested)(uint32_t options, uint32_t xoptions,
|
||||
uint32_t **pptr, PCRE2_UCHAR **pcode, int *errorcodeptr,
|
||||
compile_block *cb, PCRE2_SIZE *lengthptr)
|
||||
{
|
||||
eclass_context context;
|
||||
eclass_op_info op_info;
|
||||
PCRE2_SIZE previous_length = (lengthptr != NULL)? *lengthptr : 0;
|
||||
PCRE2_UCHAR *code = *pcode;
|
||||
PCRE2_UCHAR *previous;
|
||||
BOOL allbitsone = TRUE;
|
||||
|
||||
context.needs_bitmap = FALSE;
|
||||
context.options = options;
|
||||
context.xoptions = xoptions;
|
||||
context.errorcodeptr = errorcodeptr;
|
||||
context.cb = cb;
|
||||
|
||||
previous = code;
|
||||
*code++ = OP_ECLASS;
|
||||
code += LINK_SIZE;
|
||||
*code++ = 0; /* Flags, currently zero. */
|
||||
if (!compile_eclass_nested(&context, FALSE, pptr, &code, &op_info, lengthptr))
|
||||
return FALSE;
|
||||
|
||||
if (lengthptr != NULL)
|
||||
{
|
||||
*lengthptr += code - previous;
|
||||
code = previous;
|
||||
/* (*lengthptr - previous_length) now holds the amount of buffer that
|
||||
we require to make the call to compile_class_nested() with
|
||||
lengthptr = NULL, and including the (1+LINK_SIZE+1) that we write out
|
||||
before that call. */
|
||||
}
|
||||
|
||||
/* Do some useful counting of what's in the bitmap. */
|
||||
for (int i = 0; i < 8; i++)
|
||||
if (op_info.bits.classwords[i] != 0xffffffff)
|
||||
{
|
||||
allbitsone = FALSE;
|
||||
break;
|
||||
}
|
||||
|
||||
/* After constant-folding the extended class syntax, it may turn out to be
|
||||
a simple class after all. In that case, we can unwrap it from the
|
||||
OP_ECLASS container - and in fact, we must do so, because in 8-bit
|
||||
no-Unicode mode the matcher is compiled without support for OP_ECLASS. */
|
||||
|
||||
#ifndef SUPPORT_WIDE_CHARS
|
||||
PCRE2_ASSERT(op_info.op_single_type != 0);
|
||||
#else
|
||||
if (op_info.op_single_type != 0)
|
||||
#endif
|
||||
{
|
||||
/* Rewind back over the OP_ECLASS. */
|
||||
code = previous;
|
||||
|
||||
/* If the bits are all ones, and the "high characters" are all matched
|
||||
too, we use a special-cased encoding of OP_ALLANY. */
|
||||
|
||||
if (op_info.op_single_type == ECL_ANY && allbitsone)
|
||||
{
|
||||
/* Advancing code means rewinding lengthptr, at this point. */
|
||||
if (lengthptr != NULL) *lengthptr -= 1;
|
||||
*code++ = OP_ALLANY;
|
||||
}
|
||||
|
||||
/* If the high bits are all matched / all not-matched, then we emit an
|
||||
OP_NCLASS/OP_CLASS respectively. */
|
||||
|
||||
else if (op_info.op_single_type == ECL_ANY ||
|
||||
op_info.op_single_type == ECL_NONE)
|
||||
{
|
||||
PCRE2_SIZE required_len = 1 + (32 / sizeof(PCRE2_UCHAR));
|
||||
|
||||
if (lengthptr != NULL)
|
||||
{
|
||||
if (required_len > (*lengthptr - previous_length))
|
||||
*lengthptr = previous_length + required_len;
|
||||
}
|
||||
|
||||
/* Advancing code means rewinding lengthptr, at this point. */
|
||||
if (lengthptr != NULL) *lengthptr -= required_len;
|
||||
*code++ = (op_info.op_single_type == ECL_ANY)? OP_NCLASS : OP_CLASS;
|
||||
memcpy(code, op_info.bits.classbits, 32);
|
||||
code += 32 / sizeof(PCRE2_UCHAR);
|
||||
}
|
||||
|
||||
/* Otherwise, we have an ECL_XCLASS, so we have the OP_XCLASS data
|
||||
there, but, we pulled out its bitmap into op_info, so now we have to
|
||||
put that back into the OP_XCLASS. */
|
||||
|
||||
else
|
||||
{
|
||||
#ifndef SUPPORT_WIDE_CHARS
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
#else
|
||||
BOOL need_map = context.needs_bitmap;
|
||||
PCRE2_SIZE required_len;
|
||||
|
||||
PCRE2_ASSERT(op_info.op_single_type == ECL_XCLASS);
|
||||
required_len = op_info.length + (need_map? 32/sizeof(PCRE2_UCHAR) : 0);
|
||||
|
||||
if (lengthptr != NULL)
|
||||
{
|
||||
/* Don't unconditionally request all the space we need - we may
|
||||
already have asked for more during processing of the ECLASS. */
|
||||
if (required_len > (*lengthptr - previous_length))
|
||||
*lengthptr = previous_length + required_len;
|
||||
|
||||
/* The code we write out here won't be ignored, even during the
|
||||
(lengthptr != NULL) phase, because if there's a following quantifier
|
||||
it will peek backwards. So we do have to write out a (truncated)
|
||||
OP_XCLASS, even on this branch. */
|
||||
*lengthptr -= 1 + LINK_SIZE + 1;
|
||||
*code++ = OP_XCLASS;
|
||||
PUT(code, 0, 1 + LINK_SIZE + 1);
|
||||
code += LINK_SIZE;
|
||||
*code++ = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
PCRE2_UCHAR *rest;
|
||||
PCRE2_SIZE rest_len;
|
||||
PCRE2_UCHAR flags;
|
||||
|
||||
/* 1 unit: OP_XCLASS | LINK_SIZE units | 1 unit: flags | ...rest */
|
||||
PCRE2_ASSERT(op_info.length >= 1 + LINK_SIZE + 1);
|
||||
rest = op_info.code_start + 1 + LINK_SIZE + 1;
|
||||
rest_len = (op_info.code_start + op_info.length) - rest;
|
||||
|
||||
/* First read any data we use, before memmove splats it. */
|
||||
flags = op_info.code_start[1 + LINK_SIZE];
|
||||
PCRE2_ASSERT((flags & XCL_MAP) == 0);
|
||||
|
||||
/* Next do the memmove before any writes. */
|
||||
memmove(code + 1 + LINK_SIZE + 1 + (need_map? 32/sizeof(PCRE2_UCHAR) : 0),
|
||||
rest, CU2BYTES(rest_len));
|
||||
|
||||
/* Finally write the header data. */
|
||||
*code++ = OP_XCLASS;
|
||||
PUT(code, 0, (int)required_len);
|
||||
code += LINK_SIZE;
|
||||
*code++ = flags | (need_map? XCL_MAP : 0);
|
||||
if (need_map)
|
||||
{
|
||||
memcpy(code, op_info.bits.classbits, 32);
|
||||
code += 32 / sizeof(PCRE2_UCHAR);
|
||||
}
|
||||
code += rest_len;
|
||||
}
|
||||
#endif /* SUPPORT_WIDE_CHARS */
|
||||
}
|
||||
}
|
||||
|
||||
/* Otherwise, we're going to keep the OP_ECLASS. However, again we need
|
||||
to do some adjustment to insert the bitmap if we have one. */
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
else
|
||||
{
|
||||
BOOL need_map = context.needs_bitmap;
|
||||
PCRE2_SIZE required_len =
|
||||
1 + LINK_SIZE + 1 + (need_map? 32/sizeof(PCRE2_UCHAR) : 0) + op_info.length;
|
||||
|
||||
if (lengthptr != NULL)
|
||||
{
|
||||
if (required_len > (*lengthptr - previous_length))
|
||||
*lengthptr = previous_length + required_len;
|
||||
|
||||
/* As for the XCLASS branch above, we do have to write out a dummy
|
||||
OP_ECLASS, because of the backwards peek by the quantifier code. Write
|
||||
out a (truncated) OP_ECLASS, even on this branch. */
|
||||
*lengthptr -= 1 + LINK_SIZE + 1;
|
||||
*code++ = OP_ECLASS;
|
||||
PUT(code, 0, 1 + LINK_SIZE + 1);
|
||||
code += LINK_SIZE;
|
||||
*code++ = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (need_map)
|
||||
{
|
||||
PCRE2_UCHAR *map_start = previous + 1 + LINK_SIZE + 1;
|
||||
previous[1 + LINK_SIZE] |= ECL_MAP;
|
||||
memmove(map_start + 32/sizeof(PCRE2_UCHAR), map_start,
|
||||
CU2BYTES(code - map_start));
|
||||
memcpy(map_start, op_info.bits.classbits, 32);
|
||||
code += 32 / sizeof(PCRE2_UCHAR);
|
||||
}
|
||||
PUT(previous, 1, (int)(code - previous));
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_WIDE_CHARS */
|
||||
|
||||
*pcode = code;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* End of pcre2_compile_class.c */
|
||||
252
3rd/pcre2/src/pcre2_config.c
Normal file
252
3rd/pcre2/src/pcre2_config.c
Normal file
@@ -0,0 +1,252 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
/* Save the configured link size, which is in bytes. In 16-bit and 32-bit modes
|
||||
its value gets changed by pcre2_intmodedep.h (included by pcre2_internal.h) to
|
||||
be in code units. */
|
||||
|
||||
static int configured_link_size = LINK_SIZE;
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
/* These macros are the standard way of turning unquoted text into C strings.
|
||||
They allow macros like PCRE2_MAJOR to be defined without quotes, which is
|
||||
convenient for user programs that want to test their values. */
|
||||
|
||||
#define STRING(a) # a
|
||||
#define XSTRING(s) STRING(s)
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return info about what features are configured *
|
||||
*************************************************/
|
||||
|
||||
/* If where is NULL, the length of memory required is returned.
|
||||
|
||||
Arguments:
|
||||
what what information is required
|
||||
where where to put the information
|
||||
|
||||
Returns: 0 if a numerical value is returned
|
||||
>= 0 if a string value
|
||||
PCRE2_ERROR_BADOPTION if "where" not recognized
|
||||
or JIT target requested when JIT not enabled
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_config(uint32_t what, void *where)
|
||||
{
|
||||
if (where == NULL) /* Requests a length */
|
||||
{
|
||||
switch(what)
|
||||
{
|
||||
default:
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
|
||||
case PCRE2_CONFIG_BSR:
|
||||
case PCRE2_CONFIG_COMPILED_WIDTHS:
|
||||
case PCRE2_CONFIG_DEPTHLIMIT:
|
||||
case PCRE2_CONFIG_HEAPLIMIT:
|
||||
case PCRE2_CONFIG_JIT:
|
||||
case PCRE2_CONFIG_LINKSIZE:
|
||||
case PCRE2_CONFIG_MATCHLIMIT:
|
||||
case PCRE2_CONFIG_NEVER_BACKSLASH_C:
|
||||
case PCRE2_CONFIG_NEWLINE:
|
||||
case PCRE2_CONFIG_PARENSLIMIT:
|
||||
case PCRE2_CONFIG_STACKRECURSE: /* Obsolete */
|
||||
case PCRE2_CONFIG_TABLES_LENGTH:
|
||||
case PCRE2_CONFIG_UNICODE:
|
||||
return sizeof(uint32_t);
|
||||
|
||||
/* These are handled below */
|
||||
|
||||
case PCRE2_CONFIG_JITTARGET:
|
||||
case PCRE2_CONFIG_UNICODE_VERSION:
|
||||
case PCRE2_CONFIG_VERSION:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
switch (what)
|
||||
{
|
||||
default:
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
|
||||
case PCRE2_CONFIG_BSR:
|
||||
#ifdef BSR_ANYCRLF
|
||||
*((uint32_t *)where) = PCRE2_BSR_ANYCRLF;
|
||||
#else
|
||||
*((uint32_t *)where) = PCRE2_BSR_UNICODE;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_COMPILED_WIDTHS:
|
||||
*((uint32_t *)where) = 0
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
+ 1
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
+ 2
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
+ 4
|
||||
#endif
|
||||
;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_DEPTHLIMIT:
|
||||
*((uint32_t *)where) = MATCH_LIMIT_DEPTH;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_HEAPLIMIT:
|
||||
*((uint32_t *)where) = HEAP_LIMIT;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_JIT:
|
||||
#ifdef SUPPORT_JIT
|
||||
*((uint32_t *)where) = 1;
|
||||
#else
|
||||
*((uint32_t *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_JITTARGET:
|
||||
#ifdef SUPPORT_JIT
|
||||
{
|
||||
const char *v = PRIV(jit_get_target)();
|
||||
return (int)(1 + ((where == NULL)?
|
||||
strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
|
||||
}
|
||||
#else
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
#endif
|
||||
|
||||
case PCRE2_CONFIG_LINKSIZE:
|
||||
*((uint32_t *)where) = (uint32_t)configured_link_size;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_MATCHLIMIT:
|
||||
*((uint32_t *)where) = MATCH_LIMIT;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_NEWLINE:
|
||||
*((uint32_t *)where) = NEWLINE_DEFAULT;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_NEVER_BACKSLASH_C:
|
||||
#ifdef NEVER_BACKSLASH_C
|
||||
*((uint32_t *)where) = 1;
|
||||
#else
|
||||
*((uint32_t *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_PARENSLIMIT:
|
||||
*((uint32_t *)where) = PARENS_NEST_LIMIT;
|
||||
break;
|
||||
|
||||
/* This is now obsolete. The stack is no longer used via recursion for
|
||||
handling backtracking in pcre2_match(). */
|
||||
|
||||
case PCRE2_CONFIG_STACKRECURSE:
|
||||
*((uint32_t *)where) = 0;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_TABLES_LENGTH:
|
||||
*((uint32_t *)where) = TABLES_LENGTH;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_UNICODE_VERSION:
|
||||
{
|
||||
#if defined SUPPORT_UNICODE
|
||||
const char *v = PRIV(unicode_version);
|
||||
#else
|
||||
const char *v = "Unicode not supported";
|
||||
#endif
|
||||
return (int)(1 + ((where == NULL)?
|
||||
strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
|
||||
}
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_UNICODE:
|
||||
#if defined SUPPORT_UNICODE
|
||||
*((uint32_t *)where) = 1;
|
||||
#else
|
||||
*((uint32_t *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
/* The hackery in setting "v" below is to cope with the case when
|
||||
PCRE2_PRERELEASE is set to an empty string (which it is for real releases).
|
||||
If the second alternative is used in this case, it does not leave a space
|
||||
before the date. On the other hand, if all four macros are put into a single
|
||||
XSTRING when PCRE2_PRERELEASE is not empty, an unwanted space is inserted.
|
||||
There are problems using an "obvious" approach like this:
|
||||
|
||||
XSTRING(PCRE2_MAJOR) "." XSTRING(PCRE2_MINOR)
|
||||
XSTRING(PCRE2_PRERELEASE) " " XSTRING(PCRE2_DATE)
|
||||
|
||||
because, when PCRE2_PRERELEASE is empty, this leads to an attempted expansion
|
||||
of STRING(). The C standard states: "If (before argument substitution) any
|
||||
argument consists of no preprocessing tokens, the behavior is undefined." It
|
||||
turns out the gcc treats this case as a single empty string - which is what
|
||||
we really want - but Visual C grumbles about the lack of an argument for the
|
||||
macro. Unfortunately, both are within their rights. As there seems to be no
|
||||
way to test for a macro's value being empty at compile time, we have to
|
||||
resort to a runtime test. */
|
||||
|
||||
case PCRE2_CONFIG_VERSION:
|
||||
{
|
||||
const char *v = (XSTRING(Z PCRE2_PRERELEASE)[1] == 0)?
|
||||
XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) :
|
||||
XSTRING(PCRE2_MAJOR.PCRE2_MINOR) XSTRING(PCRE2_PRERELEASE PCRE2_DATE);
|
||||
return (int)(1 + ((where == NULL)?
|
||||
strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre2_config.c */
|
||||
556
3rd/pcre2/src/pcre2_context.c
Normal file
556
3rd/pcre2/src/pcre2_context.c
Normal file
@@ -0,0 +1,556 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Default malloc/free functions *
|
||||
*************************************************/
|
||||
|
||||
/* Ignore the "user data" argument in each case. */
|
||||
|
||||
/* Default allocator: hands the request straight to the C library malloc().
The "data" argument is ignored; it exists only so that the function has the
same shape as the malloc callback held in a pcre2_memctl. Returns whatever
malloc() returns, so NULL on failure. */

static void *default_malloc(size_t size, void *data)
{
(void)data;
return malloc(size);
}
|
||||
|
||||
|
||||
/* Default deallocator: hands the block straight to the C library free().
The "data" argument is ignored; it exists only so that the function has the
same shape as the free callback held in a pcre2_memctl. As with free(), a
NULL block is a no-op. */

static void default_free(void *block, void *data)
{
(void)data;
free(block);
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get a block and save memory control *
|
||||
*************************************************/
|
||||
|
||||
/* This internal function is called to get a block of memory in which the
|
||||
memory control data is to be stored at the start for future use.
|
||||
|
||||
Arguments:
|
||||
size amount of memory required
|
||||
memctl pointer to a memctl block or NULL
|
||||
|
||||
Returns: pointer to memory or NULL on failure
|
||||
*/
|
||||
|
||||
extern void *
|
||||
PRIV(memctl_malloc)(size_t size, pcre2_memctl *memctl)
|
||||
{
|
||||
pcre2_memctl *newmemctl;
|
||||
void *yield = (memctl == NULL)? malloc(size) :
|
||||
memctl->malloc(size, memctl->memory_data);
|
||||
if (yield == NULL) return NULL;
|
||||
newmemctl = (pcre2_memctl *)yield;
|
||||
if (memctl == NULL)
|
||||
{
|
||||
newmemctl->malloc = default_malloc;
|
||||
newmemctl->free = default_free;
|
||||
newmemctl->memory_data = NULL;
|
||||
}
|
||||
else *newmemctl = *memctl;
|
||||
return yield;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create and initialize contexts *
|
||||
*************************************************/
|
||||
|
||||
/* Initializing for compile and match contexts is done in separate, private
|
||||
functions so that these can be called from functions such as pcre2_compile()
|
||||
when an external context is not supplied. The initializing functions have an
|
||||
option to set up default memory management. */
|
||||
|
||||
/* Create a general context, which records the memory management functions
to be used.

Arguments:
  private_malloc   allocator to use, or NULL for the default (malloc())
  private_free     deallocator to use, or NULL for the default (free())
  memory_data      opaque value passed as the last argument of both functions

The context itself is obtained from the chosen allocator.

Returns:           the new context, or NULL if the allocation failed
*/

PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
pcre2_general_context_create(void *(*private_malloc)(size_t, void *),
  void (*private_free)(void *, void *), void *memory_data)
{
pcre2_general_context *gcontext;
if (private_malloc == NULL) private_malloc = default_malloc;
if (private_free == NULL) private_free = default_free;
gcontext = private_malloc(sizeof(pcre2_real_general_context), memory_data);
if (gcontext == NULL) return NULL;
gcontext->memctl.malloc = private_malloc;
gcontext->memctl.free = private_free;
gcontext->memctl.memory_data = memory_data;
return gcontext;
}
|
||||
|
||||
|
||||
/* A default compile context is set up to save having to initialize at run time
|
||||
when no context is supplied to the compile function. */
|
||||
|
||||
pcre2_compile_context PRIV(default_compile_context) = {
|
||||
{ default_malloc, default_free, NULL }, /* Default memory handling */
|
||||
NULL, /* Stack guard */
|
||||
NULL, /* Stack guard data */
|
||||
PRIV(default_tables), /* Character tables */
|
||||
PCRE2_UNSET, /* Max pattern length */
|
||||
PCRE2_UNSET, /* Max pattern compiled length */
|
||||
BSR_DEFAULT, /* Backslash R default */
|
||||
NEWLINE_DEFAULT, /* Newline convention */
|
||||
PARENS_NEST_LIMIT, /* As it says */
|
||||
0, /* Extra options */
|
||||
MAX_VARLOOKBEHIND, /* As it says */
|
||||
PCRE2_OPTIMIZATION_ALL /* All optimizations enabled */
|
||||
};
|
||||
|
||||
/* The create function copies the default into the new memory, but must
|
||||
override the default memory handling functions if a gcontext was provided. */
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
|
||||
pcre2_compile_context_create(pcre2_general_context *gcontext)
|
||||
{
|
||||
pcre2_compile_context *ccontext = PRIV(memctl_malloc)(
|
||||
sizeof(pcre2_real_compile_context), (pcre2_memctl *)gcontext);
|
||||
if (ccontext == NULL) return NULL;
|
||||
*ccontext = PRIV(default_compile_context);
|
||||
if (gcontext != NULL)
|
||||
*((pcre2_memctl *)ccontext) = *((pcre2_memctl *)gcontext);
|
||||
return ccontext;
|
||||
}
|
||||
|
||||
|
||||
/* A default match context is set up to save having to initialize at run time
|
||||
when no context is supplied to a match function. */
|
||||
|
||||
pcre2_match_context PRIV(default_match_context) = {
|
||||
{ default_malloc, default_free, NULL },
|
||||
#ifdef SUPPORT_JIT
|
||||
NULL, /* JIT callback */
|
||||
NULL, /* JIT callback data */
|
||||
#endif
|
||||
NULL, /* Callout function */
|
||||
NULL, /* Callout data */
|
||||
NULL, /* Substitute callout function */
|
||||
NULL, /* Substitute callout data */
|
||||
NULL, /* Substitute case callout function */
|
||||
NULL, /* Substitute case callout data */
|
||||
PCRE2_UNSET, /* Offset limit */
|
||||
HEAP_LIMIT,
|
||||
MATCH_LIMIT,
|
||||
MATCH_LIMIT_DEPTH };
|
||||
|
||||
/* The create function copies the default into the new memory, but must
|
||||
override the default memory handling functions if a gcontext was provided. */
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
pcre2_match_context_create(pcre2_general_context *gcontext)
{
  /* The general context is handed to the allocator viewed as a pcre2_memctl;
  it may be NULL. */
  pcre2_memctl *memctl = (pcre2_memctl *)gcontext;
  pcre2_match_context *mcontext =
    PRIV(memctl_malloc)(sizeof(pcre2_real_match_context), memctl);

  if (mcontext != NULL) {
    /* Begin with the built-in defaults ... */
    *mcontext = PRIV(default_match_context);

    /* ... then, if a general context was given, overwrite the pcre2_memctl at
    the start of the new context with the caller's one. */
    if (memctl != NULL) *((pcre2_memctl *)mcontext) = *memctl;
  }

  return mcontext;   /* NULL when the allocation failed */
}
|
||||
|
||||
|
||||
/* A default convert context is set up to save having to initialize at run time
|
||||
when no context is supplied to the convert function. */
|
||||
|
||||
pcre2_convert_context PRIV(default_convert_context) = {
|
||||
{ default_malloc, default_free, NULL }, /* Default memory handling */
|
||||
#ifdef _WIN32
|
||||
CHAR_BACKSLASH, /* Default path separator */
|
||||
CHAR_GRAVE_ACCENT /* Default escape character */
|
||||
#else /* Not Windows */
|
||||
CHAR_SLASH, /* Default path separator */
|
||||
CHAR_BACKSLASH /* Default escape character */
|
||||
#endif
|
||||
};
|
||||
|
||||
/* The create function copies the default into the new memory, but must
|
||||
override the default memory handling functions if a gcontext was provided. */
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_convert_context * PCRE2_CALL_CONVENTION
pcre2_convert_context_create(pcre2_general_context *gcontext)
{
  /* The general context is handed to the allocator viewed as a pcre2_memctl;
  it may be NULL. */
  pcre2_memctl *memctl = (pcre2_memctl *)gcontext;
  pcre2_convert_context *ccontext =
    PRIV(memctl_malloc)(sizeof(pcre2_real_convert_context), memctl);

  if (ccontext != NULL) {
    /* Begin with the built-in defaults ... */
    *ccontext = PRIV(default_convert_context);

    /* ... then, if a general context was given, overwrite the pcre2_memctl at
    the start of the new context with the caller's one. */
    if (memctl != NULL) *((pcre2_memctl *)ccontext) = *memctl;
  }

  return ccontext;   /* NULL when the allocation failed */
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Context copy functions *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
pcre2_general_context_copy(pcre2_general_context *gcontext)
{
  /* The duplicate is obtained from the source context's own allocator.
  gcontext is dereferenced unconditionally, so it must not be NULL. */
  const size_t size = sizeof(pcre2_real_general_context);
  pcre2_general_context *duplicate =
    gcontext->memctl.malloc(size, gcontext->memctl.memory_data);

  if (duplicate != NULL) memcpy(duplicate, gcontext, size);
  return duplicate;   /* NULL when the allocation failed */
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
pcre2_compile_context_copy(pcre2_compile_context *ccontext)
{
  /* The duplicate is obtained from the source context's own allocator.
  ccontext is dereferenced unconditionally, so it must not be NULL. */
  const size_t size = sizeof(pcre2_real_compile_context);
  pcre2_compile_context *duplicate =
    ccontext->memctl.malloc(size, ccontext->memctl.memory_data);

  if (duplicate != NULL) memcpy(duplicate, ccontext, size);
  return duplicate;   /* NULL when the allocation failed */
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
pcre2_match_context_copy(pcre2_match_context *mcontext)
{
  /* The duplicate is obtained from the source context's own allocator.
  mcontext is dereferenced unconditionally, so it must not be NULL. */
  const size_t size = sizeof(pcre2_real_match_context);
  pcre2_match_context *duplicate =
    mcontext->memctl.malloc(size, mcontext->memctl.memory_data);

  if (duplicate != NULL) memcpy(duplicate, mcontext, size);
  return duplicate;   /* NULL when the allocation failed */
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_convert_context * PCRE2_CALL_CONVENTION
pcre2_convert_context_copy(pcre2_convert_context *ccontext)
{
  /* The duplicate is obtained from the source context's own allocator.
  ccontext is dereferenced unconditionally, so it must not be NULL. */
  const size_t size = sizeof(pcre2_real_convert_context);
  pcre2_convert_context *duplicate =
    ccontext->memctl.malloc(size, ccontext->memctl.memory_data);

  if (duplicate != NULL) memcpy(duplicate, ccontext, size);
  return duplicate;   /* NULL when the allocation failed */
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Context free functions *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_general_context_free(pcre2_general_context *gcontext)
|
||||
{
|
||||
if (gcontext != NULL)
|
||||
gcontext->memctl.free(gcontext, gcontext->memctl.memory_data);
|
||||
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_compile_context_free(pcre2_compile_context *ccontext)
|
||||
{
|
||||
if (ccontext != NULL)
|
||||
ccontext->memctl.free(ccontext, ccontext->memctl.memory_data);
|
||||
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_match_context_free(pcre2_match_context *mcontext)
|
||||
{
|
||||
if (mcontext != NULL)
|
||||
mcontext->memctl.free(mcontext, mcontext->memctl.memory_data);
|
||||
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_convert_context_free(pcre2_convert_context *ccontext)
|
||||
{
|
||||
if (ccontext != NULL)
|
||||
ccontext->memctl.free(ccontext, ccontext->memctl.memory_data);
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Set values in contexts *
|
||||
*************************************************/
|
||||
|
||||
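/* All these functions return 0 for success. Only some of them are able to test
the validity of the data they are given; those that can return an error code
when it is invalid. That code is PCRE2_ERROR_BADDATA in most cases, but
pcre2_set_optimize() returns PCRE2_ERROR_NULL for a NULL context and
PCRE2_ERROR_BADOPTION for an unrecognized directive. */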
|
||||
|
||||
|
||||
/* ------------ Compile context ------------ */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_character_tables(pcre2_compile_context *ccontext,
|
||||
const uint8_t *tables)
|
||||
{
|
||||
ccontext->tables = tables;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_bsr(pcre2_compile_context *ccontext, uint32_t value)
|
||||
{
|
||||
switch(value)
|
||||
{
|
||||
case PCRE2_BSR_ANYCRLF:
|
||||
case PCRE2_BSR_UNICODE:
|
||||
ccontext->bsr_convention = value;
|
||||
return 0;
|
||||
|
||||
default:
|
||||
return PCRE2_ERROR_BADDATA;
|
||||
}
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, PCRE2_SIZE length)
|
||||
{
|
||||
ccontext->max_pattern_length = length;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_max_pattern_compiled_length(pcre2_compile_context *ccontext, PCRE2_SIZE length)
|
||||
{
|
||||
ccontext->max_pattern_compiled_length = length;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline)
|
||||
{
|
||||
switch(newline)
|
||||
{
|
||||
case PCRE2_NEWLINE_CR:
|
||||
case PCRE2_NEWLINE_LF:
|
||||
case PCRE2_NEWLINE_CRLF:
|
||||
case PCRE2_NEWLINE_ANY:
|
||||
case PCRE2_NEWLINE_ANYCRLF:
|
||||
case PCRE2_NEWLINE_NUL:
|
||||
ccontext->newline_convention = newline;
|
||||
return 0;
|
||||
|
||||
default:
|
||||
return PCRE2_ERROR_BADDATA;
|
||||
}
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_max_varlookbehind(pcre2_compile_context *ccontext, uint32_t limit)
|
||||
{
|
||||
ccontext->max_varlookbehind = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, uint32_t limit)
|
||||
{
|
||||
ccontext->parens_nest_limit = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_compile_extra_options(pcre2_compile_context *ccontext, uint32_t options)
|
||||
{
|
||||
ccontext->extra_options = options;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
|
||||
int (*guard)(uint32_t, void *), void *user_data)
|
||||
{
|
||||
ccontext->stack_guard = guard;
|
||||
ccontext->stack_guard_data = user_data;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_optimize(pcre2_compile_context *ccontext, uint32_t directive)
|
||||
{
|
||||
if (ccontext == NULL)
|
||||
return PCRE2_ERROR_NULL;
|
||||
|
||||
switch (directive)
|
||||
{
|
||||
case PCRE2_OPTIMIZATION_NONE:
|
||||
ccontext->optimization_flags = 0;
|
||||
break;
|
||||
|
||||
case PCRE2_OPTIMIZATION_FULL:
|
||||
ccontext->optimization_flags = PCRE2_OPTIMIZATION_ALL;
|
||||
break;
|
||||
|
||||
default:
|
||||
if (directive >= PCRE2_AUTO_POSSESS && directive <= PCRE2_START_OPTIMIZE_OFF)
|
||||
{
|
||||
/* Even directive numbers starting from 64 switch a bit on;
|
||||
* Odd directive numbers starting from 65 switch a bit off */
|
||||
if ((directive & 1) != 0)
|
||||
ccontext->optimization_flags &= ~(1u << ((directive >> 1) - 32));
|
||||
else
|
||||
ccontext->optimization_flags |= 1u << ((directive >> 1) - 32);
|
||||
return 0;
|
||||
}
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ------------ Match context ------------ */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_callout(pcre2_match_context *mcontext,
|
||||
int (*callout)(pcre2_callout_block *, void *), void *callout_data)
|
||||
{
|
||||
mcontext->callout = callout;
|
||||
mcontext->callout_data = callout_data;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_substitute_callout(pcre2_match_context *mcontext,
|
||||
int (*substitute_callout)(pcre2_substitute_callout_block *, void *),
|
||||
void *substitute_callout_data)
|
||||
{
|
||||
mcontext->substitute_callout = substitute_callout;
|
||||
mcontext->substitute_callout_data = substitute_callout_data;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_substitute_case_callout(pcre2_match_context *mcontext,
|
||||
PCRE2_SIZE (*substitute_case_callout)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *,
|
||||
PCRE2_SIZE, int, void *),
|
||||
void *substitute_case_callout_data)
|
||||
{
|
||||
mcontext->substitute_case_callout = substitute_case_callout;
|
||||
mcontext->substitute_case_callout_data = substitute_case_callout_data;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_heap_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||
{
|
||||
mcontext->heap_limit = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||
{
|
||||
mcontext->match_limit = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_depth_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||
{
|
||||
mcontext->depth_limit = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_offset_limit(pcre2_match_context *mcontext, PCRE2_SIZE limit)
|
||||
{
|
||||
mcontext->offset_limit = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* These functions became obsolete at release 10.30. The first is kept as a
|
||||
synonym for backwards compatibility. The second now does nothing. Exclude both
|
||||
from coverage reports. */
|
||||
|
||||
/* LCOV_EXCL_START */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||
{
|
||||
return pcre2_set_depth_limit(mcontext, limit);
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_recursion_memory_management(pcre2_match_context *mcontext,
|
||||
void *(*mymalloc)(size_t, void *), void (*myfree)(void *, void *),
|
||||
void *mydata)
|
||||
{
|
||||
(void)mcontext;
|
||||
(void)mymalloc;
|
||||
(void)myfree;
|
||||
(void)mydata;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* LCOV_EXCL_STOP */
|
||||
|
||||
|
||||
/* ------------ Convert context ------------ */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_glob_separator(pcre2_convert_context *ccontext, uint32_t separator)
|
||||
{
|
||||
if (separator != CHAR_SLASH && separator != CHAR_BACKSLASH &&
|
||||
separator != CHAR_DOT) return PCRE2_ERROR_BADDATA;
|
||||
ccontext->glob_separator = separator;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_glob_escape(pcre2_convert_context *ccontext, uint32_t escape)
|
||||
{
|
||||
if (escape > 255 || (escape != 0 && !ispunct(escape)))
|
||||
return PCRE2_ERROR_BADDATA;
|
||||
ccontext->glob_escape = escape;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre2_context.c */
|
||||
|
||||
1191
3rd/pcre2/src/pcre2_convert.c
Normal file
1191
3rd/pcre2/src/pcre2_convert.c
Normal file
@@ -0,0 +1,1191 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
/* The conversion-type option bits: glob, POSIX basic and POSIX extended. */
#define TYPE_OPTIONS \
  (PCRE2_CONVERT_GLOB|PCRE2_CONVERT_POSIX_BASIC|PCRE2_CONVERT_POSIX_EXTENDED)

/* The type bits above combined with the UTF and glob modifier bits. */
#define ALL_OPTIONS \
  (PCRE2_CONVERT_UTF|PCRE2_CONVERT_NO_UTF_CHECK| \
   PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR| \
   PCRE2_CONVERT_GLOB_NO_STARSTAR| \
   TYPE_OPTIONS)

/* NOTE(review): presumably the size, in code units, of a scratch buffer used
when a conversion is run only to measure its output - confirm against the
place where this macro is used. */
#define DUMMY_BUFFER_SIZE 100
|
||||
|
||||
/* Generated pattern fragments */
|
||||
|
||||
/* Each fragment is built by concatenating single-character STR_ string
macros; the resulting text is shown alongside. */

#define STR_BACKSLASH_A                /* \A */ \
  STR_BACKSLASH STR_A
#define STR_BACKSLASH_z                /* \z */ \
  STR_BACKSLASH STR_z
#define STR_COLON_RIGHT_SQUARE_BRACKET /* :] */ \
  STR_COLON STR_RIGHT_SQUARE_BRACKET
#define STR_DOT_STAR_LOOKBEHIND        /* .*(?<= */ \
  STR_DOT STR_ASTERISK STR_LEFT_PARENTHESIS STR_QUESTION_MARK \
  STR_LESS_THAN_SIGN STR_EQUALS_SIGN
#define STR_LOOKAHEAD_NOT_DOT          /* (?!\.) */ \
  STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_EXCLAMATION_MARK \
  STR_BACKSLASH STR_DOT STR_RIGHT_PARENTHESIS
#define STR_QUERY_s                    /* (?s) */ \
  STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_s STR_RIGHT_PARENTHESIS
#define STR_STAR_NUL                   /* (*NUL) */ \
  STR_LEFT_PARENTHESIS STR_ASTERISK STR_N STR_U STR_L STR_RIGHT_PARENTHESIS
|
||||
|
||||
/* States for POSIX processing */
|
||||
|
||||
/* The numeric order of these states matters: convert_posix() compares states
with < and >=, treating everything from POSIX_CLASS_NOT_STARTED upwards as
"inside a bracket expression". */
enum {
  POSIX_START_REGEX,        /* Initial state, nothing processed yet */
  POSIX_ANCHORED,           /* A leading ^ anchor has just been copied */
  POSIX_NOT_BRACKET,        /* Ordinary processing outside brackets */
  POSIX_CLASS_NOT_STARTED,  /* Inside [...], not in a [:name:] item */
  POSIX_CLASS_STARTING,     /* Inside [...], just seen an inner [ */
  POSIX_CLASS_STARTED       /* Inside [...], seen [: of a possible class name */
};
|
||||
|
||||
/* Macro to add a character string to the output buffer, checking for overflow. */
|
||||
|
||||
/* Copies the zero-terminated string one unit at a time to the output. The
macro relies on variables named p (current output position) and endp (output
limit) being in scope at the point of use, and it makes the enclosing function
return PCRE2_ERROR_NOMEMORY as soon as p reaches endp. */
#define PUTCHARS(string) \
  { \
    for (const char *s = string; *s != 0; s++) \
      { \
      if (p >= endp) return PCRE2_ERROR_NOMEMORY; \
      *p++ = *s; \
      } \
  }
|
||||
|
||||
/* Literals that must be escaped: \ ? * + | . ^ $ { } [ ] ( ) */
|
||||
|
||||
static const char *pcre2_escaped_literals =
|
||||
STR_BACKSLASH STR_QUESTION_MARK STR_ASTERISK STR_PLUS
|
||||
STR_VERTICAL_LINE STR_DOT STR_CIRCUMFLEX_ACCENT STR_DOLLAR_SIGN
|
||||
STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET
|
||||
STR_LEFT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
|
||||
STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS;
|
||||
|
||||
/* Recognized escaped metacharacters in POSIX basic patterns. */
|
||||
|
||||
static const char *posix_meta_escapes =
|
||||
STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS
|
||||
STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET
|
||||
STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9;
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Convert a POSIX pattern *
|
||||
*************************************************/
|
||||
|
||||
/* This function handles both basic and extended POSIX patterns.
|
||||
|
||||
Arguments:
|
||||
pattype the pattern type
|
||||
pattern the pattern
|
||||
plength length in code units
|
||||
utf TRUE if UTF
|
||||
use_buffer where to put the output
|
||||
use_length length of use_buffer
|
||||
bufflenptr where to put the used length
|
||||
dummyrun TRUE if a dummy run
|
||||
ccontext the convert context
|
||||
|
||||
Returns: 0 => success
|
||||
!0 => error code
|
||||
*/
|
||||
|
||||
static int
|
||||
convert_posix(uint32_t pattype, PCRE2_SPTR pattern, PCRE2_SIZE plength,
|
||||
BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,
|
||||
PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)
|
||||
{
|
||||
PCRE2_SPTR posix = pattern;
|
||||
PCRE2_UCHAR *p = use_buffer;
|
||||
PCRE2_UCHAR *pp = p;
|
||||
PCRE2_UCHAR *endp = p + use_length - 1; /* Allow for trailing zero */
|
||||
PCRE2_SIZE convlength = 0;
|
||||
|
||||
uint32_t bracount = 0;
|
||||
uint32_t posix_state = POSIX_START_REGEX;
|
||||
uint32_t lastspecial = 0;
|
||||
BOOL extended = (pattype & PCRE2_CONVERT_POSIX_EXTENDED) != 0;
|
||||
BOOL nextisliteral = FALSE;
|
||||
|
||||
(void)utf; /* Not used when Unicode not supported */
|
||||
(void)ccontext; /* Not currently used */
|
||||
|
||||
/* Initialize default for error offset as end of input. */
|
||||
|
||||
*bufflenptr = plength;
|
||||
PUTCHARS(STR_STAR_NUL);
|
||||
|
||||
/* Now scan the input. */
|
||||
|
||||
while (plength > 0)
|
||||
{
|
||||
uint32_t c, sc;
|
||||
int clength = 1;
|
||||
|
||||
/* Add in the length of the last item, then, if in the dummy run, pull the
|
||||
pointer back to the start of the (temporary) buffer and then remember the
|
||||
start of the next item. */
|
||||
|
||||
convlength += p - pp;
|
||||
if (dummyrun) p = use_buffer;
|
||||
pp = p;
|
||||
|
||||
/* Pick up the next character */
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
c = *posix;
|
||||
#else
|
||||
GETCHARLENTEST(c, posix, clength);
|
||||
#endif
|
||||
posix += clength;
|
||||
plength -= clength;
|
||||
|
||||
sc = nextisliteral? 0 : c;
|
||||
nextisliteral = FALSE;
|
||||
|
||||
/* Handle a character within a class. */
|
||||
|
||||
if (posix_state >= POSIX_CLASS_NOT_STARTED)
|
||||
{
|
||||
if (c == CHAR_RIGHT_SQUARE_BRACKET)
|
||||
{
|
||||
PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
|
||||
posix_state = POSIX_NOT_BRACKET;
|
||||
}
|
||||
|
||||
/* Not the end of the class */
|
||||
|
||||
else
|
||||
{
|
||||
switch (posix_state)
|
||||
{
|
||||
case POSIX_CLASS_STARTED:
|
||||
if (c <= 127 && islower(c)) break; /* Remain in started state */
|
||||
posix_state = POSIX_CLASS_NOT_STARTED;
|
||||
if (c == CHAR_COLON && plength > 0 &&
|
||||
*posix == CHAR_RIGHT_SQUARE_BRACKET)
|
||||
{
|
||||
PUTCHARS(STR_COLON_RIGHT_SQUARE_BRACKET);
|
||||
plength--;
|
||||
posix++;
|
||||
continue; /* With next character after :] */
|
||||
}
|
||||
/* Fall through */
|
||||
|
||||
case POSIX_CLASS_NOT_STARTED:
|
||||
if (c == CHAR_LEFT_SQUARE_BRACKET)
|
||||
posix_state = POSIX_CLASS_STARTING;
|
||||
break;
|
||||
|
||||
case POSIX_CLASS_STARTING:
|
||||
if (c == CHAR_COLON) posix_state = POSIX_CLASS_STARTED;
|
||||
break;
|
||||
}
|
||||
|
||||
if (c == CHAR_BACKSLASH) PUTCHARS(STR_BACKSLASH);
|
||||
if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
|
||||
memcpy(p, posix - clength, CU2BYTES(clength));
|
||||
p += clength;
|
||||
}
|
||||
}
|
||||
|
||||
/* Handle a character not within a class. */
|
||||
|
||||
else switch(sc)
|
||||
{
|
||||
case CHAR_LEFT_SQUARE_BRACKET:
|
||||
PUTCHARS(STR_LEFT_SQUARE_BRACKET);
|
||||
|
||||
#ifdef NEVER
|
||||
/* We could handle special cases [[:<:]] and [[:>:]] (which PCRE does
|
||||
support) but they are not part of POSIX 1003.1. */
|
||||
|
||||
if (plength >= 6)
|
||||
{
|
||||
if (posix[0] == CHAR_LEFT_SQUARE_BRACKET &&
|
||||
posix[1] == CHAR_COLON &&
|
||||
(posix[2] == CHAR_LESS_THAN_SIGN ||
|
||||
posix[2] == CHAR_GREATER_THAN_SIGN) &&
|
||||
posix[3] == CHAR_COLON &&
|
||||
posix[4] == CHAR_RIGHT_SQUARE_BRACKET &&
|
||||
posix[5] == CHAR_RIGHT_SQUARE_BRACKET)
|
||||
{
|
||||
if (p + 6 > endp) return PCRE2_ERROR_NOMEMORY;
|
||||
memcpy(p, posix, CU2BYTES(6));
|
||||
p += 6;
|
||||
posix += 6;
|
||||
plength -= 6;
|
||||
continue; /* With next character */
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Handle start of "normal" character classes */
|
||||
|
||||
posix_state = POSIX_CLASS_NOT_STARTED;
|
||||
|
||||
/* Handle ^ and ] as first characters */
|
||||
|
||||
if (plength > 0)
|
||||
{
|
||||
if (*posix == CHAR_CIRCUMFLEX_ACCENT)
|
||||
{
|
||||
posix++;
|
||||
plength--;
|
||||
PUTCHARS(STR_CIRCUMFLEX_ACCENT);
|
||||
}
|
||||
if (plength > 0 && *posix == CHAR_RIGHT_SQUARE_BRACKET)
|
||||
{
|
||||
posix++;
|
||||
plength--;
|
||||
PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case CHAR_BACKSLASH:
|
||||
if (plength == 0) return PCRE2_ERROR_END_BACKSLASH;
|
||||
if (extended) nextisliteral = TRUE; else
|
||||
{
|
||||
if (*posix < 127 && strchr(posix_meta_escapes, *posix) != NULL)
|
||||
{
|
||||
if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH);
|
||||
if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
|
||||
lastspecial = *p++ = *posix++;
|
||||
plength--;
|
||||
}
|
||||
else nextisliteral = TRUE;
|
||||
}
|
||||
break;
|
||||
|
||||
case CHAR_RIGHT_PARENTHESIS:
|
||||
if (!extended || bracount == 0) goto ESCAPE_LITERAL;
|
||||
bracount--;
|
||||
goto COPY_SPECIAL;
|
||||
|
||||
case CHAR_LEFT_PARENTHESIS:
|
||||
bracount++;
|
||||
/* Fall through */
|
||||
|
||||
case CHAR_QUESTION_MARK:
|
||||
case CHAR_PLUS:
|
||||
case CHAR_LEFT_CURLY_BRACKET:
|
||||
case CHAR_RIGHT_CURLY_BRACKET:
|
||||
case CHAR_VERTICAL_LINE:
|
||||
if (!extended) goto ESCAPE_LITERAL;
|
||||
/* Fall through */
|
||||
|
||||
case CHAR_DOT:
|
||||
case CHAR_DOLLAR_SIGN:
|
||||
posix_state = POSIX_NOT_BRACKET;
|
||||
COPY_SPECIAL:
|
||||
lastspecial = c;
|
||||
if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
|
||||
*p++ = c;
|
||||
break;
|
||||
|
||||
case CHAR_ASTERISK:
|
||||
if (lastspecial != CHAR_ASTERISK)
|
||||
{
|
||||
if (!extended && (posix_state < POSIX_NOT_BRACKET ||
|
||||
lastspecial == CHAR_LEFT_PARENTHESIS))
|
||||
goto ESCAPE_LITERAL;
|
||||
goto COPY_SPECIAL;
|
||||
}
|
||||
break; /* Ignore second and subsequent asterisks */
|
||||
|
||||
case CHAR_CIRCUMFLEX_ACCENT:
|
||||
if (extended) goto COPY_SPECIAL;
|
||||
if (posix_state == POSIX_START_REGEX ||
|
||||
lastspecial == CHAR_LEFT_PARENTHESIS)
|
||||
{
|
||||
posix_state = POSIX_ANCHORED;
|
||||
goto COPY_SPECIAL;
|
||||
}
|
||||
/* Fall through */
|
||||
|
||||
default:
|
||||
if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL)
|
||||
{
|
||||
ESCAPE_LITERAL:
|
||||
PUTCHARS(STR_BACKSLASH);
|
||||
}
|
||||
lastspecial = 0xff; /* Indicates nothing special */
|
||||
if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
|
||||
memcpy(p, posix - clength, CU2BYTES(clength));
|
||||
p += clength;
|
||||
posix_state = POSIX_NOT_BRACKET;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (posix_state >= POSIX_CLASS_NOT_STARTED)
|
||||
return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
|
||||
convlength += p - pp; /* Final segment */
|
||||
*bufflenptr = convlength;
|
||||
*p++ = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Convert a glob pattern *
|
||||
*************************************************/
|
||||
|
||||
/* Context for writing the output into a buffer. */
|
||||
|
||||
typedef struct pcre2_output_context {
|
||||
PCRE2_UCHAR *output; /* current output position */
|
||||
PCRE2_SPTR output_end; /* output end */
|
||||
PCRE2_SIZE output_size; /* size of the output */
|
||||
uint8_t out_str[8]; /* string copied to the output */
|
||||
} pcre2_output_context;
|
||||
|
||||
|
||||
/* Write a character into the output.
|
||||
|
||||
Arguments:
|
||||
out output context
|
||||
chr the next character
|
||||
*/
|
||||
|
||||
static void
|
||||
convert_glob_write(pcre2_output_context *out, PCRE2_UCHAR chr)
|
||||
{
|
||||
out->output_size++;
|
||||
|
||||
if (out->output < out->output_end)
|
||||
*out->output++ = chr;
|
||||
}
|
||||
|
||||
|
||||
/* Write a string into the output.
|
||||
|
||||
Arguments:
|
||||
out output context
|
||||
length length of out->out_str
|
||||
*/
|
||||
|
||||
static void
|
||||
convert_glob_write_str(pcre2_output_context *out, PCRE2_SIZE length)
|
||||
{
|
||||
uint8_t *out_str = out->out_str;
|
||||
PCRE2_UCHAR *output = out->output;
|
||||
PCRE2_SPTR output_end = out->output_end;
|
||||
PCRE2_SIZE output_size = out->output_size;
|
||||
|
||||
do
|
||||
{
|
||||
output_size++;
|
||||
|
||||
if (output < output_end)
|
||||
*output++ = *out_str++;
|
||||
}
|
||||
while (--length != 0);
|
||||
|
||||
out->output = output;
|
||||
out->output_size = output_size;
|
||||
}
|
||||
|
||||
|
||||
/* Prints the separator into the output.
|
||||
|
||||
Arguments:
|
||||
out output context
|
||||
separator glob separator
|
||||
with_escape backslash is needed before separator
|
||||
*/
|
||||
|
||||
static void
|
||||
convert_glob_print_separator(pcre2_output_context *out,
|
||||
PCRE2_UCHAR separator, BOOL with_escape)
|
||||
{
|
||||
if (with_escape)
|
||||
convert_glob_write(out, CHAR_BACKSLASH);
|
||||
|
||||
convert_glob_write(out, separator);
|
||||
}
|
||||
|
||||
|
||||
/* Prints a wildcard into the output.
|
||||
|
||||
Arguments:
|
||||
out output context
|
||||
separator glob separator
|
||||
with_escape backslash is needed before separator
|
||||
*/
|
||||
|
||||
static void
|
||||
convert_glob_print_wildcard(pcre2_output_context *out,
|
||||
PCRE2_UCHAR separator, BOOL with_escape)
|
||||
{
|
||||
out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;
|
||||
out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT;
|
||||
convert_glob_write_str(out, 2);
|
||||
|
||||
convert_glob_print_separator(out, separator, with_escape);
|
||||
|
||||
convert_glob_write(out, CHAR_RIGHT_SQUARE_BRACKET);
|
||||
}
|
||||
|
||||
|
||||
/* Parse a posix class.
|
||||
|
||||
Arguments:
|
||||
from starting point of scanning the range
|
||||
pattern_end end of pattern
|
||||
out output context
|
||||
|
||||
Returns: >0 => class index
|
||||
0 => malformed class
|
||||
*/
|
||||
|
||||
static int
|
||||
convert_glob_parse_class(PCRE2_SPTR *from, PCRE2_SPTR pattern_end,
|
||||
pcre2_output_context *out)
|
||||
{
|
||||
static const char *posix_classes = "alnum:alpha:ascii:blank:cntrl:digit:"
|
||||
"graph:lower:print:punct:space:upper:word:xdigit:";
|
||||
PCRE2_SPTR start = *from + 1;
|
||||
PCRE2_SPTR pattern = start;
|
||||
const char *class_ptr;
|
||||
PCRE2_UCHAR c;
|
||||
int class_index;
|
||||
|
||||
while (TRUE)
|
||||
{
|
||||
if (pattern >= pattern_end) return 0;
|
||||
|
||||
c = *pattern++;
|
||||
|
||||
if (c < CHAR_a || c > CHAR_z) break;
|
||||
}
|
||||
|
||||
if (c != CHAR_COLON || pattern >= pattern_end ||
|
||||
*pattern != CHAR_RIGHT_SQUARE_BRACKET)
|
||||
return 0;
|
||||
|
||||
class_ptr = posix_classes;
|
||||
class_index = 1;
|
||||
|
||||
while (TRUE)
|
||||
{
|
||||
if (*class_ptr == CHAR_NUL) return 0;
|
||||
|
||||
pattern = start;
|
||||
|
||||
while (*pattern == (PCRE2_UCHAR) *class_ptr)
|
||||
{
|
||||
if (*pattern == CHAR_COLON)
|
||||
{
|
||||
pattern += 2;
|
||||
start -= 2;
|
||||
|
||||
do convert_glob_write(out, *start++); while (start < pattern);
|
||||
|
||||
*from = pattern;
|
||||
return class_index;
|
||||
}
|
||||
pattern++;
|
||||
class_ptr++;
|
||||
}
|
||||
|
||||
while (*class_ptr != CHAR_COLON) class_ptr++;
|
||||
class_ptr++;
|
||||
class_index++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Checks whether the character is in the class.
|
||||
|
||||
Arguments:
|
||||
class_index class index
|
||||
c character
|
||||
|
||||
Returns: !0 => character is found in the class
|
||||
0 => otherwise
|
||||
*/
|
||||
|
||||
static BOOL
|
||||
convert_glob_char_in_class(int class_index, PCRE2_UCHAR c)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (c > 0xff)
|
||||
{
|
||||
/* ctype functions are not sane for c > 0xff */
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
switch (class_index)
|
||||
{
|
||||
case 1: return isalnum(c);
|
||||
case 2: return isalpha(c);
|
||||
case 3: return 1;
|
||||
case 4: return c == CHAR_HT || c == CHAR_SPACE;
|
||||
case 5: return iscntrl(c);
|
||||
case 6: return isdigit(c);
|
||||
case 7: return isgraph(c);
|
||||
case 8: return islower(c);
|
||||
case 9: return isprint(c);
|
||||
case 10: return ispunct(c);
|
||||
case 11: return isspace(c);
|
||||
case 12: return isupper(c);
|
||||
case 13: return isalnum(c) || c == CHAR_UNDERSCORE;
|
||||
default: return isxdigit(c);
|
||||
}
|
||||
}
|
||||
|
||||
/* Parse a range of characters.
|
||||
|
||||
Arguments:
|
||||
from starting point of scanning the range
|
||||
pattern_end end of pattern
|
||||
out output context
|
||||
separator glob separator
|
||||
with_escape backslash is needed before separator
|
||||
|
||||
Returns: 0 => success
|
||||
!0 => error code
|
||||
*/
|
||||
|
||||
static int
|
||||
convert_glob_parse_range(PCRE2_SPTR *from, PCRE2_SPTR pattern_end,
|
||||
pcre2_output_context *out, BOOL utf, PCRE2_UCHAR separator,
|
||||
BOOL with_escape, PCRE2_UCHAR escape, BOOL no_wildsep)
|
||||
{
|
||||
BOOL is_negative = FALSE;
|
||||
BOOL separator_seen = FALSE;
|
||||
BOOL has_prev_c;
|
||||
PCRE2_SPTR pattern = *from;
|
||||
PCRE2_SPTR char_start = NULL;
|
||||
uint32_t c, prev_c;
|
||||
int len, class_index;
|
||||
|
||||
(void)utf; /* Avoid compiler warning. */
|
||||
|
||||
if (pattern >= pattern_end)
|
||||
{
|
||||
*from = pattern;
|
||||
return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
|
||||
}
|
||||
|
||||
if (*pattern == CHAR_EXCLAMATION_MARK
|
||||
|| *pattern == CHAR_CIRCUMFLEX_ACCENT)
|
||||
{
|
||||
pattern++;
|
||||
|
||||
if (pattern >= pattern_end)
|
||||
{
|
||||
*from = pattern;
|
||||
return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
|
||||
}
|
||||
|
||||
is_negative = TRUE;
|
||||
|
||||
out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;
|
||||
out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT;
|
||||
len = 2;
|
||||
|
||||
if (!no_wildsep)
|
||||
{
|
||||
if (with_escape)
|
||||
{
|
||||
out->out_str[len] = CHAR_BACKSLASH;
|
||||
len++;
|
||||
}
|
||||
out->out_str[len] = (uint8_t) separator;
|
||||
}
|
||||
|
||||
convert_glob_write_str(out, len + 1);
|
||||
}
|
||||
else
|
||||
convert_glob_write(out, CHAR_LEFT_SQUARE_BRACKET);
|
||||
|
||||
has_prev_c = FALSE;
|
||||
prev_c = 0;
|
||||
|
||||
if (*pattern == CHAR_RIGHT_SQUARE_BRACKET)
|
||||
{
|
||||
out->out_str[0] = CHAR_BACKSLASH;
|
||||
out->out_str[1] = CHAR_RIGHT_SQUARE_BRACKET;
|
||||
convert_glob_write_str(out, 2);
|
||||
has_prev_c = TRUE;
|
||||
prev_c = CHAR_RIGHT_SQUARE_BRACKET;
|
||||
pattern++;
|
||||
}
|
||||
|
||||
while (pattern < pattern_end)
|
||||
{
|
||||
char_start = pattern;
|
||||
GETCHARINCTEST(c, pattern);
|
||||
|
||||
if (c == CHAR_RIGHT_SQUARE_BRACKET)
|
||||
{
|
||||
convert_glob_write(out, c);
|
||||
|
||||
if (!is_negative && !no_wildsep && separator_seen)
|
||||
{
|
||||
out->out_str[0] = CHAR_LEFT_PARENTHESIS;
|
||||
out->out_str[1] = CHAR_QUESTION_MARK;
|
||||
out->out_str[2] = CHAR_LESS_THAN_SIGN;
|
||||
out->out_str[3] = CHAR_EXCLAMATION_MARK;
|
||||
convert_glob_write_str(out, 4);
|
||||
|
||||
convert_glob_print_separator(out, separator, with_escape);
|
||||
convert_glob_write(out, CHAR_RIGHT_PARENTHESIS);
|
||||
}
|
||||
|
||||
*from = pattern;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (pattern >= pattern_end) break;
|
||||
|
||||
if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
|
||||
{
|
||||
*from = pattern;
|
||||
class_index = convert_glob_parse_class(from, pattern_end, out);
|
||||
|
||||
if (class_index != 0)
|
||||
{
|
||||
pattern = *from;
|
||||
|
||||
has_prev_c = FALSE;
|
||||
prev_c = 0;
|
||||
|
||||
if (!is_negative &&
|
||||
convert_glob_char_in_class (class_index, separator))
|
||||
separator_seen = TRUE;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else if (c == CHAR_MINUS && has_prev_c &&
|
||||
*pattern != CHAR_RIGHT_SQUARE_BRACKET)
|
||||
{
|
||||
convert_glob_write(out, CHAR_MINUS);
|
||||
|
||||
char_start = pattern;
|
||||
GETCHARINCTEST(c, pattern);
|
||||
|
||||
if (pattern >= pattern_end) break;
|
||||
|
||||
if (escape != 0 && c == escape)
|
||||
{
|
||||
char_start = pattern;
|
||||
GETCHARINCTEST(c, pattern);
|
||||
}
|
||||
else if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
|
||||
{
|
||||
*from = pattern;
|
||||
return PCRE2_ERROR_CONVERT_SYNTAX;
|
||||
}
|
||||
|
||||
if (prev_c > c)
|
||||
{
|
||||
*from = pattern;
|
||||
return PCRE2_ERROR_CONVERT_SYNTAX;
|
||||
}
|
||||
|
||||
if (prev_c < separator && separator < c) separator_seen = TRUE;
|
||||
|
||||
has_prev_c = FALSE;
|
||||
prev_c = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (escape != 0 && c == escape)
|
||||
{
|
||||
char_start = pattern;
|
||||
GETCHARINCTEST(c, pattern);
|
||||
|
||||
if (pattern >= pattern_end) break;
|
||||
}
|
||||
|
||||
has_prev_c = TRUE;
|
||||
prev_c = c;
|
||||
}
|
||||
|
||||
if (c == CHAR_LEFT_SQUARE_BRACKET || c == CHAR_RIGHT_SQUARE_BRACKET ||
|
||||
c == CHAR_BACKSLASH || c == CHAR_MINUS)
|
||||
convert_glob_write(out, CHAR_BACKSLASH);
|
||||
|
||||
if (c == separator) separator_seen = TRUE;
|
||||
|
||||
do convert_glob_write(out, *char_start++); while (char_start < pattern);
|
||||
}
|
||||
|
||||
*from = pattern;
|
||||
return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
|
||||
}
|
||||
|
||||
|
||||
/* Prints a (*COMMIT) into the output.
|
||||
|
||||
Arguments:
|
||||
out output context
|
||||
*/
|
||||
|
||||
static void
|
||||
convert_glob_print_commit(pcre2_output_context *out)
|
||||
{
|
||||
out->out_str[0] = CHAR_LEFT_PARENTHESIS;
|
||||
out->out_str[1] = CHAR_ASTERISK;
|
||||
out->out_str[2] = CHAR_C;
|
||||
out->out_str[3] = CHAR_O;
|
||||
out->out_str[4] = CHAR_M;
|
||||
out->out_str[5] = CHAR_M;
|
||||
out->out_str[6] = CHAR_I;
|
||||
out->out_str[7] = CHAR_T;
|
||||
convert_glob_write_str(out, 8);
|
||||
convert_glob_write(out, CHAR_RIGHT_PARENTHESIS);
|
||||
}
|
||||
|
||||
|
||||
/* Bash glob converter.
|
||||
|
||||
Arguments:
|
||||
pattype the pattern type
|
||||
pattern the pattern
|
||||
plength length in code units
|
||||
utf TRUE if UTF
|
||||
use_buffer where to put the output
|
||||
use_length length of use_buffer
|
||||
bufflenptr where to put the used length
|
||||
dummyrun TRUE if a dummy run
|
||||
ccontext the convert context
|
||||
|
||||
Returns: 0 => success
|
||||
!0 => error code
|
||||
*/
|
||||
|
||||
static int
|
||||
convert_glob(uint32_t options, PCRE2_SPTR pattern, PCRE2_SIZE plength,
|
||||
BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,
|
||||
PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)
|
||||
{
|
||||
pcre2_output_context out;
|
||||
PCRE2_SPTR pattern_start = pattern;
|
||||
PCRE2_SPTR pattern_end = pattern + plength;
|
||||
PCRE2_UCHAR separator = ccontext->glob_separator;
|
||||
PCRE2_UCHAR escape = ccontext->glob_escape;
|
||||
PCRE2_UCHAR c;
|
||||
BOOL no_wildsep = (options & PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR) != 0;
|
||||
BOOL no_starstar = (options & PCRE2_CONVERT_GLOB_NO_STARSTAR) != 0;
|
||||
BOOL in_atomic = FALSE;
|
||||
BOOL after_starstar = FALSE;
|
||||
BOOL no_slash_z = FALSE;
|
||||
BOOL with_escape, is_start, after_separator;
|
||||
int result = 0;
|
||||
|
||||
(void)utf; /* Avoid compiler warning. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && (separator >= 128 || escape >= 128))
|
||||
{
|
||||
/* Currently only ASCII characters are supported. */
|
||||
*bufflenptr = 0;
|
||||
return PCRE2_ERROR_CONVERT_SYNTAX;
|
||||
}
|
||||
#endif
|
||||
|
||||
with_escape = strchr(pcre2_escaped_literals, separator) != NULL;
|
||||
|
||||
/* Initialize default for error offset as end of input. */
|
||||
out.output = use_buffer;
|
||||
out.output_end = use_buffer + use_length;
|
||||
out.output_size = 0;
|
||||
|
||||
out.out_str[0] = CHAR_LEFT_PARENTHESIS;
|
||||
out.out_str[1] = CHAR_QUESTION_MARK;
|
||||
out.out_str[2] = CHAR_s;
|
||||
out.out_str[3] = CHAR_RIGHT_PARENTHESIS;
|
||||
convert_glob_write_str(&out, 4);
|
||||
|
||||
is_start = TRUE;
|
||||
|
||||
if (pattern < pattern_end && pattern[0] == CHAR_ASTERISK)
|
||||
{
|
||||
if (no_wildsep)
|
||||
is_start = FALSE;
|
||||
else if (!no_starstar && pattern + 1 < pattern_end &&
|
||||
pattern[1] == CHAR_ASTERISK)
|
||||
is_start = FALSE;
|
||||
}
|
||||
|
||||
if (is_start)
|
||||
{
|
||||
out.out_str[0] = CHAR_BACKSLASH;
|
||||
out.out_str[1] = CHAR_A;
|
||||
convert_glob_write_str(&out, 2);
|
||||
}
|
||||
|
||||
while (pattern < pattern_end)
|
||||
{
|
||||
c = *pattern++;
|
||||
|
||||
if (c == CHAR_ASTERISK)
|
||||
{
|
||||
is_start = pattern == pattern_start + 1;
|
||||
|
||||
if (in_atomic)
|
||||
{
|
||||
convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
|
||||
in_atomic = FALSE;
|
||||
}
|
||||
|
||||
if (!no_starstar && pattern < pattern_end && *pattern == CHAR_ASTERISK)
|
||||
{
|
||||
after_separator = is_start || (pattern[-2] == separator);
|
||||
|
||||
do pattern++; while (pattern < pattern_end &&
|
||||
*pattern == CHAR_ASTERISK);
|
||||
|
||||
if (pattern >= pattern_end)
|
||||
{
|
||||
no_slash_z = TRUE;
|
||||
break;
|
||||
}
|
||||
|
||||
after_starstar = TRUE;
|
||||
|
||||
if (after_separator && escape != 0 && *pattern == escape &&
|
||||
pattern + 1 < pattern_end && pattern[1] == separator)
|
||||
pattern++;
|
||||
|
||||
if (is_start)
|
||||
{
|
||||
if (*pattern != separator) continue;
|
||||
|
||||
out.out_str[0] = CHAR_LEFT_PARENTHESIS;
|
||||
out.out_str[1] = CHAR_QUESTION_MARK;
|
||||
out.out_str[2] = CHAR_COLON;
|
||||
out.out_str[3] = CHAR_BACKSLASH;
|
||||
out.out_str[4] = CHAR_A;
|
||||
out.out_str[5] = CHAR_VERTICAL_LINE;
|
||||
convert_glob_write_str(&out, 6);
|
||||
|
||||
convert_glob_print_separator(&out, separator, with_escape);
|
||||
convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
|
||||
|
||||
pattern++;
|
||||
continue;
|
||||
}
|
||||
|
||||
convert_glob_print_commit(&out);
|
||||
|
||||
if (!after_separator || *pattern != separator)
|
||||
{
|
||||
out.out_str[0] = CHAR_DOT;
|
||||
out.out_str[1] = CHAR_ASTERISK;
|
||||
out.out_str[2] = CHAR_QUESTION_MARK;
|
||||
convert_glob_write_str(&out, 3);
|
||||
continue;
|
||||
}
|
||||
|
||||
out.out_str[0] = CHAR_LEFT_PARENTHESIS;
|
||||
out.out_str[1] = CHAR_QUESTION_MARK;
|
||||
out.out_str[2] = CHAR_COLON;
|
||||
out.out_str[3] = CHAR_DOT;
|
||||
out.out_str[4] = CHAR_ASTERISK;
|
||||
out.out_str[5] = CHAR_QUESTION_MARK;
|
||||
|
||||
convert_glob_write_str(&out, 6);
|
||||
|
||||
convert_glob_print_separator(&out, separator, with_escape);
|
||||
|
||||
out.out_str[0] = CHAR_RIGHT_PARENTHESIS;
|
||||
out.out_str[1] = CHAR_QUESTION_MARK;
|
||||
out.out_str[2] = CHAR_QUESTION_MARK;
|
||||
convert_glob_write_str(&out, 3);
|
||||
|
||||
pattern++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (pattern < pattern_end && *pattern == CHAR_ASTERISK)
|
||||
{
|
||||
do pattern++; while (pattern < pattern_end &&
|
||||
*pattern == CHAR_ASTERISK);
|
||||
}
|
||||
|
||||
if (no_wildsep)
|
||||
{
|
||||
if (pattern >= pattern_end)
|
||||
{
|
||||
no_slash_z = TRUE;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Start check must be after the end check. */
|
||||
if (is_start) continue;
|
||||
}
|
||||
|
||||
if (!is_start)
|
||||
{
|
||||
if (after_starstar)
|
||||
{
|
||||
out.out_str[0] = CHAR_LEFT_PARENTHESIS;
|
||||
out.out_str[1] = CHAR_QUESTION_MARK;
|
||||
out.out_str[2] = CHAR_GREATER_THAN_SIGN;
|
||||
convert_glob_write_str(&out, 3);
|
||||
in_atomic = TRUE;
|
||||
}
|
||||
else
|
||||
convert_glob_print_commit(&out);
|
||||
}
|
||||
|
||||
if (no_wildsep)
|
||||
convert_glob_write(&out, CHAR_DOT);
|
||||
else
|
||||
convert_glob_print_wildcard(&out, separator, with_escape);
|
||||
|
||||
out.out_str[0] = CHAR_ASTERISK;
|
||||
out.out_str[1] = CHAR_QUESTION_MARK;
|
||||
if (pattern >= pattern_end)
|
||||
out.out_str[1] = CHAR_PLUS;
|
||||
convert_glob_write_str(&out, 2);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c == CHAR_QUESTION_MARK)
|
||||
{
|
||||
if (no_wildsep)
|
||||
convert_glob_write(&out, CHAR_DOT);
|
||||
else
|
||||
convert_glob_print_wildcard(&out, separator, with_escape);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c == CHAR_LEFT_SQUARE_BRACKET)
|
||||
{
|
||||
result = convert_glob_parse_range(&pattern, pattern_end,
|
||||
&out, utf, separator, with_escape, escape, no_wildsep);
|
||||
if (result != 0) break;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (escape != 0 && c == escape)
|
||||
{
|
||||
if (pattern >= pattern_end)
|
||||
{
|
||||
result = PCRE2_ERROR_CONVERT_SYNTAX;
|
||||
break;
|
||||
}
|
||||
c = *pattern++;
|
||||
}
|
||||
|
||||
if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL)
|
||||
convert_glob_write(&out, CHAR_BACKSLASH);
|
||||
|
||||
convert_glob_write(&out, c);
|
||||
}
|
||||
|
||||
if (result == 0)
|
||||
{
|
||||
if (!no_slash_z)
|
||||
{
|
||||
out.out_str[0] = CHAR_BACKSLASH;
|
||||
out.out_str[1] = CHAR_z;
|
||||
convert_glob_write_str(&out, 2);
|
||||
}
|
||||
|
||||
if (in_atomic)
|
||||
convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
|
||||
|
||||
convert_glob_write(&out, CHAR_NUL);
|
||||
|
||||
if (!dummyrun && out.output_size != (PCRE2_SIZE) (out.output - use_buffer))
|
||||
result = PCRE2_ERROR_NOMEMORY;
|
||||
}
|
||||
|
||||
if (result != 0)
|
||||
{
|
||||
*bufflenptr = pattern - pattern_start;
|
||||
return result;
|
||||
}
|
||||
|
||||
*bufflenptr = out.output_size - 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Convert pattern *
|
||||
*************************************************/
|
||||
|
||||
/* This is the external-facing function for converting other forms of pattern
|
||||
into PCRE2 regular expression patterns. On error, the bufflenptr argument is
|
||||
used to return an offset in the original pattern.
|
||||
|
||||
Arguments:
|
||||
pattern the input pattern
|
||||
plength length of input, or PCRE2_ZERO_TERMINATED
|
||||
options options bits
|
||||
buffptr pointer to pointer to output buffer
|
||||
bufflenptr pointer to length of output buffer
|
||||
ccontext convert context or NULL
|
||||
|
||||
Returns: 0 for success, else an error code (+ve or -ve)
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE plength, uint32_t options,
|
||||
PCRE2_UCHAR **buffptr, PCRE2_SIZE *bufflenptr,
|
||||
pcre2_convert_context *ccontext)
|
||||
{
|
||||
int rc;
|
||||
PCRE2_UCHAR dummy_buffer[DUMMY_BUFFER_SIZE];
|
||||
PCRE2_UCHAR *use_buffer = dummy_buffer;
|
||||
PCRE2_SIZE use_length = DUMMY_BUFFER_SIZE;
|
||||
BOOL utf = (options & PCRE2_CONVERT_UTF) != 0;
|
||||
uint32_t pattype = options & TYPE_OPTIONS;
|
||||
|
||||
if (pattern == NULL || bufflenptr == NULL) return PCRE2_ERROR_NULL;
|
||||
|
||||
if ((options & ~ALL_OPTIONS) != 0 || /* Undefined bit set */
|
||||
(pattype & (~pattype+1)) != pattype || /* More than one type set */
|
||||
pattype == 0) /* No type set */
|
||||
{
|
||||
*bufflenptr = 0; /* Error offset */
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
}
|
||||
|
||||
if (plength == PCRE2_ZERO_TERMINATED) plength = PRIV(strlen)(pattern);
|
||||
if (ccontext == NULL) ccontext =
|
||||
(pcre2_convert_context *)(&PRIV(default_convert_context));
|
||||
|
||||
/* Check UTF if required. */
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
*bufflenptr = 0; /* Error offset */
|
||||
return PCRE2_ERROR_UNICODE_NOT_SUPPORTED;
|
||||
}
|
||||
#else
|
||||
if (utf && (options & PCRE2_CONVERT_NO_UTF_CHECK) == 0)
|
||||
{
|
||||
PCRE2_SIZE erroroffset;
|
||||
rc = PRIV(valid_utf)(pattern, plength, &erroroffset);
|
||||
if (rc != 0)
|
||||
{
|
||||
*bufflenptr = erroroffset;
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* If buffptr is not NULL, and what it points to is not NULL, we are being
|
||||
provided with a buffer and a length, so set them as the buffer to use. */
|
||||
|
||||
if (buffptr != NULL && *buffptr != NULL)
|
||||
{
|
||||
use_buffer = *buffptr;
|
||||
use_length = *bufflenptr;
|
||||
}
|
||||
|
||||
/* Call an individual converter, either just once (if a buffer was provided or
|
||||
just the length is needed), or twice (if a memory allocation is required). */
|
||||
|
||||
for (int i = 0; i < 2; i++)
|
||||
{
|
||||
PCRE2_UCHAR *allocated;
|
||||
BOOL dummyrun = buffptr == NULL || *buffptr == NULL;
|
||||
|
||||
switch(pattype)
|
||||
{
|
||||
case PCRE2_CONVERT_GLOB:
|
||||
rc = convert_glob(options & ~PCRE2_CONVERT_GLOB, pattern, plength, utf,
|
||||
use_buffer, use_length, bufflenptr, dummyrun, ccontext);
|
||||
break;
|
||||
|
||||
case PCRE2_CONVERT_POSIX_BASIC:
|
||||
case PCRE2_CONVERT_POSIX_EXTENDED:
|
||||
rc = convert_posix(pattype, pattern, plength, utf, use_buffer, use_length,
|
||||
bufflenptr, dummyrun, ccontext);
|
||||
break;
|
||||
|
||||
default:
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
if (rc != 0 || /* Error */
|
||||
buffptr == NULL || /* Just the length is required */
|
||||
*buffptr != NULL) /* Buffer was provided or allocated */
|
||||
return rc;
|
||||
|
||||
/* Allocate memory for the buffer, with hidden space for an allocator at
|
||||
the start. The next time round the loop runs the conversion for real. */
|
||||
|
||||
allocated = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
|
||||
(*bufflenptr + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)ccontext);
|
||||
if (allocated == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
*buffptr = (PCRE2_UCHAR *)(((char *)allocated) + sizeof(pcre2_memctl));
|
||||
|
||||
use_buffer = *buffptr;
|
||||
use_length = *bufflenptr + 1;
|
||||
}
|
||||
|
||||
/* Something went terribly wrong. Trigger an assert and return an error */
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
|
||||
EXIT:
|
||||
|
||||
*bufflenptr = 0; /* Error offset */
|
||||
return PCRE2_ERROR_INTERNAL;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free converted pattern *
|
||||
*************************************************/
|
||||
|
||||
/* This frees a converted pattern that was put in newly-allocated memory.
|
||||
|
||||
Argument: the converted pattern
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_converted_pattern_free(PCRE2_UCHAR *converted)
|
||||
{
|
||||
if (converted != NULL)
|
||||
{
|
||||
pcre2_memctl *memctl =
|
||||
(pcre2_memctl *)((char *)converted - sizeof(pcre2_memctl));
|
||||
memctl->free(memctl, memctl->memory_data);
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_convert.c */
|
||||
4110
3rd/pcre2/src/pcre2_dfa_match.c
Normal file
4110
3rd/pcre2/src/pcre2_dfa_match.c
Normal file
@@ -0,0 +1,4110 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre2_dfa_match(), which is an
|
||||
alternative matching function that uses a sort of DFA algorithm (not a true
|
||||
FSM). This is NOT Perl-compatible, but it has advantages in certain
|
||||
applications. */
|
||||
|
||||
|
||||
/* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved
|
||||
the performance of his patterns greatly. I could not use it as it stood, as it
|
||||
was not thread safe, and made assumptions about pattern sizes. Also, it caused
|
||||
test 7 to loop, and test 9 to crash with a segfault.
|
||||
|
||||
The issue is the check for duplicate states, which is done by a simple linear
|
||||
search up the state list. (Grep for "duplicate" below to find the code.) For
|
||||
many patterns, there will never be many states active at one time, so a simple
|
||||
linear search is fine. In patterns that have many active states, it might be a
|
||||
bottleneck. The suggested code used an indexing scheme to remember which states
|
||||
had previously been used for each character, and avoided the linear search when
|
||||
it knew there was no chance of a duplicate. This was implemented when adding
|
||||
states to the state lists.
|
||||
|
||||
I wrote some thread-safe, not-limited code to try something similar at the time
|
||||
of checking for duplicates (instead of when adding states), using index vectors
|
||||
on the stack. It did give a 13% improvement with one specially constructed
|
||||
pattern for certain subject strings, but on other strings and on many of the
|
||||
simpler patterns in the test suite it did worse. The major problem, I think,
|
||||
was the extra time to initialize the index. This had to be done for each call
|
||||
of internal_dfa_match(). (The supplied patch used a static vector, initialized
|
||||
only once - I suspect this was the cause of the problems with the tests.)
|
||||
|
||||
Overall, I concluded that the gains in some cases did not outweigh the losses
|
||||
in others, so I abandoned this code. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#define NLBLOCK mb /* Block containing newline information */
|
||||
#define PSSTART start_subject /* Field containing processed string start */
|
||||
#define PSEND end_subject /* Field containing processed string end */
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
#define PUBLIC_DFA_MATCH_OPTIONS \
|
||||
(PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
|
||||
PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
|
||||
PCRE2_PARTIAL_SOFT|PCRE2_DFA_SHORTEST|PCRE2_DFA_RESTART| \
|
||||
PCRE2_COPY_MATCHED_SUBJECT)
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Code parameters and static tables *
|
||||
*************************************************/
|
||||
|
||||
/* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
|
||||
into others, under special conditions. A gap of 20 between the blocks should be
|
||||
enough. The resulting opcodes don't have to be less than 256 because they are
|
||||
never stored, so we push them well clear of the normal opcodes. */
|
||||
|
||||
#define OP_PROP_EXTRA 300
|
||||
#define OP_EXTUNI_EXTRA 320
|
||||
#define OP_ANYNL_EXTRA 340
|
||||
#define OP_HSPACE_EXTRA 360
|
||||
#define OP_VSPACE_EXTRA 380
|
||||
|
||||
|
||||
/* This table identifies those opcodes that are followed immediately by a
|
||||
character that is to be tested in some way. This makes it possible to
|
||||
centralize the loading of these characters. In the case of Type * etc, the
|
||||
"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
|
||||
small value. Non-zero values in the table are the offsets from the opcode where
|
||||
the character is to be found. ***NOTE*** If the start of this table is
|
||||
modified, the three tables that follow must also be modified. */
|
||||
|
||||
static const uint8_t coptable[] = {
|
||||
0, /* End */
|
||||
0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */
|
||||
0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */
|
||||
0, 0, 0, /* Any, AllAny, Anybyte */
|
||||
0, 0, /* \P, \p */
|
||||
0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */
|
||||
0, /* \X */
|
||||
0, 0, 0, 0, 0, 0, /* \Z, \z, $, $M, ^, ^M */
|
||||
1, /* Char */
|
||||
1, /* Chari */
|
||||
1, /* not */
|
||||
1, /* noti */
|
||||
/* Positive single-char repeats */
|
||||
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
|
||||
1+IMM2_SIZE, 1+IMM2_SIZE, /* upto, minupto */
|
||||
1+IMM2_SIZE, /* exact */
|
||||
1, 1, 1, 1+IMM2_SIZE, /* *+, ++, ?+, upto+ */
|
||||
1, 1, 1, 1, 1, 1, /* *I, *?I, +I, +?I, ?I, ??I */
|
||||
1+IMM2_SIZE, 1+IMM2_SIZE, /* upto I, minupto I */
|
||||
1+IMM2_SIZE, /* exact I */
|
||||
1, 1, 1, 1+IMM2_SIZE, /* *+I, ++I, ?+I, upto+I */
|
||||
/* Negative single-char repeats - only for chars < 256 */
|
||||
1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */
|
||||
1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto, minupto */
|
||||
1+IMM2_SIZE, /* NOT exact */
|
||||
1, 1, 1, 1+IMM2_SIZE, /* NOT *+, ++, ?+, upto+ */
|
||||
1, 1, 1, 1, 1, 1, /* NOT *I, *?I, +I, +?I, ?I, ??I */
|
||||
1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto I, minupto I */
|
||||
1+IMM2_SIZE, /* NOT exact I */
|
||||
1, 1, 1, 1+IMM2_SIZE, /* NOT *+I, ++I, ?+I, upto+I */
|
||||
/* Positive type repeats */
|
||||
1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */
|
||||
1+IMM2_SIZE, 1+IMM2_SIZE, /* Type upto, minupto */
|
||||
1+IMM2_SIZE, /* Type exact */
|
||||
1, 1, 1, 1+IMM2_SIZE, /* Type *+, ++, ?+, upto+ */
|
||||
/* Character class & ref repeats */
|
||||
0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */
|
||||
0, 0, /* CRRANGE, CRMINRANGE */
|
||||
0, 0, 0, 0, /* Possessive *+, ++, ?+, CRPOSRANGE */
|
||||
0, /* CLASS */
|
||||
0, /* NCLASS */
|
||||
0, /* XCLASS - variable length */
|
||||
0, /* ECLASS - variable length */
|
||||
0, /* REF */
|
||||
0, /* REFI */
|
||||
0, /* DNREF */
|
||||
0, /* DNREFI */
|
||||
0, /* RECURSE */
|
||||
0, /* CALLOUT */
|
||||
0, /* CALLOUT_STR */
|
||||
0, /* Alt */
|
||||
0, /* Ket */
|
||||
0, /* KetRmax */
|
||||
0, /* KetRmin */
|
||||
0, /* KetRpos */
|
||||
0, 0, /* Reverse, Vreverse */
|
||||
0, /* Assert */
|
||||
0, /* Assert not */
|
||||
0, /* Assert behind */
|
||||
0, /* Assert behind not */
|
||||
0, /* NA assert */
|
||||
0, /* NA assert behind */
|
||||
0, /* Assert scan substring */
|
||||
0, /* ONCE */
|
||||
0, /* SCRIPT_RUN */
|
||||
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
|
||||
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
|
||||
0, 0, /* CREF, DNCREF */
|
||||
0, 0, /* RREF, DNRREF */
|
||||
0, 0, /* FALSE, TRUE */
|
||||
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
|
||||
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
|
||||
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
|
||||
0, 0, /* COMMIT, COMMIT_ARG */
|
||||
0, 0, 0, /* FAIL, ACCEPT, ASSERT_ACCEPT */
|
||||
0, 0, 0, /* CLOSE, SKIPZERO, DEFINE */
|
||||
0, 0, /* \B and \b in UCP mode */
|
||||
};
|
||||
|
||||
/* Character-inspection table, one entry per opcode: 1 for an opcode that
inspects a character, 0 otherwise. It is used to remember the fact that a
character could have been inspected when the end of the subject is reached.
***NOTE*** If the start of this table is modified, the two tables that follow
must also be modified. */

static const uint8_t poptable[] = {
  0,                               /* End                                    */
  0, 0, 0, 1, 1,                   /* \A, \G, \K, \B, \b                     */
  1, 1, 1, 1, 1, 1,                /* \D, \d, \S, \s, \W, \w                 */
  1, 1, 1,                         /* Any, AllAny, Anybyte                   */
  1, 1,                            /* \P, \p                                 */
  1, 1, 1, 1, 1,                   /* \R, \H, \h, \V, \v                     */
  1,                               /* \X                                     */
  0, 0, 0, 0, 0, 0,                /* \Z, \z, $, $M, ^, ^M                   */
  1,                               /* Char                                   */
  1,                               /* Chari                                  */
  1,                               /* not                                    */
  1,                               /* noti                                   */

  /* Positive single-char repeats */
  1, 1, 1, 1, 1, 1,                /* *, *?, +, +?, ?, ??                    */
  1, 1, 1,                         /* upto, minupto, exact                   */
  1, 1, 1, 1,                      /* *+, ++, ?+, upto+                      */
  1, 1, 1, 1, 1, 1,                /* *I, *?I, +I, +?I, ?I, ??I              */
  1, 1, 1,                         /* upto I, minupto I, exact I             */
  1, 1, 1, 1,                      /* *+I, ++I, ?+I, upto+I                  */

  /* Negative single-char repeats - only for chars < 256 */
  1, 1, 1, 1, 1, 1,                /* NOT *, *?, +, +?, ?, ??                */
  1, 1, 1,                         /* NOT upto, minupto, exact               */
  1, 1, 1, 1,                      /* NOT *+, ++, ?+, upto+                  */
  1, 1, 1, 1, 1, 1,                /* NOT *I, *?I, +I, +?I, ?I, ??I          */
  1, 1, 1,                         /* NOT upto I, minupto I, exact I         */
  1, 1, 1, 1,                      /* NOT *+I, ++I, ?+I, upto+I              */

  /* Positive type repeats */
  1, 1, 1, 1, 1, 1,                /* Type *, *?, +, +?, ?, ??               */
  1, 1, 1,                         /* Type upto, minupto, exact              */
  1, 1, 1, 1,                      /* Type *+, ++, ?+, upto+                 */

  /* Character class & ref repeats */
  1, 1, 1, 1, 1, 1,                /* *, *?, +, +?, ?, ??                    */
  1, 1,                            /* CRRANGE, CRMINRANGE                    */
  1, 1, 1, 1,                      /* Possessive *+, ++, ?+, CRPOSRANGE      */
  1,                               /* CLASS                                  */
  1,                               /* NCLASS                                 */
  1,                               /* XCLASS - variable length               */
  1,                               /* ECLASS - variable length               */
  0,                               /* REF                                    */
  0,                               /* REFI                                   */
  0,                               /* DNREF                                  */
  0,                               /* DNREFI                                 */
  0,                               /* RECURSE                                */
  0,                               /* CALLOUT                                */
  0,                               /* CALLOUT_STR                            */
  0,                               /* Alt                                    */
  0,                               /* Ket                                    */
  0,                               /* KetRmax                                */
  0,                               /* KetRmin                                */
  0,                               /* KetRpos                                */
  0, 0,                            /* Reverse, Vreverse                      */
  0,                               /* Assert                                 */
  0,                               /* Assert not                             */
  0,                               /* Assert behind                          */
  0,                               /* Assert behind not                      */
  0,                               /* NA assert                              */
  0,                               /* NA assert behind                       */
  0,                               /* Assert scan substring                  */
  0,                               /* ONCE                                   */
  0,                               /* SCRIPT_RUN                             */
  0, 0, 0, 0, 0,                   /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
  0, 0, 0, 0, 0,                   /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
  0, 0,                            /* CREF, DNCREF                           */
  0, 0,                            /* RREF, DNRREF                           */
  0, 0,                            /* FALSE, TRUE                            */
  0, 0, 0,                         /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
  0, 0, 0,                         /* MARK, PRUNE, PRUNE_ARG                 */
  0, 0, 0, 0,                      /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
  0, 0,                            /* COMMIT, COMMIT_ARG                     */
  0, 0, 0,                         /* FAIL, ACCEPT, ASSERT_ACCEPT            */
  0, 0, 0,                         /* CLOSE, SKIPZERO, DEFINE                */
  1, 1,                            /* \B and \b in UCP mode                  */
};
|
||||
|
||||
/* Compile-time check that these tables have the correct size. */
|
||||
STATIC_ASSERT(sizeof(coptable) == OP_TABLE_LENGTH, coptable);
|
||||
STATIC_ASSERT(sizeof(poptable) == OP_TABLE_LENGTH, poptable);
|
||||
|
||||
/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
|
||||
and \w */
|
||||
|
||||
static const uint8_t toptable1[] = {
|
||||
0, 0, 0, 0, 0, 0,
|
||||
ctype_digit, ctype_digit,
|
||||
ctype_space, ctype_space,
|
||||
ctype_word, ctype_word,
|
||||
0, 0 /* OP_ANY, OP_ALLANY */
|
||||
};
|
||||
|
||||
static const uint8_t toptable2[] = {
|
||||
0, 0, 0, 0, 0, 0,
|
||||
ctype_digit, 0,
|
||||
ctype_space, 0,
|
||||
ctype_word, 0,
|
||||
1, 1 /* OP_ANY, OP_ALLANY */
|
||||
};
|
||||
|
||||
|
||||
/* Data about one particular state, which is in effect the current data for
an active path through the match tree. Every member must be an int, because
these structures are placed in the working vector that is passed in, and
that is a vector of ints. */

struct stateblock {
  int offset;                      /* Offset to opcode (-ve has meaning) */
  int count;                       /* Count for repeats */
  int data;                        /* Some use extra data */
};

typedef struct stateblock stateblock;

/* Number of ints taken up by one stateblock. */

#define INTS_PER_STATEBLOCK  (int)(sizeof(stateblock)/sizeof(int))
|
||||
|
||||
|
||||
/* Before version 10.32 the recursive calls of internal_dfa_match() were passed
local working space and output vectors that were created on the stack. This has
caused issues for some patterns, especially in small-stack environments such as
Windows. A new scheme is now in use which sets up a vector on the stack, but if
this is too small, heap memory is used, up to the heap_limit. The main
parameters are all numbers of ints because the workspace is a vector of ints.

The size of the starting stack vector, DFA_START_RWS_SIZE, is in bytes, and is
defined in pcre2_internal.h so as to be available to pcre2test when it is
finding the minimum heap requirement for a match. */

/* Number of ints needed to hold one PCRE2_SIZE value */

#define OVEC_UNIT  (sizeof(PCRE2_SIZE)/sizeof(int))

#define RWS_BASE_SIZE   (DFA_START_RWS_SIZE/sizeof(int))  /* Stack vector */
#define RWS_RSIZE       1000                    /* Work size for recursion */
#define RWS_OVEC_RSIZE  (1000*OVEC_UNIT)        /* Ovector for recursion */
#define RWS_OVEC_OSIZE  (2*OVEC_UNIT)           /* Ovector in other cases */
|
||||
|
||||
/* This structure is at the start of each workspace block. Blocks are chained
through the next field; size and free are both counted in ints, not bytes. */

typedef struct RWS_anchor {
  struct RWS_anchor *next;        /* Following block in the chain, or NULL */
  uint32_t size;                  /* Number of ints */
  uint32_t free;                  /* Number of ints */
} RWS_anchor;

/* The number of ints taken up by the anchor itself */

#define RWS_ANCHOR_SIZE (sizeof(RWS_anchor)/sizeof(int))
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Process a callout *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called to perform a callout.
|
||||
|
||||
Arguments:
|
||||
code current code pointer
|
||||
offsets points to current capture offsets
|
||||
current_subject start of current subject match
|
||||
ptr current position in subject
|
||||
mb the match block
|
||||
extracode extra code offset when called from condition
|
||||
lengthptr where to return the callout length
|
||||
|
||||
Returns: the return from the callout
|
||||
*/
|
||||
|
||||
static int
|
||||
do_callout_dfa(PCRE2_SPTR code, PCRE2_SIZE *offsets, PCRE2_SPTR current_subject,
|
||||
PCRE2_SPTR ptr, dfa_match_block *mb, PCRE2_SIZE extracode,
|
||||
PCRE2_SIZE *lengthptr)
|
||||
{
|
||||
pcre2_callout_block *cb = mb->cb;
|
||||
|
||||
*lengthptr = (code[extracode] == OP_CALLOUT)?
|
||||
(PCRE2_SIZE)PRIV(OP_lengths)[OP_CALLOUT] :
|
||||
(PCRE2_SIZE)GET(code, 1 + 2*LINK_SIZE + extracode);
|
||||
|
||||
if (mb->callout == NULL) return 0; /* No callout provided */
|
||||
|
||||
/* Fixed fields in the callout block are set once and for all at the start of
|
||||
matching. */
|
||||
|
||||
cb->offset_vector = offsets;
|
||||
cb->start_match = (PCRE2_SIZE)(current_subject - mb->start_subject);
|
||||
cb->current_position = (PCRE2_SIZE)(ptr - mb->start_subject);
|
||||
cb->pattern_position = GET(code, 1 + extracode);
|
||||
cb->next_item_length = GET(code, 1 + LINK_SIZE + extracode);
|
||||
|
||||
if (code[extracode] == OP_CALLOUT)
|
||||
{
|
||||
cb->callout_number = code[1 + 2*LINK_SIZE + extracode];
|
||||
cb->callout_string_offset = 0;
|
||||
cb->callout_string = NULL;
|
||||
cb->callout_string_length = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
cb->callout_number = 0;
|
||||
cb->callout_string_offset = GET(code, 1 + 3*LINK_SIZE + extracode);
|
||||
cb->callout_string = code + (1 + 4*LINK_SIZE + extracode) + 1;
|
||||
cb->callout_string_length = *lengthptr - (1 + 4*LINK_SIZE) - 2;
|
||||
}
|
||||
|
||||
return (mb->callout)(cb, mb->callout_data);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Expand local workspace memory *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called when internal_dfa_match() is about to be called
|
||||
recursively and there is insufficient working space left in the current
|
||||
workspace block. If there's an existing next block, use it; otherwise get a new
|
||||
block unless the heap limit is reached.
|
||||
|
||||
Arguments:
|
||||
rwsptr pointer to block pointer (updated)
|
||||
ovecsize space needed for an ovector
|
||||
mb the match block
|
||||
|
||||
Returns: 0 rwsptr has been updated
|
||||
!0 an error code
|
||||
*/
|
||||
|
||||
static int
|
||||
more_workspace(RWS_anchor **rwsptr, unsigned int ovecsize, dfa_match_block *mb)
|
||||
{
|
||||
RWS_anchor *rws = *rwsptr;
|
||||
RWS_anchor *new;
|
||||
|
||||
if (rws->next != NULL)
|
||||
{
|
||||
new = rws->next;
|
||||
}
|
||||
|
||||
/* Sizes in the RWS_anchor blocks are in units of sizeof(int), but
|
||||
mb->heap_limit and mb->heap_used are in kibibytes. Play carefully, to avoid
|
||||
overflow. */
|
||||
|
||||
else
|
||||
{
|
||||
uint32_t newsize = (rws->size >= UINT32_MAX/(sizeof(int)*2))? UINT32_MAX/sizeof(int) : rws->size * 2;
|
||||
uint32_t newsizeK = newsize/(1024/sizeof(int));
|
||||
|
||||
if (newsizeK + mb->heap_used > mb->heap_limit)
|
||||
newsizeK = (uint32_t)(mb->heap_limit - mb->heap_used);
|
||||
newsize = newsizeK*(1024/sizeof(int));
|
||||
|
||||
if (newsize < RWS_RSIZE + ovecsize + RWS_ANCHOR_SIZE)
|
||||
return PCRE2_ERROR_HEAPLIMIT;
|
||||
new = mb->memctl.malloc(newsize*sizeof(int), mb->memctl.memory_data);
|
||||
if (new == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
mb->heap_used += newsizeK;
|
||||
new->next = NULL;
|
||||
new->size = newsize;
|
||||
rws->next = new;
|
||||
}
|
||||
|
||||
new->free = new->size - RWS_ANCHOR_SIZE;
|
||||
*rwsptr = new;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Match a Regular Expression - DFA engine *
|
||||
*************************************************/
|
||||
|
||||
/* This internal function applies a compiled pattern to a subject string,
|
||||
starting at a given point, using a DFA engine. This function is called from the
|
||||
external one, possibly multiple times if the pattern is not anchored. The
|
||||
function calls itself recursively for some kinds of subpattern.
|
||||
|
||||
Arguments:
|
||||
mb the match_data block with fixed information
|
||||
this_start_code the opening bracket of this subexpression's code
|
||||
current_subject where we currently are in the subject string
|
||||
start_offset start offset in the subject string
|
||||
offsets vector to contain the matching string offsets
|
||||
offsetcount size of same
|
||||
workspace vector of workspace
|
||||
wscount size of same
|
||||
rlevel function call recursion level
|
||||
|
||||
Returns: > 0 => number of match offset pairs placed in offsets
|
||||
= 0 => offsets overflowed; longest matches are present
|
||||
-1 => failed to match
|
||||
< -1 => some kind of unexpected problem
|
||||
|
||||
The following macros are used for adding states to the two state vectors (one
|
||||
for the current character, one for the following character). */
|
||||
|
||||
/* Append a state with opcode offset x and repeat count y to the active state
list. The data field of the new entry is not written. The macro relies on the
caller's local variables active_count, wscount and next_active_state, and it
makes the enclosing function return PCRE2_ERROR_DFA_WSSIZE when the list is
already full (active_count is incremented in that case too). It expands to an
if/else statement without a final semicolon: follow it with ';' and put it
inside braces when it forms the body of another if. */

#define ADD_ACTIVE(x,y) \
  if (active_count++ < wscount) \
    { \
    next_active_state->offset = (x); \
    next_active_state->count  = (y); \
    next_active_state++; \
    } \
  else return PCRE2_ERROR_DFA_WSSIZE
|
||||
|
||||
/* Append a state with opcode offset x, repeat count y and extra data z to the
active state list. The macro relies on the caller's local variables
active_count, wscount and next_active_state, and it makes the enclosing
function return PCRE2_ERROR_DFA_WSSIZE when the list is already full
(active_count is incremented in that case too). It expands to an if/else
statement without a final semicolon: follow it with ';' and put it inside
braces when it forms the body of another if. */

#define ADD_ACTIVE_DATA(x,y,z) \
  if (active_count++ < wscount) \
    { \
    next_active_state->offset = (x); \
    next_active_state->count  = (y); \
    next_active_state->data   = (z); \
    next_active_state++; \
    } \
  else return PCRE2_ERROR_DFA_WSSIZE
|
||||
|
||||
/* Append a state with opcode offset x and repeat count y to the new state
list, that is, the list for the following character. The data field of the new
entry is not written. The macro relies on the caller's local variables
new_count, wscount and next_new_state, and it makes the enclosing function
return PCRE2_ERROR_DFA_WSSIZE when the list is already full (new_count is
incremented in that case too). It expands to an if/else statement without a
final semicolon: follow it with ';' and put it inside braces when it forms the
body of another if. */

#define ADD_NEW(x,y) \
  if (new_count++ < wscount) \
    { \
    next_new_state->offset = (x); \
    next_new_state->count  = (y); \
    next_new_state++; \
    } \
  else return PCRE2_ERROR_DFA_WSSIZE
|
||||
|
||||
/* Append a state with opcode offset x, repeat count y and extra data z to the
new state list, that is, the list for the following character. The macro
relies on the caller's local variables new_count, wscount and next_new_state,
and it makes the enclosing function return PCRE2_ERROR_DFA_WSSIZE when the
list is already full (new_count is incremented in that case too). It expands
to an if/else statement without a final semicolon: follow it with ';' and put
it inside braces when it forms the body of another if. */

#define ADD_NEW_DATA(x,y,z) \
  if (new_count++ < wscount) \
    { \
    next_new_state->offset = (x); \
    next_new_state->count  = (y); \
    next_new_state->data   = (z); \
    next_new_state++; \
    } \
  else return PCRE2_ERROR_DFA_WSSIZE
|
||||
|
||||
/* And now, here is the code */
|
||||
|
||||
static int
|
||||
internal_dfa_match(
|
||||
dfa_match_block *mb,
|
||||
PCRE2_SPTR this_start_code,
|
||||
PCRE2_SPTR current_subject,
|
||||
PCRE2_SIZE start_offset,
|
||||
PCRE2_SIZE *offsets,
|
||||
uint32_t offsetcount,
|
||||
int *workspace,
|
||||
int wscount,
|
||||
uint32_t rlevel,
|
||||
int *RWS)
|
||||
{
|
||||
stateblock *active_states, *new_states, *temp_states;
|
||||
stateblock *next_active_state, *next_new_state;
|
||||
const uint8_t *ctypes, *lcc, *fcc;
|
||||
PCRE2_SPTR ptr;
|
||||
PCRE2_SPTR end_code;
|
||||
dfa_recursion_info new_recursive;
|
||||
int active_count, new_count, match_count;
|
||||
|
||||
/* Some fields in the mb block are frequently referenced, so we load them into
|
||||
independent variables in the hope that this will perform better. */
|
||||
|
||||
PCRE2_SPTR start_subject = mb->start_subject;
|
||||
PCRE2_SPTR end_subject = mb->end_subject;
|
||||
PCRE2_SPTR start_code = mb->start_code;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
|
||||
BOOL utf_or_ucp = utf || (mb->poptions & PCRE2_UCP) != 0;
|
||||
#else
|
||||
BOOL utf = FALSE;
|
||||
#endif
|
||||
|
||||
BOOL reset_could_continue = FALSE;
|
||||
|
||||
if (mb->match_call_count++ >= mb->match_limit) return PCRE2_ERROR_MATCHLIMIT;
|
||||
if (rlevel++ > mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT;
|
||||
offsetcount &= (uint32_t)(-2); /* Round down */
|
||||
|
||||
wscount -= 2;
|
||||
wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) /
|
||||
(2 * INTS_PER_STATEBLOCK);
|
||||
|
||||
ctypes = mb->tables + ctypes_offset;
|
||||
lcc = mb->tables + lcc_offset;
|
||||
fcc = mb->tables + fcc_offset;
|
||||
|
||||
match_count = PCRE2_ERROR_NOMATCH; /* A negative number */
|
||||
|
||||
active_states = (stateblock *)(workspace + 2);
|
||||
next_new_state = new_states = active_states + wscount;
|
||||
new_count = 0;
|
||||
|
||||
/* The first thing in any (sub) pattern is a bracket of some sort. Push all
|
||||
the alternative states onto the list, and find out where the end is. This
|
||||
makes it possible to use this function recursively, when we want to stop at a
|
||||
matching internal ket rather than at the end.
|
||||
|
||||
If we are dealing with a backward assertion we have to find out the maximum
|
||||
amount to move back, and set up each alternative appropriately. */
|
||||
|
||||
if (*this_start_code == OP_ASSERTBACK || *this_start_code == OP_ASSERTBACK_NOT)
|
||||
{
|
||||
size_t max_back = 0;
|
||||
size_t gone_back;
|
||||
|
||||
end_code = this_start_code;
|
||||
do
|
||||
{
|
||||
size_t back = (size_t)GET2(end_code, 2+LINK_SIZE);
|
||||
if (back > max_back) max_back = back;
|
||||
end_code += GET(end_code, 1);
|
||||
}
|
||||
while (*end_code == OP_ALT);
|
||||
|
||||
/* If we can't go back the amount required for the longest lookbehind
|
||||
pattern, go back as far as we can; some alternatives may still be viable. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
/* In character mode we have to step back character by character */
|
||||
|
||||
if (utf)
|
||||
{
|
||||
for (gone_back = 0; gone_back < max_back; gone_back++)
|
||||
{
|
||||
if (current_subject <= start_subject) break;
|
||||
current_subject--;
|
||||
ACROSSCHAR(current_subject > start_subject, current_subject,
|
||||
current_subject--);
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
|
||||
/* In byte-mode we can do this quickly. */
|
||||
|
||||
{
|
||||
size_t current_offset = (size_t)(current_subject - start_subject);
|
||||
gone_back = (current_offset < max_back)? current_offset : max_back;
|
||||
current_subject -= gone_back;
|
||||
}
|
||||
|
||||
/* Save the earliest consulted character */
|
||||
|
||||
if (current_subject < mb->start_used_ptr)
|
||||
mb->start_used_ptr = current_subject;
|
||||
|
||||
/* Now we can process the individual branches. There will be an OP_REVERSE at
|
||||
the start of each branch, except when the length of the branch is zero. */
|
||||
|
||||
end_code = this_start_code;
|
||||
do
|
||||
{
|
||||
uint32_t revlen = (end_code[1+LINK_SIZE] == OP_REVERSE)? 1 + IMM2_SIZE : 0;
|
||||
size_t back = (revlen == 0)? 0 : (size_t)GET2(end_code, 2+LINK_SIZE);
|
||||
if (back <= gone_back)
|
||||
{
|
||||
int bstate = (int)(end_code - start_code + 1 + LINK_SIZE + revlen);
|
||||
ADD_NEW_DATA(-bstate, 0, (int)(gone_back - back));
|
||||
}
|
||||
end_code += GET(end_code, 1);
|
||||
}
|
||||
while (*end_code == OP_ALT);
|
||||
}
|
||||
|
||||
/* This is the code for a "normal" subpattern (not a backward assertion). The
|
||||
start of a whole pattern is always one of these. If we are at the top level,
|
||||
we may be asked to restart matching from the same point that we reached for a
|
||||
previous partial match. We still have to scan through the top-level branches to
|
||||
find the end state. */
|
||||
|
||||
else
|
||||
{
|
||||
end_code = this_start_code;
|
||||
|
||||
/* Restarting */
|
||||
|
||||
if (rlevel == 1 && (mb->moptions & PCRE2_DFA_RESTART) != 0)
|
||||
{
|
||||
do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT);
|
||||
new_count = workspace[1];
|
||||
if (!workspace[0])
|
||||
memcpy(new_states, active_states, (size_t)new_count * sizeof(stateblock));
|
||||
}
|
||||
|
||||
/* Not restarting */
|
||||
|
||||
else
|
||||
{
|
||||
int length = 1 + LINK_SIZE +
|
||||
((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
|
||||
*this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
|
||||
? IMM2_SIZE:0);
|
||||
do
|
||||
{
|
||||
ADD_NEW((int)(end_code - start_code + length), 0);
|
||||
end_code += GET(end_code, 1);
|
||||
length = 1 + LINK_SIZE;
|
||||
}
|
||||
while (*end_code == OP_ALT);
|
||||
}
|
||||
}
|
||||
|
||||
workspace[0] = 0; /* Bit indicating which vector is current */
|
||||
|
||||
/* Loop for scanning the subject */
|
||||
|
||||
ptr = current_subject;
|
||||
for (;;)
|
||||
{
|
||||
int i, j;
|
||||
int clen, dlen;
|
||||
uint32_t c, d;
|
||||
BOOL partial_newline = FALSE;
|
||||
BOOL could_continue = reset_could_continue;
|
||||
reset_could_continue = FALSE;
|
||||
|
||||
if (ptr > mb->last_used_ptr) mb->last_used_ptr = ptr;
|
||||
|
||||
/* Make the new state list into the active state list and empty the
|
||||
new state list. */
|
||||
|
||||
temp_states = active_states;
|
||||
active_states = new_states;
|
||||
new_states = temp_states;
|
||||
active_count = new_count;
|
||||
new_count = 0;
|
||||
|
||||
workspace[0] ^= 1; /* Remember for the restarting feature */
|
||||
workspace[1] = active_count;
|
||||
|
||||
/* Set the pointers for adding new states */
|
||||
|
||||
next_active_state = active_states + active_count;
|
||||
next_new_state = new_states;
|
||||
|
||||
/* Load the current character from the subject outside the loop, as many
|
||||
different states may want to look at it, and we assume that at least one
|
||||
will. */
|
||||
|
||||
if (ptr < end_subject)
|
||||
{
|
||||
clen = 1; /* Number of data items in the character */
|
||||
#ifdef SUPPORT_UNICODE
|
||||
GETCHARLENTEST(c, ptr, clen);
|
||||
#else
|
||||
c = *ptr;
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
else
|
||||
{
|
||||
clen = 0; /* This indicates the end of the subject */
|
||||
c = NOTACHAR; /* This value should never actually be used */
|
||||
}
|
||||
|
||||
/* Scan up the active states and act on each one. The result of an action
|
||||
may be to add more states to the currently active list (e.g. on hitting a
|
||||
parenthesis) or it may be to put states on the new list, for considering
|
||||
when we move the character pointer on. */
|
||||
|
||||
for (i = 0; i < active_count; i++)
|
||||
{
|
||||
stateblock *current_state = active_states + i;
|
||||
BOOL caseless = FALSE;
|
||||
PCRE2_SPTR code;
|
||||
uint32_t codevalue;
|
||||
int state_offset = current_state->offset;
|
||||
int rrc;
|
||||
int count;
|
||||
|
||||
/* A negative offset is a special case meaning "hold off going to this
|
||||
(negated) state until the number of characters in the data field have
|
||||
been skipped". If the could_continue flag was passed over from a previous
|
||||
state, arrange for it to be passed on. */
|
||||
|
||||
if (state_offset < 0)
|
||||
{
|
||||
if (current_state->data > 0)
|
||||
{
|
||||
ADD_NEW_DATA(state_offset, current_state->count,
|
||||
current_state->data - 1);
|
||||
if (could_continue) reset_could_continue = TRUE;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
current_state->offset = state_offset = -state_offset;
|
||||
}
|
||||
}
|
||||
|
||||
/* Check for a duplicate state with the same count, and skip if found.
|
||||
See the note at the head of this module about the possibility of improving
|
||||
performance here. */
|
||||
|
||||
for (j = 0; j < i; j++)
|
||||
{
|
||||
if (active_states[j].offset == state_offset &&
|
||||
active_states[j].count == current_state->count)
|
||||
goto NEXT_ACTIVE_STATE;
|
||||
}
|
||||
|
||||
/* The state offset is the offset to the opcode */
|
||||
|
||||
code = start_code + state_offset;
|
||||
codevalue = *code;
|
||||
|
||||
/* If this opcode inspects a character, but we are at the end of the
|
||||
subject, remember the fact for use when testing for a partial match. */
|
||||
|
||||
if (clen == 0 && poptable[codevalue] != 0)
|
||||
could_continue = TRUE;
|
||||
|
||||
/* If this opcode is followed by an inline character, load it. It is
|
||||
tempting to test for the presence of a subject character here, but that
|
||||
is wrong, because sometimes zero repetitions of the subject are
|
||||
permitted.
|
||||
|
||||
We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
|
||||
argument that is not a data character - but is always one byte long because
|
||||
the values are small. We have to take special action to deal with \P, \p,
|
||||
\H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
|
||||
these ones to new opcodes. */
|
||||
|
||||
if (coptable[codevalue] > 0)
|
||||
{
|
||||
dlen = 1;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
d = code[coptable[codevalue]];
|
||||
if (codevalue >= OP_TYPESTAR)
|
||||
{
|
||||
switch(d)
|
||||
{
|
||||
case OP_ANYBYTE: return PCRE2_ERROR_DFA_UITEM;
|
||||
case OP_NOTPROP:
|
||||
case OP_PROP: codevalue += OP_PROP_EXTRA; break;
|
||||
case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
|
||||
case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
|
||||
case OP_NOT_HSPACE:
|
||||
case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
|
||||
case OP_NOT_VSPACE:
|
||||
case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
dlen = 0; /* Not strictly necessary, but compilers moan */
|
||||
d = NOTACHAR; /* if these variables are not set. */
|
||||
}
|
||||
|
||||
|
||||
/* Now process the individual opcodes */
|
||||
|
||||
switch (codevalue)
|
||||
{
|
||||
/* ========================================================================== */
|
||||
/* Reached a closing bracket. If not at the end of the pattern, carry
|
||||
on with the next opcode. For repeating opcodes, also add the repeat
|
||||
state. Note that KETRPOS will always be encountered at the end of the
|
||||
subpattern, because the possessive subpattern repeats are always handled
|
||||
using recursive calls. Thus, it never adds any new states.
|
||||
|
||||
At the end of the (sub)pattern, unless we have an empty string and
|
||||
PCRE2_NOTEMPTY is set, or PCRE2_NOTEMPTY_ATSTART is set and we are at the
|
||||
start of the subject, save the match data, shifting up all previous
|
||||
matches so we always have the longest first. */
|
||||
|
||||
case OP_KET:
|
||||
case OP_KETRMIN:
|
||||
case OP_KETRMAX:
|
||||
case OP_KETRPOS:
|
||||
if (code != end_code)
|
||||
{
|
||||
ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
|
||||
if (codevalue != OP_KET)
|
||||
{
|
||||
ADD_ACTIVE(state_offset - (int)GET(code, 1), 0);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ptr > current_subject ||
|
||||
((mb->moptions & PCRE2_NOTEMPTY) == 0 &&
|
||||
((mb->moptions & PCRE2_NOTEMPTY_ATSTART) == 0 ||
|
||||
current_subject > start_subject + mb->start_offset)))
|
||||
{
|
||||
if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
|
||||
else if (match_count > 0 && ++match_count * 2 > (int)offsetcount)
|
||||
match_count = 0;
|
||||
count = ((match_count == 0)? (int)offsetcount : match_count * 2) - 2;
|
||||
if (count > 0) (void)memmove(offsets + 2, offsets,
|
||||
(size_t)count * sizeof(PCRE2_SIZE));
|
||||
if (offsetcount >= 2)
|
||||
{
|
||||
offsets[0] = (PCRE2_SIZE)(current_subject - start_subject);
|
||||
offsets[1] = (PCRE2_SIZE)(ptr - start_subject);
|
||||
}
|
||||
if ((mb->moptions & PCRE2_DFA_SHORTEST) != 0) return match_count;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/* ========================================================================== */
|
||||
/* These opcodes add to the current list of states without looking
|
||||
at the current character. */
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_ALT:
|
||||
do { code += GET(code, 1); } while (*code == OP_ALT);
|
||||
ADD_ACTIVE((int)(code - start_code), 0);
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_BRA:
|
||||
case OP_SBRA:
|
||||
do
|
||||
{
|
||||
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
|
||||
code += GET(code, 1);
|
||||
}
|
||||
while (*code == OP_ALT);
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_CBRA:
|
||||
case OP_SCBRA:
|
||||
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE), 0);
|
||||
code += GET(code, 1);
|
||||
while (*code == OP_ALT)
|
||||
{
|
||||
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
|
||||
code += GET(code, 1);
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_BRAZERO:
|
||||
case OP_BRAMINZERO:
|
||||
ADD_ACTIVE(state_offset + 1, 0);
|
||||
code += 1 + GET(code, 2);
|
||||
while (*code == OP_ALT) code += GET(code, 1);
|
||||
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_SKIPZERO:
|
||||
code += 1 + GET(code, 2);
|
||||
while (*code == OP_ALT) code += GET(code, 1);
|
||||
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_CIRC:
|
||||
if (ptr == start_subject && (mb->moptions & PCRE2_NOTBOL) == 0)
|
||||
{ ADD_ACTIVE(state_offset + 1, 0); }
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_CIRCM:
|
||||
if ((ptr == start_subject && (mb->moptions & PCRE2_NOTBOL) == 0) ||
|
||||
((ptr != end_subject || (mb->poptions & PCRE2_ALT_CIRCUMFLEX) != 0 )
|
||||
&& WAS_NEWLINE(ptr)))
|
||||
{ ADD_ACTIVE(state_offset + 1, 0); }
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_EOD:
|
||||
if (ptr >= end_subject)
|
||||
{
|
||||
if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||
return PCRE2_ERROR_PARTIAL;
|
||||
else { ADD_ACTIVE(state_offset + 1, 0); }
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_SOD:
|
||||
if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); }
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_SOM:
|
||||
if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); }
|
||||
break;
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* These opcodes inspect the next subject character, and sometimes
|
||||
the previous one as well, but do not have an argument. The variable
|
||||
clen contains the length of the current character and is zero if we are
|
||||
at the end of the subject. */
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_ANY:
|
||||
if (clen > 0 && !IS_NEWLINE(ptr))
|
||||
{
|
||||
if (ptr + 1 >= mb->end_subject &&
|
||||
(mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
|
||||
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||
NLBLOCK->nllen == 2 &&
|
||||
c == NLBLOCK->nl[0])
|
||||
{
|
||||
could_continue = partial_newline = TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
ADD_NEW(state_offset + 1, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_ALLANY:
|
||||
if (clen > 0)
|
||||
{ ADD_NEW(state_offset + 1, 0); }
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_EODN:
|
||||
if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - mb->nllen))
|
||||
{
|
||||
if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||
return PCRE2_ERROR_PARTIAL;
|
||||
ADD_ACTIVE(state_offset + 1, 0);
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_DOLL:
|
||||
if ((mb->moptions & PCRE2_NOTEOL) == 0)
|
||||
{
|
||||
if (clen == 0 && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||
could_continue = TRUE;
|
||||
else if (clen == 0 ||
|
||||
((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
|
||||
(ptr == end_subject - mb->nllen)
|
||||
))
|
||||
{ ADD_ACTIVE(state_offset + 1, 0); }
|
||||
else if (ptr + 1 >= mb->end_subject &&
|
||||
(mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0 &&
|
||||
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||
NLBLOCK->nllen == 2 &&
|
||||
c == NLBLOCK->nl[0])
|
||||
{
|
||||
if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||
{
|
||||
reset_could_continue = TRUE;
|
||||
ADD_NEW_DATA(-(state_offset + 1), 0, 1);
|
||||
}
|
||||
else could_continue = partial_newline = TRUE;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_DOLLM:
|
||||
if ((mb->moptions & PCRE2_NOTEOL) == 0)
|
||||
{
|
||||
if (clen == 0 && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||
could_continue = TRUE;
|
||||
else if (clen == 0 ||
|
||||
((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
|
||||
{ ADD_ACTIVE(state_offset + 1, 0); }
|
||||
else if (ptr + 1 >= mb->end_subject &&
|
||||
(mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0 &&
|
||||
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||
NLBLOCK->nllen == 2 &&
|
||||
c == NLBLOCK->nl[0])
|
||||
{
|
||||
if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||
{
|
||||
reset_could_continue = TRUE;
|
||||
ADD_NEW_DATA(-(state_offset + 1), 0, 1);
|
||||
}
|
||||
else could_continue = partial_newline = TRUE;
|
||||
}
|
||||
}
|
||||
else if (IS_NEWLINE(ptr))
|
||||
{ ADD_ACTIVE(state_offset + 1, 0); }
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
|
||||
case OP_DIGIT:
|
||||
case OP_WHITESPACE:
|
||||
case OP_WORDCHAR:
|
||||
if (clen > 0 && c < 256 &&
|
||||
((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)
|
||||
{ ADD_NEW(state_offset + 1, 0); }
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_NOT_DIGIT:
|
||||
case OP_NOT_WHITESPACE:
|
||||
case OP_NOT_WORDCHAR:
|
||||
if (clen > 0 && (c >= 256 ||
|
||||
((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0))
|
||||
{ ADD_NEW(state_offset + 1, 0); }
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_WORD_BOUNDARY:
|
||||
case OP_NOT_WORD_BOUNDARY:
|
||||
case OP_NOT_UCP_WORD_BOUNDARY:
|
||||
case OP_UCP_WORD_BOUNDARY:
|
||||
{
|
||||
int left_word, right_word;
|
||||
|
||||
if (ptr > start_subject)
|
||||
{
|
||||
PCRE2_SPTR temp = ptr - 1;
|
||||
if (temp < mb->start_used_ptr) mb->start_used_ptr = temp;
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (utf) { BACKCHAR(temp); }
|
||||
#endif
|
||||
GETCHARTEST(d, temp);
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (codevalue == OP_UCP_WORD_BOUNDARY ||
|
||||
codevalue == OP_NOT_UCP_WORD_BOUNDARY)
|
||||
{
|
||||
int chartype = UCD_CHARTYPE(d);
|
||||
int category = PRIV(ucp_gentype)[chartype];
|
||||
left_word = (category == ucp_L || category == ucp_N ||
|
||||
chartype == ucp_Mn || chartype == ucp_Pc);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
|
||||
}
|
||||
else left_word = FALSE;
|
||||
|
||||
if (clen > 0)
|
||||
{
|
||||
if (ptr >= mb->last_used_ptr)
|
||||
{
|
||||
PCRE2_SPTR temp = ptr + 1;
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (utf) { FORWARDCHARTEST(temp, mb->end_subject); }
|
||||
#endif
|
||||
mb->last_used_ptr = temp;
|
||||
}
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (codevalue == OP_UCP_WORD_BOUNDARY ||
|
||||
codevalue == OP_NOT_UCP_WORD_BOUNDARY)
|
||||
{
|
||||
int chartype = UCD_CHARTYPE(c);
|
||||
int category = PRIV(ucp_gentype)[chartype];
|
||||
right_word = (category == ucp_L || category == ucp_N ||
|
||||
chartype == ucp_Mn || chartype == ucp_Pc);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
|
||||
}
|
||||
else right_word = FALSE;
|
||||
|
||||
if ((left_word == right_word) ==
|
||||
(codevalue == OP_NOT_WORD_BOUNDARY ||
|
||||
codevalue == OP_NOT_UCP_WORD_BOUNDARY))
|
||||
{ ADD_ACTIVE(state_offset + 1, 0); }
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
/* Check the next character by Unicode property. We will get here only
|
||||
if the support is in the binary; otherwise a compile-time error occurs.
|
||||
*/
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
case OP_PROP:
|
||||
case OP_NOTPROP:
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
int chartype;
|
||||
const uint32_t *cp;
|
||||
const ucd_record * prop = GET_UCD(c);
|
||||
switch(code[1])
|
||||
{
|
||||
case PT_LAMP:
|
||||
chartype = prop->chartype;
|
||||
OK = chartype == ucp_Lu || chartype == ucp_Ll ||
|
||||
chartype == ucp_Lt;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
OK = prop->chartype == code[2];
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
OK = prop->script == code[2];
|
||||
break;
|
||||
|
||||
case PT_SCX:
|
||||
OK = (prop->script == code[2] ||
|
||||
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[2]) != 0);
|
||||
break;
|
||||
|
||||
/* These are specials for combination cases. */
|
||||
|
||||
case PT_ALNUM:
|
||||
chartype = prop->chartype;
|
||||
OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[chartype] == ucp_N;
|
||||
break;
|
||||
|
||||
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||
which means that Perl space and POSIX space are now identical. PCRE
|
||||
was changed at release 8.34. */
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
switch(c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
VSPACE_CASES:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
chartype = prop->chartype;
|
||||
OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[chartype] == ucp_N ||
|
||||
chartype == ucp_Mn || chartype == ucp_Pc;
|
||||
break;
|
||||
|
||||
case PT_CLIST:
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
if (c > MAX_UTF_CODE_POINT)
|
||||
{
|
||||
OK = FALSE;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
cp = PRIV(ucd_caseless_sets) + code[2];
|
||||
for (;;)
|
||||
{
|
||||
if (c < *cp) { OK = FALSE; break; }
|
||||
if (c == *cp++) { OK = TRUE; break; }
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_UCNC:
|
||||
OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
|
||||
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
|
||||
c >= 0xe000;
|
||||
break;
|
||||
|
||||
case PT_BIDICL:
|
||||
OK = UCD_BIDICLASS(c) == code[2];
|
||||
break;
|
||||
|
||||
case PT_BOOL:
|
||||
OK = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||
UCD_BPROPS_PROP(prop), code[2]) != 0;
|
||||
break;
|
||||
|
||||
/* Should never occur, but keep compilers from grumbling. */
|
||||
|
||||
default:
|
||||
OK = codevalue != OP_PROP;
|
||||
break;
|
||||
}
|
||||
|
||||
if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* These opcodes likewise inspect the subject character, but have an
|
||||
argument that is not a data character. It is one of these opcodes:
|
||||
OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE,
|
||||
OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
|
||||
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEPOSPLUS:
|
||||
count = current_state->count; /* Already matched */
|
||||
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
||||
if (clen > 0)
|
||||
{
|
||||
if (d == OP_ANY && ptr + 1 >= mb->end_subject &&
|
||||
(mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
|
||||
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||
NLBLOCK->nllen == 2 &&
|
||||
c == NLBLOCK->nl[0])
|
||||
{
|
||||
could_continue = partial_newline = TRUE;
|
||||
}
|
||||
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||
(c < 256 &&
|
||||
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
||||
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||
{
|
||||
if (count > 0 && codevalue == OP_TYPEPOSPLUS)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
count++;
|
||||
ADD_NEW(state_offset, count);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSQUERY:
|
||||
ADD_ACTIVE(state_offset + 2, 0);
|
||||
if (clen > 0)
|
||||
{
|
||||
if (d == OP_ANY && ptr + 1 >= mb->end_subject &&
|
||||
(mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
|
||||
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||
NLBLOCK->nllen == 2 &&
|
||||
c == NLBLOCK->nl[0])
|
||||
{
|
||||
could_continue = partial_newline = TRUE;
|
||||
}
|
||||
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||
(c < 256 &&
|
||||
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
||||
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||
{
|
||||
if (codevalue == OP_TYPEPOSQUERY)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
ADD_NEW(state_offset + 2, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPOSSTAR:
|
||||
ADD_ACTIVE(state_offset + 2, 0);
|
||||
if (clen > 0)
|
||||
{
|
||||
if (d == OP_ANY && ptr + 1 >= mb->end_subject &&
|
||||
(mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
|
||||
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||
NLBLOCK->nllen == 2 &&
|
||||
c == NLBLOCK->nl[0])
|
||||
{
|
||||
could_continue = partial_newline = TRUE;
|
||||
}
|
||||
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||
(c < 256 &&
|
||||
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
||||
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||
{
|
||||
if (codevalue == OP_TYPEPOSSTAR)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
ADD_NEW(state_offset, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_TYPEEXACT:
|
||||
count = current_state->count; /* Number already matched */
|
||||
if (clen > 0)
|
||||
{
|
||||
if (d == OP_ANY && ptr + 1 >= mb->end_subject &&
|
||||
(mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
|
||||
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||
NLBLOCK->nllen == 2 &&
|
||||
c == NLBLOCK->nl[0])
|
||||
{
|
||||
could_continue = partial_newline = TRUE;
|
||||
}
|
||||
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||
(c < 256 &&
|
||||
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
||||
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||
{
|
||||
if (++count >= (int)GET2(code, 1))
|
||||
{ ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
|
||||
else
|
||||
{ ADD_NEW(state_offset, count); }
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEPOSUPTO:
|
||||
ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0);
|
||||
count = current_state->count; /* Number already matched */
|
||||
if (clen > 0)
|
||||
{
|
||||
if (d == OP_ANY && ptr + 1 >= mb->end_subject &&
|
||||
(mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
|
||||
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||
NLBLOCK->nllen == 2 &&
|
||||
c == NLBLOCK->nl[0])
|
||||
{
|
||||
could_continue = partial_newline = TRUE;
|
||||
}
|
||||
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||
(c < 256 &&
|
||||
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
||||
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||
{
|
||||
if (codevalue == OP_TYPEPOSUPTO)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
if (++count >= (int)GET2(code, 1))
|
||||
{ ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
|
||||
else
|
||||
{ ADD_NEW(state_offset, count); }
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/* ========================================================================== */
|
||||
/* These are virtual opcodes that are used when something like
|
||||
OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, OP_EXTUNI, OP_VSPACE, or OP_HSPACE as its
|
||||
argument. It keeps the code above fast for the other cases. The argument
|
||||
is in the d variable. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
case OP_PROP_EXTRA + OP_TYPEPLUS:
|
||||
case OP_PROP_EXTRA + OP_TYPEMINPLUS:
|
||||
case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
|
||||
count = current_state->count; /* Already matched */
|
||||
if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
int chartype;
|
||||
const uint32_t *cp;
|
||||
const ucd_record * prop = GET_UCD(c);
|
||||
switch(code[2])
|
||||
{
|
||||
case PT_LAMP:
|
||||
chartype = prop->chartype;
|
||||
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
OK = prop->chartype == code[3];
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
OK = prop->script == code[3];
|
||||
break;
|
||||
|
||||
case PT_SCX:
|
||||
OK = (prop->script == code[3] ||
|
||||
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[3]) != 0);
|
||||
break;
|
||||
|
||||
/* These are specials for combination cases. */
|
||||
|
||||
case PT_ALNUM:
|
||||
chartype = prop->chartype;
|
||||
OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[chartype] == ucp_N;
|
||||
break;
|
||||
|
||||
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||
which means that Perl space and POSIX space are now identical. PCRE
|
||||
was changed at release 8.34. */
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
switch(c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
VSPACE_CASES:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
chartype = prop->chartype;
|
||||
OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[chartype] == ucp_N ||
|
||||
chartype == ucp_Mn || chartype == ucp_Pc;
|
||||
break;
|
||||
|
||||
case PT_CLIST:
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
if (c > MAX_UTF_CODE_POINT)
|
||||
{
|
||||
OK = FALSE;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
cp = PRIV(ucd_caseless_sets) + code[3];
|
||||
for (;;)
|
||||
{
|
||||
if (c < *cp) { OK = FALSE; break; }
|
||||
if (c == *cp++) { OK = TRUE; break; }
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_UCNC:
|
||||
OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
|
||||
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
|
||||
c >= 0xe000;
|
||||
break;
|
||||
|
||||
case PT_BIDICL:
|
||||
OK = UCD_BIDICLASS(c) == code[3];
|
||||
break;
|
||||
|
||||
case PT_BOOL:
|
||||
OK = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||
UCD_BPROPS_PROP(prop), code[3]) != 0;
|
||||
break;
|
||||
|
||||
/* Should never occur, but keep compilers from grumbling. */
|
||||
|
||||
default:
|
||||
OK = codevalue != OP_PROP;
|
||||
break;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_PROP))
|
||||
{
|
||||
if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
count++;
|
||||
ADD_NEW(state_offset, count);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
|
||||
case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
|
||||
case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
|
||||
count = current_state->count; /* Already matched */
|
||||
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
||||
if (clen > 0)
|
||||
{
|
||||
int ncount = 0;
|
||||
if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
(void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
|
||||
&ncount);
|
||||
count++;
|
||||
ADD_NEW_DATA(-state_offset, count, ncount);
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_ANYNL_EXTRA + OP_TYPEPLUS:
|
||||
case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
|
||||
case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
|
||||
count = current_state->count; /* Already matched */
|
||||
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
||||
if (clen > 0)
|
||||
{
|
||||
int ncount = 0;
|
||||
switch (c)
|
||||
{
|
||||
case CHAR_VT:
|
||||
case CHAR_FF:
|
||||
case CHAR_NEL:
|
||||
#ifndef EBCDIC
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
#endif /* Not EBCDIC */
|
||||
if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
|
||||
goto ANYNL01;
|
||||
|
||||
case CHAR_CR:
|
||||
if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
|
||||
/* Fall through */
|
||||
|
||||
ANYNL01:
|
||||
case CHAR_LF:
|
||||
if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
count++;
|
||||
ADD_NEW_DATA(-state_offset, count, ncount);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_VSPACE_EXTRA + OP_TYPEPLUS:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
|
||||
count = current_state->count; /* Already matched */
|
||||
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
switch (c)
|
||||
{
|
||||
VSPACE_CASES:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = FALSE;
|
||||
break;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_VSPACE))
|
||||
{
|
||||
if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
count++;
|
||||
ADD_NEW_DATA(-state_offset, count, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_HSPACE_EXTRA + OP_TYPEPLUS:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
|
||||
count = current_state->count; /* Already matched */
|
||||
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
switch (c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = FALSE;
|
||||
break;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_HSPACE))
|
||||
{
|
||||
if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
count++;
|
||||
ADD_NEW_DATA(-state_offset, count, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
#ifdef SUPPORT_UNICODE
|
||||
case OP_PROP_EXTRA + OP_TYPEQUERY:
|
||||
case OP_PROP_EXTRA + OP_TYPEMINQUERY:
|
||||
case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
|
||||
count = 4;
|
||||
goto QS1;
|
||||
|
||||
case OP_PROP_EXTRA + OP_TYPESTAR:
|
||||
case OP_PROP_EXTRA + OP_TYPEMINSTAR:
|
||||
case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
|
||||
count = 0;
|
||||
|
||||
QS1:
|
||||
|
||||
ADD_ACTIVE(state_offset + 4, 0);
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
int chartype;
|
||||
const uint32_t *cp;
|
||||
const ucd_record * prop = GET_UCD(c);
|
||||
switch(code[2])
|
||||
{
|
||||
case PT_LAMP:
|
||||
chartype = prop->chartype;
|
||||
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
OK = prop->chartype == code[3];
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
OK = prop->script == code[3];
|
||||
break;
|
||||
|
||||
case PT_SCX:
|
||||
OK = (prop->script == code[3] ||
|
||||
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[3]) != 0);
|
||||
break;
|
||||
|
||||
/* These are specials for combination cases. */
|
||||
|
||||
case PT_ALNUM:
|
||||
chartype = prop->chartype;
|
||||
OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[chartype] == ucp_N;
|
||||
break;
|
||||
|
||||
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||
which means that Perl space and POSIX space are now identical. PCRE
|
||||
was changed at release 8.34. */
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
switch(c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
VSPACE_CASES:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
chartype = prop->chartype;
|
||||
OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[chartype] == ucp_N ||
|
||||
chartype == ucp_Mn || chartype == ucp_Pc;
|
||||
break;
|
||||
|
||||
case PT_CLIST:
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
if (c > MAX_UTF_CODE_POINT)
|
||||
{
|
||||
OK = FALSE;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
cp = PRIV(ucd_caseless_sets) + code[3];
|
||||
for (;;)
|
||||
{
|
||||
if (c < *cp) { OK = FALSE; break; }
|
||||
if (c == *cp++) { OK = TRUE; break; }
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_UCNC:
|
||||
OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
|
||||
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
|
||||
c >= 0xe000;
|
||||
break;
|
||||
|
||||
case PT_BIDICL:
|
||||
OK = UCD_BIDICLASS(c) == code[3];
|
||||
break;
|
||||
|
||||
case PT_BOOL:
|
||||
OK = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||
UCD_BPROPS_PROP(prop), code[3]) != 0;
|
||||
break;
|
||||
|
||||
/* Should never occur, but keep compilers from grumbling. */
|
||||
|
||||
default:
|
||||
OK = codevalue != OP_PROP;
|
||||
break;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_PROP))
|
||||
{
|
||||
if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
|
||||
codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
ADD_NEW(state_offset + count, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
|
||||
case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
|
||||
case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
|
||||
count = 2;
|
||||
goto QS2;
|
||||
|
||||
case OP_EXTUNI_EXTRA + OP_TYPESTAR:
|
||||
case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
|
||||
case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
|
||||
count = 0;
|
||||
|
||||
QS2:
|
||||
|
||||
ADD_ACTIVE(state_offset + 2, 0);
|
||||
if (clen > 0)
|
||||
{
|
||||
int ncount = 0;
|
||||
if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
|
||||
codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
(void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
|
||||
&ncount);
|
||||
ADD_NEW_DATA(-(state_offset + count), 0, ncount);
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_ANYNL_EXTRA + OP_TYPEQUERY:
|
||||
case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
|
||||
case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
|
||||
count = 2;
|
||||
goto QS3;
|
||||
|
||||
case OP_ANYNL_EXTRA + OP_TYPESTAR:
|
||||
case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
|
||||
case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
|
||||
count = 0;
|
||||
|
||||
QS3:
|
||||
ADD_ACTIVE(state_offset + 2, 0);
|
||||
if (clen > 0)
|
||||
{
|
||||
int ncount = 0;
|
||||
switch (c)
|
||||
{
|
||||
case CHAR_VT:
|
||||
case CHAR_FF:
|
||||
case CHAR_NEL:
|
||||
#ifndef EBCDIC
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
#endif /* Not EBCDIC */
|
||||
if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
|
||||
goto ANYNL02;
|
||||
|
||||
case CHAR_CR:
|
||||
if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
|
||||
/* Fall through */
|
||||
|
||||
ANYNL02:
|
||||
case CHAR_LF:
|
||||
if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
|
||||
codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
ADD_NEW_DATA(-(state_offset + (int)count), 0, ncount);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_VSPACE_EXTRA + OP_TYPEQUERY:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
|
||||
count = 2;
|
||||
goto QS4;
|
||||
|
||||
case OP_VSPACE_EXTRA + OP_TYPESTAR:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
|
||||
count = 0;
|
||||
|
||||
QS4:
|
||||
ADD_ACTIVE(state_offset + 2, 0);
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
switch (c)
|
||||
{
|
||||
VSPACE_CASES:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = FALSE;
|
||||
break;
|
||||
}
|
||||
if (OK == (d == OP_VSPACE))
|
||||
{
|
||||
if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
|
||||
codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_HSPACE_EXTRA + OP_TYPEQUERY:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
|
||||
count = 2;
|
||||
goto QS5;
|
||||
|
||||
case OP_HSPACE_EXTRA + OP_TYPESTAR:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
|
||||
count = 0;
|
||||
|
||||
QS5:
|
||||
ADD_ACTIVE(state_offset + 2, 0);
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
switch (c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = FALSE;
|
||||
break;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_HSPACE))
|
||||
{
|
||||
if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
|
||||
codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
#ifdef SUPPORT_UNICODE
|
||||
case OP_PROP_EXTRA + OP_TYPEEXACT:
|
||||
case OP_PROP_EXTRA + OP_TYPEUPTO:
|
||||
case OP_PROP_EXTRA + OP_TYPEMINUPTO:
|
||||
case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
|
||||
if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
|
||||
{ ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
|
||||
count = current_state->count; /* Number already matched */
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
int chartype;
|
||||
const uint32_t *cp;
|
||||
const ucd_record * prop = GET_UCD(c);
|
||||
switch(code[1 + IMM2_SIZE + 1])
|
||||
{
|
||||
case PT_LAMP:
|
||||
chartype = prop->chartype;
|
||||
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
OK = prop->chartype == code[1 + IMM2_SIZE + 2];
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
OK = prop->script == code[1 + IMM2_SIZE + 2];
|
||||
break;
|
||||
|
||||
case PT_SCX:
|
||||
OK = (prop->script == code[1 + IMM2_SIZE + 2] ||
|
||||
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop),
|
||||
code[1 + IMM2_SIZE + 2]) != 0);
|
||||
break;
|
||||
|
||||
/* These are specials for combination cases. */
|
||||
|
||||
case PT_ALNUM:
|
||||
chartype = prop->chartype;
|
||||
OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[chartype] == ucp_N;
|
||||
break;
|
||||
|
||||
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||
which means that Perl space and POSIX space are now identical. PCRE
|
||||
was changed at release 8.34. */
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
switch(c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
VSPACE_CASES:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
chartype = prop->chartype;
|
||||
OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[chartype] == ucp_N ||
|
||||
chartype == ucp_Mn || chartype == ucp_Pc;
|
||||
break;
|
||||
|
||||
case PT_CLIST:
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
if (c > MAX_UTF_CODE_POINT)
|
||||
{
|
||||
OK = FALSE;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
|
||||
for (;;)
|
||||
{
|
||||
if (c < *cp) { OK = FALSE; break; }
|
||||
if (c == *cp++) { OK = TRUE; break; }
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_UCNC:
|
||||
OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
|
||||
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
|
||||
c >= 0xe000;
|
||||
break;
|
||||
|
||||
case PT_BIDICL:
|
||||
OK = UCD_BIDICLASS(c) == code[1 + IMM2_SIZE + 2];
|
||||
break;
|
||||
|
||||
case PT_BOOL:
|
||||
OK = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||
UCD_BPROPS_PROP(prop), code[1 + IMM2_SIZE + 2]) != 0;
|
||||
break;
|
||||
|
||||
/* Should never occur, but keep compilers from grumbling. */
|
||||
|
||||
default:
|
||||
OK = codevalue != OP_PROP;
|
||||
break;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_PROP))
|
||||
{
|
||||
if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
if (++count >= (int)GET2(code, 1))
|
||||
{ ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
|
||||
else
|
||||
{ ADD_NEW(state_offset, count); }
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
|
||||
case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
|
||||
case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
|
||||
case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
|
||||
if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
|
||||
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
|
||||
count = current_state->count; /* Number already matched */
|
||||
if (clen > 0)
|
||||
{
|
||||
PCRE2_SPTR nptr;
|
||||
int ncount = 0;
|
||||
if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
|
||||
&ncount);
|
||||
if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||
reset_could_continue = TRUE;
|
||||
if (++count >= (int)GET2(code, 1))
|
||||
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
|
||||
else
|
||||
{ ADD_NEW_DATA(-state_offset, count, ncount); }
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_ANYNL_EXTRA + OP_TYPEEXACT:
|
||||
case OP_ANYNL_EXTRA + OP_TYPEUPTO:
|
||||
case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
|
||||
case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
|
||||
if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
|
||||
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
|
||||
count = current_state->count; /* Number already matched */
|
||||
if (clen > 0)
|
||||
{
|
||||
int ncount = 0;
|
||||
switch (c)
|
||||
{
|
||||
case CHAR_VT:
|
||||
case CHAR_FF:
|
||||
case CHAR_NEL:
|
||||
#ifndef EBCDIC
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
#endif /* Not EBCDIC */
|
||||
if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
|
||||
goto ANYNL03;
|
||||
|
||||
case CHAR_CR:
|
||||
if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
|
||||
/* Fall through */
|
||||
|
||||
ANYNL03:
|
||||
case CHAR_LF:
|
||||
if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
if (++count >= (int)GET2(code, 1))
|
||||
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
|
||||
else
|
||||
{ ADD_NEW_DATA(-state_offset, count, ncount); }
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_VSPACE_EXTRA + OP_TYPEEXACT:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEUPTO:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
|
||||
case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
|
||||
if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
|
||||
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
|
||||
count = current_state->count; /* Number already matched */
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
switch (c)
|
||||
{
|
||||
VSPACE_CASES:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = FALSE;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_VSPACE))
|
||||
{
|
||||
if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
if (++count >= (int)GET2(code, 1))
|
||||
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
|
||||
else
|
||||
{ ADD_NEW_DATA(-state_offset, count, 0); }
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_HSPACE_EXTRA + OP_TYPEEXACT:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEUPTO:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
|
||||
case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
|
||||
if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
|
||||
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
|
||||
count = current_state->count; /* Number already matched */
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
switch (c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = FALSE;
|
||||
break;
|
||||
}
|
||||
|
||||
if (OK == (d == OP_HSPACE))
|
||||
{
|
||||
if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
if (++count >= (int)GET2(code, 1))
|
||||
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
|
||||
else
|
||||
{ ADD_NEW_DATA(-state_offset, count, 0); }
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/* ========================================================================== */
|
||||
/* These opcodes are followed by a character that is usually compared
|
||||
to the current subject character; it is loaded into d. We still get
|
||||
here even if there is no subject character, because in some cases zero
|
||||
repetitions are permitted. */
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_CHAR:
|
||||
if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); }
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_CHARI:
|
||||
if (clen == 0) break;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf_or_ucp)
|
||||
{
|
||||
if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
|
||||
{
|
||||
unsigned int othercase;
|
||||
if (c < 128)
|
||||
othercase = fcc[c];
|
||||
else
|
||||
othercase = UCD_OTHERCASE(c);
|
||||
if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
/* Not UTF or UCP mode */
|
||||
{
|
||||
if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
|
||||
{ ADD_NEW(state_offset + 2, 0); }
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
/*-----------------------------------------------------------------*/
|
||||
/* This is a tricky one because it can match more than one character.
|
||||
Find out how many characters to skip, and then set up a negative state
|
||||
to wait for them to pass before continuing. */
|
||||
|
||||
case OP_EXTUNI:
|
||||
if (clen > 0)
|
||||
{
|
||||
int ncount = 0;
|
||||
PCRE2_SPTR nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject,
|
||||
end_subject, utf, &ncount);
|
||||
if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||
reset_could_continue = TRUE;
|
||||
ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
/* This is tricky, like EXTUNI, because it too can match more than one
|
||||
character (when CR is followed by LF). In this case, set up a negative
|
||||
state to wait for one character to pass before continuing. */
|
||||
|
||||
case OP_ANYNL:
|
||||
if (clen > 0) switch(c)
|
||||
{
|
||||
case CHAR_VT:
|
||||
case CHAR_FF:
|
||||
case CHAR_NEL:
|
||||
#ifndef EBCDIC
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
#endif /* Not EBCDIC */
|
||||
if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
|
||||
/* Fall through */
|
||||
|
||||
case CHAR_LF:
|
||||
ADD_NEW(state_offset + 1, 0);
|
||||
break;
|
||||
|
||||
case CHAR_CR:
|
||||
if (ptr + 1 >= end_subject)
|
||||
{
|
||||
ADD_NEW(state_offset + 1, 0);
|
||||
if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||
reset_could_continue = TRUE;
|
||||
}
|
||||
else if (UCHAR21TEST(ptr + 1) == CHAR_LF)
|
||||
{
|
||||
ADD_NEW_DATA(-(state_offset + 1), 0, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
ADD_NEW(state_offset + 1, 0);
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_NOT_VSPACE:
|
||||
if (clen > 0) switch(c)
|
||||
{
|
||||
VSPACE_CASES:
|
||||
break;
|
||||
|
||||
default:
|
||||
ADD_NEW(state_offset + 1, 0);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_VSPACE:
|
||||
if (clen > 0) switch(c)
|
||||
{
|
||||
VSPACE_CASES:
|
||||
ADD_NEW(state_offset + 1, 0);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_NOT_HSPACE:
|
||||
if (clen > 0) switch(c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
break;
|
||||
|
||||
default:
|
||||
ADD_NEW(state_offset + 1, 0);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_HSPACE:
|
||||
if (clen > 0) switch(c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
ADD_NEW(state_offset + 1, 0);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
/* Match a negated single character casefully. */
|
||||
|
||||
case OP_NOT:
|
||||
if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
/* Match a negated single character caselessly. */
|
||||
|
||||
case OP_NOTI:
|
||||
if (clen > 0)
|
||||
{
|
||||
uint32_t otherd;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf_or_ucp && d >= 128)
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
otherd = TABLE_GET(d, fcc, d);
|
||||
if (c != d && c != otherd)
|
||||
{ ADD_NEW(state_offset + dlen + 1, 0); }
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_PLUSI:
|
||||
case OP_MINPLUSI:
|
||||
case OP_POSPLUSI:
|
||||
case OP_NOTPLUSI:
|
||||
case OP_NOTMINPLUSI:
|
||||
case OP_NOTPOSPLUSI:
|
||||
caseless = TRUE;
|
||||
codevalue -= OP_STARI - OP_STAR;
|
||||
|
||||
/* Fall through */
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_POSPLUS:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTPOSPLUS:
|
||||
count = current_state->count; /* Already matched */
|
||||
if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
|
||||
if (clen > 0)
|
||||
{
|
||||
uint32_t otherd = NOTACHAR;
|
||||
if (caseless)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf_or_ucp && d >= 128)
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
otherd = TABLE_GET(d, fcc, d);
|
||||
}
|
||||
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
||||
{
|
||||
if (count > 0 &&
|
||||
(codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
count++;
|
||||
ADD_NEW(state_offset, count);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_QUERYI:
|
||||
case OP_MINQUERYI:
|
||||
case OP_POSQUERYI:
|
||||
case OP_NOTQUERYI:
|
||||
case OP_NOTMINQUERYI:
|
||||
case OP_NOTPOSQUERYI:
|
||||
caseless = TRUE;
|
||||
codevalue -= OP_STARI - OP_STAR;
|
||||
/* Fall through */
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_POSQUERY:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_NOTPOSQUERY:
|
||||
ADD_ACTIVE(state_offset + dlen + 1, 0);
|
||||
if (clen > 0)
|
||||
{
|
||||
uint32_t otherd = NOTACHAR;
|
||||
if (caseless)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf_or_ucp && d >= 128)
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
otherd = TABLE_GET(d, fcc, d);
|
||||
}
|
||||
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
||||
{
|
||||
if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
ADD_NEW(state_offset + dlen + 1, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_STARI:
|
||||
case OP_MINSTARI:
|
||||
case OP_POSSTARI:
|
||||
case OP_NOTSTARI:
|
||||
case OP_NOTMINSTARI:
|
||||
case OP_NOTPOSSTARI:
|
||||
caseless = TRUE;
|
||||
codevalue -= OP_STARI - OP_STAR;
|
||||
/* Fall through */
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_POSSTAR:
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTPOSSTAR:
|
||||
ADD_ACTIVE(state_offset + dlen + 1, 0);
|
||||
if (clen > 0)
|
||||
{
|
||||
uint32_t otherd = NOTACHAR;
|
||||
if (caseless)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf_or_ucp && d >= 128)
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
otherd = TABLE_GET(d, fcc, d);
|
||||
}
|
||||
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
||||
{
|
||||
if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
ADD_NEW(state_offset, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_EXACTI:
|
||||
case OP_NOTEXACTI:
|
||||
caseless = TRUE;
|
||||
codevalue -= OP_STARI - OP_STAR;
|
||||
/* Fall through */
|
||||
case OP_EXACT:
|
||||
case OP_NOTEXACT:
|
||||
count = current_state->count; /* Number already matched */
|
||||
if (clen > 0)
|
||||
{
|
||||
uint32_t otherd = NOTACHAR;
|
||||
if (caseless)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf_or_ucp && d >= 128)
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
otherd = TABLE_GET(d, fcc, d);
|
||||
}
|
||||
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
||||
{
|
||||
if (++count >= (int)GET2(code, 1))
|
||||
{ ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
|
||||
else
|
||||
{ ADD_NEW(state_offset, count); }
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_UPTOI:
|
||||
case OP_MINUPTOI:
|
||||
case OP_POSUPTOI:
|
||||
case OP_NOTUPTOI:
|
||||
case OP_NOTMINUPTOI:
|
||||
case OP_NOTPOSUPTOI:
|
||||
caseless = TRUE;
|
||||
codevalue -= OP_STARI - OP_STAR;
|
||||
/* Fall through */
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
case OP_POSUPTO:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_NOTPOSUPTO:
|
||||
ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
|
||||
count = current_state->count; /* Number already matched */
|
||||
if (clen > 0)
|
||||
{
|
||||
uint32_t otherd = NOTACHAR;
|
||||
if (caseless)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf_or_ucp && d >= 128)
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
otherd = TABLE_GET(d, fcc, d);
|
||||
}
|
||||
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
||||
{
|
||||
if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
if (++count >= (int)GET2(code, 1))
|
||||
{ ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
|
||||
else
|
||||
{ ADD_NEW(state_offset, count); }
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* These are the class-handling opcodes */
|
||||
|
||||
case OP_CLASS:
|
||||
case OP_NCLASS:
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
case OP_XCLASS:
|
||||
case OP_ECLASS:
|
||||
#endif
|
||||
{
|
||||
BOOL isinclass = FALSE;
|
||||
int next_state_offset;
|
||||
PCRE2_SPTR ecode;
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
/* An extended class may have a table or a list of single characters,
|
||||
ranges, or both, and it may be positive or negative. There's a
|
||||
function that sorts all this out. */
|
||||
|
||||
if (codevalue == OP_XCLASS)
|
||||
{
|
||||
ecode = code + GET(code, 1);
|
||||
if (clen > 0)
|
||||
isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE,
|
||||
(const uint8_t*)mb->start_code, utf);
|
||||
}
|
||||
|
||||
/* A nested set-based class has internal opcodes for performing
|
||||
set operations. */
|
||||
|
||||
else if (codevalue == OP_ECLASS)
|
||||
{
|
||||
ecode = code + GET(code, 1);
|
||||
if (clen > 0)
|
||||
isinclass = PRIV(eclass)(c, code + 1 + LINK_SIZE, ecode,
|
||||
(const uint8_t*)mb->start_code, utf);
|
||||
}
|
||||
|
||||
else
|
||||
#endif /* SUPPORT_WIDE_CHARS */
|
||||
|
||||
/* For a simple class, there is always just a 32-byte table, and we
|
||||
can set isinclass from it. */
|
||||
|
||||
{
|
||||
ecode = code + 1 + (32 / sizeof(PCRE2_UCHAR));
|
||||
if (clen > 0)
|
||||
{
|
||||
isinclass = (c > 255)? (codevalue == OP_NCLASS) :
|
||||
((((const uint8_t *)(code + 1))[c/8] & (1u << (c&7))) != 0);
|
||||
}
|
||||
}
|
||||
|
||||
/* At this point, isinclass is set for all kinds of class, and ecode
|
||||
points to the byte after the end of the class. If there is a
|
||||
quantifier, this is where it will be. */
|
||||
|
||||
next_state_offset = (int)(ecode - start_code);
|
||||
|
||||
switch (*ecode)
|
||||
{
|
||||
case OP_CRSTAR:
|
||||
case OP_CRMINSTAR:
|
||||
case OP_CRPOSSTAR:
|
||||
ADD_ACTIVE(next_state_offset + 1, 0);
|
||||
if (isinclass)
|
||||
{
|
||||
if (*ecode == OP_CRPOSSTAR)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
ADD_NEW(state_offset, 0);
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_CRPLUS:
|
||||
case OP_CRMINPLUS:
|
||||
case OP_CRPOSPLUS:
|
||||
count = current_state->count; /* Already matched */
|
||||
if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
|
||||
if (isinclass)
|
||||
{
|
||||
if (count > 0 && *ecode == OP_CRPOSPLUS)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
count++;
|
||||
ADD_NEW(state_offset, count);
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
case OP_CRPOSQUERY:
|
||||
ADD_ACTIVE(next_state_offset + 1, 0);
|
||||
if (isinclass)
|
||||
{
|
||||
if (*ecode == OP_CRPOSQUERY)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
ADD_NEW(next_state_offset + 1, 0);
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
case OP_CRPOSRANGE:
|
||||
count = current_state->count; /* Already matched */
|
||||
if (count >= (int)GET2(ecode, 1))
|
||||
{ ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
|
||||
if (isinclass)
|
||||
{
|
||||
int max = (int)GET2(ecode, 1 + IMM2_SIZE);
|
||||
|
||||
if (*ecode == OP_CRPOSRANGE && count >= (int)GET2(ecode, 1))
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
|
||||
if (++count >= max && max != 0) /* Max 0 => no limit */
|
||||
{ ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
|
||||
else
|
||||
{ ADD_NEW(state_offset, count); }
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
if (isinclass) { ADD_NEW(next_state_offset, 0); }
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/* ========================================================================== */
|
||||
/* These are the opcodes for fancy brackets of various kinds. We have
|
||||
to use recursion in order to handle them. The "always failing" assertion
|
||||
(?!) is optimised to OP_FAIL when compiling, so we have to support that,
|
||||
though the other "backtracking verbs" are not supported. */
|
||||
|
||||
case OP_FAIL:
|
||||
break;
|
||||
|
||||
case OP_ASSERT:
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
{
|
||||
int rc;
|
||||
int *local_workspace;
|
||||
PCRE2_SIZE *local_offsets;
|
||||
PCRE2_SPTR endasscode = code + GET(code, 1);
|
||||
RWS_anchor *rws = (RWS_anchor *)RWS;
|
||||
|
||||
if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
|
||||
{
|
||||
rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
|
||||
if (rc != 0) return rc;
|
||||
RWS = (int *)rws;
|
||||
}
|
||||
|
||||
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
|
||||
local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
|
||||
rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||
|
||||
while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
|
||||
|
||||
rc = internal_dfa_match(
|
||||
mb, /* static match data */
|
||||
code, /* this subexpression's code */
|
||||
ptr, /* where we currently are */
|
||||
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
|
||||
local_offsets, /* offset vector */
|
||||
RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
|
||||
local_workspace, /* workspace vector */
|
||||
RWS_RSIZE, /* size of same */
|
||||
rlevel, /* function recursion level */
|
||||
RWS); /* recursion workspace */
|
||||
|
||||
rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||
|
||||
if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc;
|
||||
if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
|
||||
{ ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_COND:
|
||||
case OP_SCOND:
|
||||
{
|
||||
int codelink = (int)GET(code, 1);
|
||||
PCRE2_UCHAR condcode;
|
||||
|
||||
/* Because of the way auto-callout works during compile, a callout item
|
||||
is inserted between OP_COND and an assertion condition. This does not
|
||||
happen for the other conditions. */
|
||||
|
||||
if (code[LINK_SIZE + 1] == OP_CALLOUT
|
||||
|| code[LINK_SIZE + 1] == OP_CALLOUT_STR)
|
||||
{
|
||||
PCRE2_SIZE callout_length;
|
||||
rrc = do_callout_dfa(code, offsets, current_subject, ptr, mb,
|
||||
1 + LINK_SIZE, &callout_length);
|
||||
if (rrc < 0) return rrc; /* Abandon */
|
||||
if (rrc > 0) break; /* Fail this thread */
|
||||
code += callout_length; /* Skip callout data */
|
||||
}
|
||||
|
||||
condcode = code[LINK_SIZE+1];
|
||||
|
||||
/* Back reference conditions and duplicate named recursion conditions
|
||||
are not supported */
|
||||
|
||||
if (condcode == OP_CREF || condcode == OP_DNCREF ||
|
||||
condcode == OP_DNRREF)
|
||||
return PCRE2_ERROR_DFA_UCOND;
|
||||
|
||||
/* The DEFINE condition is always false, and the assertion (?!) is
|
||||
converted to OP_FAIL. */
|
||||
|
||||
if (condcode == OP_FALSE || condcode == OP_FAIL)
|
||||
{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
|
||||
|
||||
/* There is also an always-true condition */
|
||||
|
||||
else if (condcode == OP_TRUE)
|
||||
{ ADD_ACTIVE(state_offset + LINK_SIZE + 2, 0); }
|
||||
|
||||
/* The only supported version of OP_RREF is for the value RREF_ANY,
|
||||
which means "test if in any recursion". We can't test for specifically
|
||||
recursed groups. */
|
||||
|
||||
else if (condcode == OP_RREF)
|
||||
{
|
||||
unsigned int value = GET2(code, LINK_SIZE + 2);
|
||||
if (value != RREF_ANY) return PCRE2_ERROR_DFA_UCOND;
|
||||
if (mb->recursive != NULL)
|
||||
{ ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
|
||||
else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
|
||||
}
|
||||
|
||||
/* Otherwise, the condition is an assertion */
|
||||
|
||||
else
|
||||
{
|
||||
int rc;
|
||||
int *local_workspace;
|
||||
PCRE2_SIZE *local_offsets;
|
||||
PCRE2_SPTR asscode = code + LINK_SIZE + 1;
|
||||
PCRE2_SPTR endasscode = asscode + GET(asscode, 1);
|
||||
RWS_anchor *rws = (RWS_anchor *)RWS;
|
||||
|
||||
if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
|
||||
{
|
||||
rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
|
||||
if (rc != 0) return rc;
|
||||
RWS = (int *)rws;
|
||||
}
|
||||
|
||||
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
|
||||
local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
|
||||
rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||
|
||||
while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
|
||||
|
||||
rc = internal_dfa_match(
|
||||
mb, /* fixed match data */
|
||||
asscode, /* this subexpression's code */
|
||||
ptr, /* where we currently are */
|
||||
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
|
||||
local_offsets, /* offset vector */
|
||||
RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
|
||||
local_workspace, /* workspace vector */
|
||||
RWS_RSIZE, /* size of same */
|
||||
rlevel, /* function recursion level */
|
||||
RWS); /* recursion workspace */
|
||||
|
||||
rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||
|
||||
if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc;
|
||||
if ((rc >= 0) ==
|
||||
(condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
|
||||
{ ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
|
||||
else
|
||||
{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_RECURSE:
|
||||
{
|
||||
int rc;
|
||||
int *local_workspace;
|
||||
PCRE2_SIZE *local_offsets;
|
||||
RWS_anchor *rws = (RWS_anchor *)RWS;
|
||||
PCRE2_SPTR callpat = start_code + GET(code, 1);
|
||||
uint32_t recno = (callpat == mb->start_code)? 0 :
|
||||
GET2(callpat, 1 + LINK_SIZE);
|
||||
|
||||
if (rws->free < RWS_RSIZE + RWS_OVEC_RSIZE)
|
||||
{
|
||||
rc = more_workspace(&rws, RWS_OVEC_RSIZE, mb);
|
||||
if (rc != 0) return rc;
|
||||
RWS = (int *)rws;
|
||||
}
|
||||
|
||||
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
|
||||
local_workspace = ((int *)local_offsets) + RWS_OVEC_RSIZE;
|
||||
rws->free -= RWS_RSIZE + RWS_OVEC_RSIZE;
|
||||
|
||||
/* Check for repeating a recursion without advancing the subject
|
||||
pointer or last used character. This should catch convoluted mutual
|
||||
recursions. (Some simple cases are caught at compile time.) */
|
||||
|
||||
for (dfa_recursion_info *ri = mb->recursive;
|
||||
ri != NULL;
|
||||
ri = ri->prevrec)
|
||||
{
|
||||
if (recno == ri->group_num && ptr == ri->subject_position &&
|
||||
mb->last_used_ptr == ri->last_used_ptr)
|
||||
return PCRE2_ERROR_RECURSELOOP;
|
||||
}
|
||||
|
||||
/* Remember this recursion and where we started it so as to
|
||||
catch infinite loops. */
|
||||
|
||||
new_recursive.group_num = recno;
|
||||
new_recursive.subject_position = ptr;
|
||||
new_recursive.last_used_ptr = mb->last_used_ptr;
|
||||
new_recursive.prevrec = mb->recursive;
|
||||
mb->recursive = &new_recursive;
|
||||
|
||||
rc = internal_dfa_match(
|
||||
mb, /* fixed match data */
|
||||
callpat, /* this subexpression's code */
|
||||
ptr, /* where we currently are */
|
||||
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
|
||||
local_offsets, /* offset vector */
|
||||
RWS_OVEC_RSIZE/OVEC_UNIT, /* size of same */
|
||||
local_workspace, /* workspace vector */
|
||||
RWS_RSIZE, /* size of same */
|
||||
rlevel, /* function recursion level */
|
||||
RWS); /* recursion workspace */
|
||||
|
||||
rws->free += RWS_RSIZE + RWS_OVEC_RSIZE;
|
||||
mb->recursive = new_recursive.prevrec; /* Done this recursion */
|
||||
|
||||
/* Ran out of internal offsets */
|
||||
|
||||
if (rc == 0) return PCRE2_ERROR_DFA_RECURSE;
|
||||
|
||||
/* For each successful matched substring, set up the next state with a
|
||||
count of characters to skip before trying it. Note that the count is in
|
||||
characters, not bytes. */
|
||||
|
||||
if (rc > 0)
|
||||
{
|
||||
for (rc = rc*2 - 2; rc >= 0; rc -= 2)
|
||||
{
|
||||
PCRE2_SIZE charcount = local_offsets[rc+1] - local_offsets[rc];
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (utf)
|
||||
{
|
||||
PCRE2_SPTR p = start_subject + local_offsets[rc];
|
||||
PCRE2_SPTR pp = start_subject + local_offsets[rc+1];
|
||||
while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--;
|
||||
}
|
||||
#endif
|
||||
if (charcount > 0)
|
||||
{
|
||||
ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0,
|
||||
(int)(charcount - 1));
|
||||
}
|
||||
else
|
||||
{
|
||||
ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (rc != PCRE2_ERROR_NOMATCH) return rc;
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_BRAPOS:
|
||||
case OP_SBRAPOS:
|
||||
case OP_CBRAPOS:
|
||||
case OP_SCBRAPOS:
|
||||
case OP_BRAPOSZERO:
|
||||
{
|
||||
int rc;
|
||||
int *local_workspace;
|
||||
PCRE2_SIZE *local_offsets;
|
||||
PCRE2_SIZE charcount, matched_count;
|
||||
PCRE2_SPTR local_ptr = ptr;
|
||||
RWS_anchor *rws = (RWS_anchor *)RWS;
|
||||
BOOL allow_zero;
|
||||
|
||||
if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
|
||||
{
|
||||
rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
|
||||
if (rc != 0) return rc;
|
||||
RWS = (int *)rws;
|
||||
}
|
||||
|
||||
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
|
||||
local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
|
||||
rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||
|
||||
if (codevalue == OP_BRAPOSZERO)
|
||||
{
|
||||
allow_zero = TRUE;
|
||||
++code; /* The following opcode will be one of the above BRAs */
|
||||
}
|
||||
else allow_zero = FALSE;
|
||||
|
||||
/* Loop to match the subpattern as many times as possible as if it were
|
||||
a complete pattern. */
|
||||
|
||||
for (matched_count = 0;; matched_count++)
|
||||
{
|
||||
rc = internal_dfa_match(
|
||||
mb, /* fixed match data */
|
||||
code, /* this subexpression's code */
|
||||
local_ptr, /* where we currently are */
|
||||
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
|
||||
local_offsets, /* offset vector */
|
||||
RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
|
||||
local_workspace, /* workspace vector */
|
||||
RWS_RSIZE, /* size of same */
|
||||
rlevel, /* function recursion level */
|
||||
RWS); /* recursion workspace */
|
||||
|
||||
/* Failed to match */
|
||||
|
||||
if (rc < 0)
|
||||
{
|
||||
if (rc != PCRE2_ERROR_NOMATCH) return rc;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Matched: break the loop if zero characters matched. */
|
||||
|
||||
charcount = local_offsets[1] - local_offsets[0];
|
||||
if (charcount == 0) break;
|
||||
local_ptr += charcount; /* Advance temporary position ptr */
|
||||
}
|
||||
|
||||
rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||
|
||||
/* At this point we have matched the subpattern matched_count
|
||||
times, and local_ptr is pointing to the character after the end of the
|
||||
last match. */
|
||||
|
||||
if (matched_count > 0 || allow_zero)
|
||||
{
|
||||
PCRE2_SPTR end_subpattern = code;
|
||||
int next_state_offset;
|
||||
|
||||
do { end_subpattern += GET(end_subpattern, 1); }
|
||||
while (*end_subpattern == OP_ALT);
|
||||
next_state_offset =
|
||||
(int)(end_subpattern - start_code + LINK_SIZE + 1);
|
||||
|
||||
/* Optimization: if there are no more active states, and there
|
||||
are no new states yet set up, then skip over the subject string
|
||||
right here, to save looping. Otherwise, set up the new state to swing
|
||||
into action when the end of the matched substring is reached. */
|
||||
|
||||
if (i + 1 >= active_count && new_count == 0)
|
||||
{
|
||||
ptr = local_ptr;
|
||||
clen = 0;
|
||||
ADD_NEW(next_state_offset, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
PCRE2_SPTR p = ptr;
|
||||
PCRE2_SPTR pp = local_ptr;
|
||||
charcount = (PCRE2_SIZE)(pp - p);
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (utf) while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--;
|
||||
#endif
|
||||
ADD_NEW_DATA(-next_state_offset, 0, (int)(charcount - 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_ONCE:
|
||||
{
|
||||
int rc;
|
||||
int *local_workspace;
|
||||
PCRE2_SIZE *local_offsets;
|
||||
RWS_anchor *rws = (RWS_anchor *)RWS;
|
||||
|
||||
if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
|
||||
{
|
||||
rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
|
||||
if (rc != 0) return rc;
|
||||
RWS = (int *)rws;
|
||||
}
|
||||
|
||||
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
|
||||
local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
|
||||
rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||
|
||||
rc = internal_dfa_match(
|
||||
mb, /* fixed match data */
|
||||
code, /* this subexpression's code */
|
||||
ptr, /* where we currently are */
|
||||
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
|
||||
local_offsets, /* offset vector */
|
||||
RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
|
||||
local_workspace, /* workspace vector */
|
||||
RWS_RSIZE, /* size of same */
|
||||
rlevel, /* function recursion level */
|
||||
RWS); /* recursion workspace */
|
||||
|
||||
rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||
|
||||
if (rc >= 0)
|
||||
{
|
||||
PCRE2_SPTR end_subpattern = code;
|
||||
PCRE2_SIZE charcount = local_offsets[1] - local_offsets[0];
|
||||
int next_state_offset, repeat_state_offset;
|
||||
|
||||
do { end_subpattern += GET(end_subpattern, 1); }
|
||||
while (*end_subpattern == OP_ALT);
|
||||
next_state_offset =
|
||||
(int)(end_subpattern - start_code + LINK_SIZE + 1);
|
||||
|
||||
/* If the end of this subpattern is KETRMAX or KETRMIN, we must
|
||||
arrange for the repeat state also to be added to the relevant list.
|
||||
Calculate the offset, or set -1 for no repeat. */
|
||||
|
||||
repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
|
||||
*end_subpattern == OP_KETRMIN)?
|
||||
(int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1;
|
||||
|
||||
/* If we have matched an empty string, add the next state at the
|
||||
current character pointer. This is important so that the duplicate
|
||||
checking kicks in, which is what breaks infinite loops that match an
|
||||
empty string. */
|
||||
|
||||
if (charcount == 0)
|
||||
{
|
||||
ADD_ACTIVE(next_state_offset, 0);
|
||||
}
|
||||
|
||||
/* Optimization: if there are no more active states, and there
|
||||
are no new states yet set up, then skip over the subject string
|
||||
right here, to save looping. Otherwise, set up the new state to swing
|
||||
into action when the end of the matched substring is reached. */
|
||||
|
||||
else if (i + 1 >= active_count && new_count == 0)
|
||||
{
|
||||
ptr += charcount;
|
||||
clen = 0;
|
||||
ADD_NEW(next_state_offset, 0);
|
||||
|
||||
/* If we are adding a repeat state at the new character position,
|
||||
we must fudge things so that it is the only current state.
|
||||
Otherwise, it might be a duplicate of one we processed before, and
|
||||
that would cause it to be skipped. */
|
||||
|
||||
if (repeat_state_offset >= 0)
|
||||
{
|
||||
next_active_state = active_states;
|
||||
active_count = 0;
|
||||
i = -1;
|
||||
ADD_ACTIVE(repeat_state_offset, 0);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (utf)
|
||||
{
|
||||
PCRE2_SPTR p = start_subject + local_offsets[0];
|
||||
PCRE2_SPTR pp = start_subject + local_offsets[1];
|
||||
while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--;
|
||||
}
|
||||
#endif
|
||||
ADD_NEW_DATA(-next_state_offset, 0, (int)(charcount - 1));
|
||||
if (repeat_state_offset >= 0)
|
||||
{ ADD_NEW_DATA(-repeat_state_offset, 0, (int)(charcount - 1)); }
|
||||
}
|
||||
}
|
||||
else if (rc != PCRE2_ERROR_NOMATCH) return rc;
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* Handle callouts */
|
||||
|
||||
case OP_CALLOUT:
|
||||
case OP_CALLOUT_STR:
|
||||
{
|
||||
PCRE2_SIZE callout_length;
|
||||
rrc = do_callout_dfa(code, offsets, current_subject, ptr, mb, 0,
|
||||
&callout_length);
|
||||
if (rrc < 0) return rrc; /* Abandon */
|
||||
if (rrc == 0)
|
||||
{ ADD_ACTIVE(state_offset + (int)callout_length, 0); }
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
default: /* Unsupported opcode */
|
||||
return PCRE2_ERROR_DFA_UITEM;
|
||||
}
|
||||
|
||||
NEXT_ACTIVE_STATE: continue;
|
||||
|
||||
} /* End of loop scanning active states */
|
||||
|
||||
/* We have finished the processing at the current subject character. If no
|
||||
new states have been set for the next character, we have found all the
|
||||
matches that we are going to find. If partial matching has been requested,
|
||||
check for appropriate conditions.
|
||||
|
||||
The "could_continue" variable is true if a state could have continued but
|
||||
for the fact that the end of the subject was reached. */
|
||||
|
||||
if (new_count <= 0)
|
||||
{
|
||||
if (could_continue && /* Some could go on, and */
|
||||
( /* either... */
|
||||
(mb->moptions & PCRE2_PARTIAL_HARD) != 0 /* Hard partial */
|
||||
|| /* or... */
|
||||
((mb->moptions & PCRE2_PARTIAL_SOFT) != 0 && /* Soft partial and */
|
||||
match_count < 0) /* no matches */
|
||||
) && /* And... */
|
||||
(
|
||||
partial_newline || /* Either partial NL */
|
||||
( /* or ... */
|
||||
ptr >= end_subject && /* End of subject and */
|
||||
( /* either */
|
||||
ptr > mb->start_used_ptr || /* Inspected non-empty string */
|
||||
mb->allowemptypartial /* or pattern has lookbehind */
|
||||
) /* or could match empty */
|
||||
)
|
||||
))
|
||||
match_count = PCRE2_ERROR_PARTIAL;
|
||||
break; /* Exit from loop along the subject string */
|
||||
}
|
||||
|
||||
/* One or more states are active for the next character. */
|
||||
|
||||
ptr += clen; /* Advance to next subject character */
|
||||
} /* Loop to move along the subject string */
|
||||
|
||||
/* Control gets here from "break" a few lines above. If we have a match and
|
||||
PCRE2_ENDANCHORED is set, the match fails. */
|
||||
|
||||
if (match_count >= 0 &&
|
||||
((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0 &&
|
||||
ptr < end_subject)
|
||||
match_count = PCRE2_ERROR_NOMATCH;
|
||||
|
||||
return match_count;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Match a pattern using the DFA algorithm *
|
||||
*************************************************/
|
||||
|
||||
/* This function matches a compiled pattern to a subject string, using the
|
||||
alternate matching algorithm that finds all matches at once.
|
||||
|
||||
Arguments:
|
||||
code points to the compiled pattern
|
||||
subject subject string
|
||||
length length of subject string
|
||||
start_offset where to start matching in the subject
|
||||
options option bits
|
||||
match_data points to a match data structure
|
||||
mcontext points to a match context
|
||||
workspace pointer to workspace
|
||||
wscount size of workspace
|
||||
|
||||
Returns: > 0 => number of match offset pairs placed in offsets
|
||||
= 0 => offsets overflowed; longest matches are present
|
||||
-1 => failed to match
|
||||
< -1 => some kind of unexpected problem
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
|
||||
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
|
||||
pcre2_match_context *mcontext, int *workspace, PCRE2_SIZE wscount)
|
||||
{
|
||||
int rc;
|
||||
int was_zero_terminated = 0;
|
||||
|
||||
const pcre2_real_code *re = (const pcre2_real_code *)code;
|
||||
|
||||
PCRE2_SPTR start_match;
|
||||
PCRE2_SPTR end_subject;
|
||||
PCRE2_SPTR bumpalong_limit;
|
||||
PCRE2_SPTR req_cu_ptr;
|
||||
|
||||
BOOL utf, anchored, startline, firstline;
|
||||
BOOL has_first_cu = FALSE;
|
||||
BOOL has_req_cu = FALSE;
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
PCRE2_SPTR memchr_found_first_cu = NULL;
|
||||
PCRE2_SPTR memchr_found_first_cu2 = NULL;
|
||||
#endif
|
||||
|
||||
PCRE2_UCHAR first_cu = 0;
|
||||
PCRE2_UCHAR first_cu2 = 0;
|
||||
PCRE2_UCHAR req_cu = 0;
|
||||
PCRE2_UCHAR req_cu2 = 0;
|
||||
|
||||
const uint8_t *start_bits = NULL;
|
||||
|
||||
/* We need to have mb pointing to a match block, because the IS_NEWLINE macro
|
||||
is used below, and it expects NLBLOCK to be defined as a pointer. */
|
||||
|
||||
pcre2_callout_block cb;
|
||||
dfa_match_block actual_match_block;
|
||||
dfa_match_block *mb = &actual_match_block;
|
||||
|
||||
/* Set up a starting block of memory for use during recursive calls to
|
||||
internal_dfa_match(). By putting this on the stack, it minimizes resource use
|
||||
in the case when it is not needed. If this is too small, more memory is
|
||||
obtained from the heap. At the start of each block is an anchor structure.*/
|
||||
|
||||
int base_recursion_workspace[RWS_BASE_SIZE];
|
||||
RWS_anchor *rws = (RWS_anchor *)base_recursion_workspace;
|
||||
rws->next = NULL;
|
||||
rws->size = RWS_BASE_SIZE;
|
||||
rws->free = RWS_BASE_SIZE - RWS_ANCHOR_SIZE;
|
||||
|
||||
/* Recognize NULL, length 0 as an empty string. */
|
||||
|
||||
if (subject == NULL && length == 0) subject = (PCRE2_SPTR)"";
|
||||
|
||||
/* Plausibility checks */
|
||||
|
||||
if ((options & ~PUBLIC_DFA_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
|
||||
if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL)
|
||||
return PCRE2_ERROR_NULL;
|
||||
|
||||
if (length == PCRE2_ZERO_TERMINATED)
|
||||
{
|
||||
length = PRIV(strlen)(subject);
|
||||
was_zero_terminated = 1;
|
||||
}
|
||||
|
||||
if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE;
|
||||
if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
|
||||
|
||||
/* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same
|
||||
time. */
|
||||
|
||||
if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0 &&
|
||||
((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
|
||||
/* Invalid UTF support is not available for DFA matching. */
|
||||
|
||||
if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
|
||||
return PCRE2_ERROR_DFA_UINVALID_UTF;
|
||||
|
||||
/* Check that the first field in the block is the magic number. If it is not,
|
||||
return with PCRE2_ERROR_BADMAGIC. */
|
||||
|
||||
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
|
||||
|
||||
/* Check the code unit width. */
|
||||
|
||||
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
|
||||
return PCRE2_ERROR_BADMODE;
|
||||
|
||||
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
|
||||
options variable for this function. Users of PCRE2 who are not calling the
|
||||
function directly would like to have a way of setting these flags, in the same
|
||||
way that they can set pcre2_compile() flags like PCRE2_NO_AUTO_POSSESS with
|
||||
constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
|
||||
(*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which can now be
|
||||
transferred to the options for this function. The bits are guaranteed to be
|
||||
adjacent, but do not have the same values. This bit of Boolean trickery assumes
|
||||
that the match-time bits are not more significant than the flag bits. If by
|
||||
accident this is not the case, a compile-time division by zero error will
|
||||
occur. */
|
||||
|
||||
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
|
||||
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
|
||||
options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
|
||||
#undef FF
|
||||
#undef OO
|
||||
|
||||
/* If restarting after a partial match, do some sanity checks on the contents
|
||||
of the workspace. */
|
||||
|
||||
if ((options & PCRE2_DFA_RESTART) != 0)
|
||||
{
|
||||
if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
|
||||
workspace[1] > (int)((wscount - 2)/INTS_PER_STATEBLOCK))
|
||||
return PCRE2_ERROR_DFA_BADRESTART;
|
||||
}
|
||||
|
||||
/* Set some local values */
|
||||
|
||||
utf = (re->overall_options & PCRE2_UTF) != 0;
|
||||
start_match = subject + start_offset;
|
||||
end_subject = subject + length;
|
||||
req_cu_ptr = start_match - 1;
|
||||
anchored = (options & (PCRE2_ANCHORED|PCRE2_DFA_RESTART)) != 0 ||
|
||||
(re->overall_options & PCRE2_ANCHORED) != 0;
|
||||
|
||||
/* The "must be at the start of a line" flags are used in a loop when finding
|
||||
where to start. */
|
||||
|
||||
startline = (re->flags & PCRE2_STARTLINE) != 0;
|
||||
firstline = !anchored && (re->overall_options & PCRE2_FIRSTLINE) != 0;
|
||||
bumpalong_limit = end_subject;
|
||||
|
||||
/* Initialize and set up the fixed fields in the callout block, with a pointer
|
||||
in the match block. */
|
||||
|
||||
mb->cb = &cb;
|
||||
cb.version = 2;
|
||||
cb.subject = subject;
|
||||
cb.subject_length = (PCRE2_SIZE)(end_subject - subject);
|
||||
cb.callout_flags = 0;
|
||||
cb.capture_top = 1; /* No capture support */
|
||||
cb.capture_last = 0;
|
||||
cb.mark = NULL; /* No (*MARK) support */
|
||||
|
||||
/* Get data from the match context, if present, and fill in the remaining
|
||||
fields in the match block. It is an error to set an offset limit without
|
||||
setting the flag at compile time. */
|
||||
|
||||
if (mcontext == NULL)
|
||||
{
|
||||
mb->callout = NULL;
|
||||
mb->memctl = re->memctl;
|
||||
mb->match_limit = PRIV(default_match_context).match_limit;
|
||||
mb->match_limit_depth = PRIV(default_match_context).depth_limit;
|
||||
mb->heap_limit = PRIV(default_match_context).heap_limit;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (mcontext->offset_limit != PCRE2_UNSET)
|
||||
{
|
||||
if ((re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
|
||||
return PCRE2_ERROR_BADOFFSETLIMIT;
|
||||
bumpalong_limit = subject + mcontext->offset_limit;
|
||||
}
|
||||
mb->callout = mcontext->callout;
|
||||
mb->callout_data = mcontext->callout_data;
|
||||
mb->memctl = mcontext->memctl;
|
||||
mb->match_limit = mcontext->match_limit;
|
||||
mb->match_limit_depth = mcontext->depth_limit;
|
||||
mb->heap_limit = mcontext->heap_limit;
|
||||
}
|
||||
|
||||
if (mb->match_limit > re->limit_match)
|
||||
mb->match_limit = re->limit_match;
|
||||
|
||||
if (mb->match_limit_depth > re->limit_depth)
|
||||
mb->match_limit_depth = re->limit_depth;
|
||||
|
||||
if (mb->heap_limit > re->limit_heap)
|
||||
mb->heap_limit = re->limit_heap;
|
||||
|
||||
mb->start_code = (PCRE2_SPTR)((const uint8_t *)re + re->code_start);
|
||||
mb->tables = re->tables;
|
||||
mb->start_subject = subject;
|
||||
mb->end_subject = end_subject;
|
||||
mb->start_offset = start_offset;
|
||||
mb->allowemptypartial = (re->max_lookbehind > 0) ||
|
||||
(re->flags & PCRE2_MATCH_EMPTY) != 0;
|
||||
mb->moptions = options;
|
||||
mb->poptions = re->overall_options;
|
||||
mb->match_call_count = 0;
|
||||
mb->heap_used = 0;
|
||||
|
||||
/* Process the \R and newline settings. */
|
||||
|
||||
mb->bsr_convention = re->bsr_convention;
|
||||
mb->nltype = NLTYPE_FIXED;
|
||||
switch(re->newline_convention)
|
||||
{
|
||||
case PCRE2_NEWLINE_CR:
|
||||
mb->nllen = 1;
|
||||
mb->nl[0] = CHAR_CR;
|
||||
break;
|
||||
|
||||
case PCRE2_NEWLINE_LF:
|
||||
mb->nllen = 1;
|
||||
mb->nl[0] = CHAR_NL;
|
||||
break;
|
||||
|
||||
case PCRE2_NEWLINE_NUL:
|
||||
mb->nllen = 1;
|
||||
mb->nl[0] = CHAR_NUL;
|
||||
break;
|
||||
|
||||
case PCRE2_NEWLINE_CRLF:
|
||||
mb->nllen = 2;
|
||||
mb->nl[0] = CHAR_CR;
|
||||
mb->nl[1] = CHAR_NL;
|
||||
break;
|
||||
|
||||
case PCRE2_NEWLINE_ANY:
|
||||
mb->nltype = NLTYPE_ANY;
|
||||
break;
|
||||
|
||||
case PCRE2_NEWLINE_ANYCRLF:
|
||||
mb->nltype = NLTYPE_ANYCRLF;
|
||||
break;
|
||||
|
||||
default:
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
return PCRE2_ERROR_INTERNAL;
|
||||
}
|
||||
|
||||
/* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
|
||||
we must also check that a starting offset does not point into the middle of a
|
||||
multiunit character. We check only the portion of the subject that is going to
|
||||
be inspected during matching - from the offset minus the maximum back reference
|
||||
to the given length. This saves time when a small part of a large subject is
|
||||
being matched by the use of a starting offset. Note that the maximum lookbehind
|
||||
is a number of characters, not code units. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
||||
{
|
||||
PCRE2_SPTR check_subject = start_match; /* start_match includes offset */
|
||||
|
||||
if (start_offset > 0)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
unsigned int i;
|
||||
if (start_match < end_subject && NOT_FIRSTCU(*start_match))
|
||||
return PCRE2_ERROR_BADUTFOFFSET;
|
||||
for (i = re->max_lookbehind; i > 0 && check_subject > subject; i--)
|
||||
{
|
||||
check_subject--;
|
||||
while (check_subject > subject &&
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
(*check_subject & 0xc0) == 0x80)
|
||||
#else /* 16-bit */
|
||||
(*check_subject & 0xfc00) == 0xdc00)
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||
check_subject--;
|
||||
}
|
||||
#else /* In the 32-bit library, one code unit equals one character. */
|
||||
check_subject -= re->max_lookbehind;
|
||||
if (check_subject < subject) check_subject = subject;
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||
}
|
||||
|
||||
/* Validate the relevant portion of the subject. After an error, adjust the
|
||||
offset to be an absolute offset in the whole string. */
|
||||
|
||||
match_data->rc = PRIV(valid_utf)(check_subject,
|
||||
length - (PCRE2_SIZE)(check_subject - subject), &(match_data->startchar));
|
||||
if (match_data->rc != 0)
|
||||
{
|
||||
match_data->startchar += (PCRE2_SIZE)(check_subject - subject);
|
||||
return match_data->rc;
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Set up the first code unit to match, if available. If there's no first code
|
||||
unit there may be a bitmap of possible first characters. */
|
||||
|
||||
if ((re->flags & PCRE2_FIRSTSET) != 0)
|
||||
{
|
||||
has_first_cu = TRUE;
|
||||
first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
|
||||
if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
|
||||
{
|
||||
first_cu2 = TABLE_GET(first_cu, mb->tables + fcc_offset, first_cu);
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (first_cu > 127 && !utf && (re->overall_options & PCRE2_UCP) != 0)
|
||||
first_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(first_cu);
|
||||
#else
|
||||
if (first_cu > 127 && (utf || (re->overall_options & PCRE2_UCP) != 0))
|
||||
first_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(first_cu);
|
||||
#endif
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
}
|
||||
else
|
||||
if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
|
||||
start_bits = re->start_bitmap;
|
||||
|
||||
/* There may be a "last known required code unit" set. */
|
||||
|
||||
if ((re->flags & PCRE2_LASTSET) != 0)
|
||||
{
|
||||
has_req_cu = TRUE;
|
||||
req_cu = req_cu2 = (PCRE2_UCHAR)(re->last_codeunit);
|
||||
if ((re->flags & PCRE2_LASTCASELESS) != 0)
|
||||
{
|
||||
req_cu2 = TABLE_GET(req_cu, mb->tables + fcc_offset, req_cu);
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (req_cu > 127 && !utf && (re->overall_options & PCRE2_UCP) != 0)
|
||||
req_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(req_cu);
|
||||
#else
|
||||
if (req_cu > 127 && (utf || (re->overall_options & PCRE2_UCP) != 0))
|
||||
req_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(req_cu);
|
||||
#endif
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
}
|
||||
|
||||
/* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT,
|
||||
free the memory that was obtained. */
|
||||
|
||||
if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
|
||||
{
|
||||
match_data->memctl.free((void *)match_data->subject,
|
||||
match_data->memctl.memory_data);
|
||||
match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT;
|
||||
}
|
||||
|
||||
/* Fill in fields that are always returned in the match data. */
|
||||
|
||||
match_data->code = re;
|
||||
match_data->subject = NULL; /* Default for no match */
|
||||
match_data->mark = NULL;
|
||||
match_data->matchedby = PCRE2_MATCHEDBY_DFA_INTERPRETER;
|
||||
|
||||
/* Call the main matching function, looping for a non-anchored regex after a
|
||||
failed match. If not restarting, perform certain optimizations at the start of
|
||||
a match. */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
/* ----------------- Start of match optimizations ---------------- */
|
||||
|
||||
/* There are some optimizations that avoid running the match if a known
|
||||
starting point is not found, or if a known later code unit is not present.
|
||||
However, there is an option (settable at compile time) that disables
|
||||
these, for testing and for ensuring that all callouts do actually occur.
|
||||
The optimizations must also be avoided when restarting a DFA match. */
|
||||
|
||||
if ((re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0 &&
|
||||
(options & PCRE2_DFA_RESTART) == 0)
|
||||
{
|
||||
/* If firstline is TRUE, the start of the match is constrained to the first
|
||||
line of a multiline string. That is, the match must be before or at the
|
||||
first newline following the start of matching. Temporarily adjust
|
||||
end_subject so that we stop the optimization scans for a first code unit
|
||||
immediately after the first character of a newline (the first code unit can
|
||||
legitimately be a newline). If the match fails at the newline, later code
|
||||
breaks this loop. */
|
||||
|
||||
if (firstline)
|
||||
{
|
||||
PCRE2_SPTR t = start_match;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
while (t < end_subject && !IS_NEWLINE(t))
|
||||
{
|
||||
t++;
|
||||
ACROSSCHAR(t < end_subject, t, t++);
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
while (t < end_subject && !IS_NEWLINE(t)) t++;
|
||||
end_subject = t;
|
||||
}
|
||||
|
||||
/* Anchored: check the first code unit if one is recorded. This may seem
|
||||
pointless but it can help in detecting a no match case without scanning for
|
||||
the required code unit. */
|
||||
|
||||
if (anchored)
|
||||
{
|
||||
if (has_first_cu || start_bits != NULL)
|
||||
{
|
||||
BOOL ok = start_match < end_subject;
|
||||
if (ok)
|
||||
{
|
||||
PCRE2_UCHAR c = UCHAR21TEST(start_match);
|
||||
ok = has_first_cu && (c == first_cu || c == first_cu2);
|
||||
if (!ok && start_bits != NULL)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (c > 255) c = 255;
|
||||
#endif
|
||||
ok = (start_bits[c/8] & (1u << (c&7))) != 0;
|
||||
}
|
||||
}
|
||||
if (!ok) break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Not anchored. Advance to a unique first code unit if there is one. */
|
||||
|
||||
else
|
||||
{
|
||||
if (has_first_cu)
|
||||
{
|
||||
if (first_cu != first_cu2) /* Caseless */
|
||||
{
|
||||
/* In 16-bit and 32_bit modes we have to do our own search, so can
|
||||
look for both cases at once. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
PCRE2_UCHAR smc;
|
||||
while (start_match < end_subject &&
|
||||
(smc = UCHAR21TEST(start_match)) != first_cu &&
|
||||
smc != first_cu2)
|
||||
start_match++;
|
||||
#else
|
||||
/* In 8-bit mode, the use of memchr() gives a big speed up, even
|
||||
though we have to call it twice in order to find the earliest
|
||||
occurrence of the code unit in either of its cases. Caching is used
|
||||
to remember the positions of previously found code units. This can
|
||||
make a huge difference when the strings are very long and only one
|
||||
case is actually present. */
|
||||
|
||||
PCRE2_SPTR pp1 = NULL;
|
||||
PCRE2_SPTR pp2 = NULL;
|
||||
PCRE2_SIZE searchlength = end_subject - start_match;
|
||||
|
||||
/* If we haven't got a previously found position for first_cu, or if
|
||||
the current starting position is later, we need to do a search. If
|
||||
the code unit is not found, set it to the end. */
|
||||
|
||||
if (memchr_found_first_cu == NULL ||
|
||||
start_match > memchr_found_first_cu)
|
||||
{
|
||||
pp1 = memchr(start_match, first_cu, searchlength);
|
||||
memchr_found_first_cu = (pp1 == NULL)? end_subject : pp1;
|
||||
}
|
||||
|
||||
/* If the start is before a previously found position, use the
|
||||
previous position, or NULL if a previous search failed. */
|
||||
|
||||
else pp1 = (memchr_found_first_cu == end_subject)? NULL :
|
||||
memchr_found_first_cu;
|
||||
|
||||
/* Do the same thing for the other case. */
|
||||
|
||||
if (memchr_found_first_cu2 == NULL ||
|
||||
start_match > memchr_found_first_cu2)
|
||||
{
|
||||
pp2 = memchr(start_match, first_cu2, searchlength);
|
||||
memchr_found_first_cu2 = (pp2 == NULL)? end_subject : pp2;
|
||||
}
|
||||
|
||||
else pp2 = (memchr_found_first_cu2 == end_subject)? NULL :
|
||||
memchr_found_first_cu2;
|
||||
|
||||
/* Set the start to the end of the subject if neither case was found.
|
||||
Otherwise, use the earlier found point. */
|
||||
|
||||
if (pp1 == NULL)
|
||||
start_match = (pp2 == NULL)? end_subject : pp2;
|
||||
else
|
||||
start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2;
|
||||
|
||||
#endif /* 8-bit handling */
|
||||
}
|
||||
|
||||
/* The caseful case is much simpler. */
|
||||
|
||||
else
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
while (start_match < end_subject && UCHAR21TEST(start_match) !=
|
||||
first_cu)
|
||||
start_match++;
|
||||
#else /* 8-bit code units */
|
||||
start_match = memchr(start_match, first_cu, end_subject - start_match);
|
||||
if (start_match == NULL) start_match = end_subject;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* If we can't find the required code unit, having reached the true end
|
||||
of the subject, break the bumpalong loop, to force a match failure,
|
||||
except when doing partial matching, when we let the next cycle run at
|
||||
the end of the subject. To see why, consider the pattern /(?<=abc)def/,
|
||||
which partially matches "abc", even though the string does not contain
|
||||
the starting character "d". If we have not reached the true end of the
|
||||
subject (PCRE2_FIRSTLINE caused end_subject to be temporarily modified)
|
||||
we also let the cycle run, because the matching string is legitimately
|
||||
allowed to start with the first code unit of a newline. */
|
||||
|
||||
if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0 &&
|
||||
start_match >= mb->end_subject)
|
||||
break;
|
||||
}
|
||||
|
||||
/* If there's no first code unit, advance to just after a linebreak for a
|
||||
multiline match if required. */
|
||||
|
||||
else if (startline)
|
||||
{
|
||||
if (start_match > mb->start_subject + start_offset)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
while (start_match < end_subject && !WAS_NEWLINE(start_match))
|
||||
{
|
||||
start_match++;
|
||||
ACROSSCHAR(start_match < end_subject, start_match, start_match++);
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
while (start_match < end_subject && !WAS_NEWLINE(start_match))
|
||||
start_match++;
|
||||
|
||||
/* If we have just passed a CR and the newline option is ANY or
|
||||
ANYCRLF, and we are now at a LF, advance the match position by one
|
||||
more code unit. */
|
||||
|
||||
if (start_match[-1] == CHAR_CR &&
|
||||
(mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
|
||||
start_match < end_subject &&
|
||||
UCHAR21TEST(start_match) == CHAR_NL)
|
||||
start_match++;
|
||||
}
|
||||
}
|
||||
|
||||
/* If there's no first code unit or a requirement for a multiline line
|
||||
start, advance to a non-unique first code unit if any have been
|
||||
identified. The bitmap contains only 256 bits. When code units are 16 or
|
||||
32 bits wide, all code units greater than 254 set the 255 bit. */
|
||||
|
||||
else if (start_bits != NULL)
|
||||
{
|
||||
while (start_match < end_subject)
|
||||
{
|
||||
uint32_t c = UCHAR21TEST(start_match);
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (c > 255) c = 255;
|
||||
#endif
|
||||
if ((start_bits[c/8] & (1u << (c&7))) != 0) break;
|
||||
start_match++;
|
||||
}
|
||||
|
||||
/* See comment above in first_cu checking about the next line. */
|
||||
|
||||
if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0 &&
|
||||
start_match >= mb->end_subject)
|
||||
break;
|
||||
}
|
||||
} /* End of first code unit handling */
|
||||
|
||||
/* Restore fudged end_subject */
|
||||
|
||||
end_subject = mb->end_subject;
|
||||
|
||||
/* The following two optimizations are disabled for partial matching. */
|
||||
|
||||
if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0)
|
||||
{
|
||||
PCRE2_SPTR p;
|
||||
|
||||
/* The minimum matching length is a lower bound; no actual string of that
|
||||
length may actually match the pattern. Although the value is, strictly,
|
||||
in characters, we treat it as code units to avoid spending too much time
|
||||
in this optimization. */
|
||||
|
||||
if (end_subject - start_match < re->minlength) goto NOMATCH_EXIT;
|
||||
|
||||
/* If req_cu is set, we know that that code unit must appear in the
|
||||
subject for the match to succeed. If the first code unit is set, req_cu
|
||||
must be later in the subject; otherwise the test starts at the match
|
||||
point. This optimization can save a huge amount of backtracking in
|
||||
patterns with nested unlimited repeats that aren't going to match.
|
||||
Writing separate code for cased/caseless versions makes it go faster, as
|
||||
does using an autoincrement and backing off on a match. As in the case of
|
||||
the first code unit, using memchr() in the 8-bit library gives a big
|
||||
speed up. Unlike the first_cu check above, we do not need to call
|
||||
memchr() twice in the caseless case because we only need to check for the
|
||||
presence of the character in either case, not find the first occurrence.
|
||||
|
||||
The search can be skipped if the code unit was found later than the
|
||||
current starting point in a previous iteration of the bumpalong loop.
|
||||
|
||||
HOWEVER: when the subject string is very, very long, searching to its end
|
||||
can take a long time, and give bad performance on quite ordinary
|
||||
patterns. This showed up when somebody was matching something like
|
||||
/^\d+C/ on a 32-megabyte string... so we don't do this when the string is
|
||||
sufficiently long, but it's worth searching a lot more for unanchored
|
||||
patterns. */
|
||||
|
||||
p = start_match + (has_first_cu? 1:0);
|
||||
if (has_req_cu && p > req_cu_ptr)
|
||||
{
|
||||
PCRE2_SIZE check_length = end_subject - start_match;
|
||||
|
||||
if (check_length < REQ_CU_MAX ||
|
||||
(!anchored && check_length < REQ_CU_MAX * 1000))
|
||||
{
|
||||
if (req_cu != req_cu2) /* Caseless */
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
while (p < end_subject)
|
||||
{
|
||||
uint32_t pp = UCHAR21INCTEST(p);
|
||||
if (pp == req_cu || pp == req_cu2) { p--; break; }
|
||||
}
|
||||
#else /* 8-bit code units */
|
||||
PCRE2_SPTR pp = p;
|
||||
p = memchr(pp, req_cu, end_subject - pp);
|
||||
if (p == NULL)
|
||||
{
|
||||
p = memchr(pp, req_cu2, end_subject - pp);
|
||||
if (p == NULL) p = end_subject;
|
||||
}
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
|
||||
}
|
||||
|
||||
/* The caseful case */
|
||||
|
||||
else
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
while (p < end_subject)
|
||||
{
|
||||
if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
|
||||
}
|
||||
|
||||
#else /* 8-bit code units */
|
||||
p = memchr(p, req_cu, end_subject - p);
|
||||
if (p == NULL) p = end_subject;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* If we can't find the required code unit, break the matching loop,
|
||||
forcing a match failure. */
|
||||
|
||||
if (p >= end_subject) break;
|
||||
|
||||
/* If we have found the required code unit, save the point where we
|
||||
found it, so that we don't search again next time round the loop if
|
||||
the start hasn't passed this code unit yet. */
|
||||
|
||||
req_cu_ptr = p;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ------------ End of start of match optimizations ------------ */
|
||||
|
||||
/* Give no match if we have passed the bumpalong limit. */
|
||||
|
||||
if (start_match > bumpalong_limit) break;
|
||||
|
||||
/* OK, now we can do the business */
|
||||
|
||||
mb->start_used_ptr = start_match;
|
||||
mb->last_used_ptr = start_match;
|
||||
mb->recursive = NULL;
|
||||
|
||||
rc = internal_dfa_match(
|
||||
mb, /* fixed match data */
|
||||
mb->start_code, /* this subexpression's code */
|
||||
start_match, /* where we currently are */
|
||||
start_offset, /* start offset in subject */
|
||||
match_data->ovector, /* offset vector */
|
||||
(uint32_t)match_data->oveccount * 2, /* actual size of same */
|
||||
workspace, /* workspace vector */
|
||||
(int)wscount, /* size of same */
|
||||
0, /* function recurse level */
|
||||
base_recursion_workspace); /* initial workspace for recursion */
|
||||
|
||||
/* Anything other than "no match" means we are done, always; otherwise, carry
|
||||
on only if not anchored. */
|
||||
|
||||
if (rc != PCRE2_ERROR_NOMATCH || anchored)
|
||||
{
|
||||
if (rc == PCRE2_ERROR_PARTIAL && match_data->oveccount > 0)
|
||||
{
|
||||
match_data->ovector[0] = (PCRE2_SIZE)(start_match - subject);
|
||||
match_data->ovector[1] = (PCRE2_SIZE)(end_subject - subject);
|
||||
}
|
||||
match_data->subject_length = length;
|
||||
match_data->leftchar = (PCRE2_SIZE)(mb->start_used_ptr - subject);
|
||||
match_data->rightchar = (PCRE2_SIZE)(mb->last_used_ptr - subject);
|
||||
match_data->startchar = (PCRE2_SIZE)(start_match - subject);
|
||||
match_data->rc = rc;
|
||||
|
||||
if (rc >= 0 &&(options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
|
||||
{
|
||||
length = CU2BYTES(length + was_zero_terminated);
|
||||
match_data->subject = match_data->memctl.malloc(length,
|
||||
match_data->memctl.memory_data);
|
||||
if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
memcpy((void *)match_data->subject, subject, length);
|
||||
match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (rc >= 0 || rc == PCRE2_ERROR_PARTIAL) match_data->subject = subject;
|
||||
}
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* Advance to the next subject character unless we are at the end of a line
|
||||
and firstline is set. */
|
||||
|
||||
if (firstline && IS_NEWLINE(start_match)) break;
|
||||
start_match++;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
ACROSSCHAR(start_match < end_subject, start_match, start_match++);
|
||||
}
|
||||
#endif
|
||||
if (start_match > end_subject) break;
|
||||
|
||||
/* If we have just passed a CR and we are now at a LF, and the pattern does
|
||||
not contain any explicit matches for \r or \n, and the newline option is CRLF
|
||||
or ANY or ANYCRLF, advance the match position by one more character. */
|
||||
|
||||
if (UCHAR21TEST(start_match - 1) == CHAR_CR &&
|
||||
start_match < end_subject &&
|
||||
UCHAR21TEST(start_match) == CHAR_NL &&
|
||||
(re->flags & PCRE2_HASCRORLF) == 0 &&
|
||||
(mb->nltype == NLTYPE_ANY ||
|
||||
mb->nltype == NLTYPE_ANYCRLF ||
|
||||
mb->nllen == 2))
|
||||
start_match++;
|
||||
|
||||
} /* "Bumpalong" loop */
|
||||
|
||||
NOMATCH_EXIT:
|
||||
rc = PCRE2_ERROR_NOMATCH;
|
||||
|
||||
EXIT:
|
||||
while (rws->next != NULL)
|
||||
{
|
||||
RWS_anchor *next = rws->next;
|
||||
rws->next = next->next;
|
||||
mb->memctl.free(next, mb->memctl.memory_data);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* These #undefs are here to enable unity builds with CMake. */
|
||||
|
||||
#undef NLBLOCK /* Block containing newline information */
|
||||
#undef PSSTART /* Field containing processed string start */
|
||||
#undef PSEND /* Field containing processed string end */
|
||||
|
||||
/* End of pcre2_dfa_match.c */
|
||||
297
3rd/pcre2/src/pcre2_dftables.c
Normal file
297
3rd/pcre2/src/pcre2_dftables.c
Normal file
@@ -0,0 +1,297 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This is a freestanding support program to generate a file containing
|
||||
character tables for PCRE2. The tables are built using the pcre2_maketables()
|
||||
function, which is part of the PCRE2 API. By default, the system's "C" locale
|
||||
is used rather than what the building user happens to have set, but the -L
|
||||
option can be used to select the current locale from the LC_ALL environment
|
||||
variable. By default, the tables are written in source form, but if -b is
|
||||
given, they are written in binary. */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <locale.h>
|
||||
|
||||
#define PCRE2_DFTABLES /* for pcre2_internal.h, pcre2_maketables.c */
|
||||
|
||||
#define PCRE2_CODE_UNIT_WIDTH 0 /* Must be set, but not relevant here */
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
#include "pcre2_maketables.c"
|
||||
|
||||
|
||||
/* Names of the character classes, one string per class, in the order in which
they are indexed by the code that uses this table. */

static const char *classlist[] =
  {
  "space",
  "xdigit",
  "digit",
  "upper",
  "lower",
  "word",
  "graph",
  "print",
  "punct",
  "cntrl"
  };
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Usage *
|
||||
*************************************************/
|
||||
|
||||
/* Write the command synopsis and the list of recognized options to stderr.
The text contains no format conversions, so it is emitted as a plain string. */

static void
usage(void)
{
static const char help_text[] =
  "Usage: pcre2_dftables [options] <output file>\n"
  "  -b    Write output in binary (default is source code)\n"
  "  -L    Use locale from LC_ALL (default is \"C\" locale)\n";

(void)fputs(help_text, stderr);
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Entry point *
|
||||
*************************************************/
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
FILE *f;
|
||||
int i;
|
||||
int nclass = 0;
|
||||
BOOL binary = FALSE;
|
||||
char *env = (char *)"C";
|
||||
const uint8_t *tables;
|
||||
const uint8_t *base_of_tables;
|
||||
|
||||
/* Process options */
|
||||
|
||||
for (i = 1; i < argc; i++)
|
||||
{
|
||||
char *arg = argv[i];
|
||||
if (*arg != '-') break;
|
||||
|
||||
if (strcmp(arg, "-help") == 0 || strcmp(arg, "--help") == 0)
|
||||
{
|
||||
usage();
|
||||
return 0;
|
||||
}
|
||||
|
||||
else if (strcmp(arg, "-L") == 0)
|
||||
{
|
||||
if (setlocale(LC_ALL, "") == NULL)
|
||||
{
|
||||
(void)fprintf(stderr, "pcre2_dftables: setlocale() failed\n");
|
||||
return 1;
|
||||
}
|
||||
env = getenv("LC_ALL");
|
||||
}
|
||||
|
||||
else if (strcmp(arg, "-b") == 0)
|
||||
binary = TRUE;
|
||||
|
||||
else
|
||||
{
|
||||
(void)fprintf(stderr, "pcre2_dftables: unrecognized option %s\n", arg);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (i != argc - 1)
|
||||
{
|
||||
(void)fprintf(stderr, "pcre2_dftables: one filename argument is required\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Make the tables */
|
||||
|
||||
tables = maketables();
|
||||
base_of_tables = tables;
|
||||
|
||||
f = fopen(argv[i], "wb");
|
||||
if (f == NULL)
|
||||
{
|
||||
fprintf(stderr, "pcre2_dftables: failed to open %s for writing\n", argv[1]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* If -b was specified, we write the tables in binary. */
|
||||
|
||||
if (binary)
|
||||
{
|
||||
int yield = 0;
|
||||
size_t len = fwrite(tables, 1, TABLES_LENGTH, f);
|
||||
if (len != TABLES_LENGTH)
|
||||
{
|
||||
(void)fprintf(stderr, "pcre2_dftables: fwrite() returned wrong length %d "
|
||||
"instead of %d\n", (int)len, TABLES_LENGTH);
|
||||
yield = 1;
|
||||
}
|
||||
fclose(f);
|
||||
free((void *)base_of_tables);
|
||||
return yield;
|
||||
}
|
||||
|
||||
/* Write the tables as source code for inclusion in the PCRE2 library. There
|
||||
are several fprintf() calls here, because gcc in pedantic mode complains about
|
||||
the very long string otherwise. */
|
||||
|
||||
(void)fprintf(f,
|
||||
"/*************************************************\n"
|
||||
"* Perl-Compatible Regular Expressions *\n"
|
||||
"*************************************************/\n\n"
|
||||
"/* This file was automatically written by the pcre2_dftables auxiliary\n"
|
||||
"program. It contains character tables that are used when no external\n"
|
||||
"tables are passed to PCRE2 by the application that calls it. The tables\n"
|
||||
"are used only for characters whose code values are less than 256, and\n"
|
||||
"only relevant if not in UCP mode. */\n\n");
|
||||
|
||||
(void)fprintf(f,
|
||||
"/* This set of tables was written in the %s locale. */\n\n", env);
|
||||
|
||||
(void)fprintf(f,
|
||||
"/* The pcre2_ftables program (which is distributed with PCRE2) can be used\n"
|
||||
"to build alternative versions of this file. This is necessary if you are\n"
|
||||
"running in an EBCDIC environment, or if you want to default to a different\n"
|
||||
"encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates\n"
|
||||
"these tables in the \"C\" locale by default. This happens automatically if\n"
|
||||
"PCRE2 is configured with --enable-rebuild-chartables. However, you can run\n"
|
||||
"pcre2_dftables manually with the -L option to build tables using the LC_ALL\n"
|
||||
"locale. */\n\n");
|
||||
|
||||
/* Force config.h in z/OS */
|
||||
|
||||
#if defined NATIVE_ZOS
|
||||
(void)fprintf(f,
|
||||
"/* For z/OS, config.h is forced */\n"
|
||||
"#ifndef HAVE_CONFIG_H\n"
|
||||
"#define HAVE_CONFIG_H 1\n"
|
||||
"#endif\n\n");
|
||||
#endif
|
||||
|
||||
(void)fprintf(f,
|
||||
"#ifdef HAVE_CONFIG_H\n"
|
||||
"#include \"config.h\"\n"
|
||||
"#endif\n\n"
|
||||
"#include \"pcre2_internal.h\"\n\n");
|
||||
|
||||
(void)fprintf(f,
|
||||
"const uint8_t PRIV(default_tables)[] = {\n\n"
|
||||
"/* This table is a lower casing table. */\n\n");
|
||||
|
||||
(void)fprintf(f, " ");
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
||||
fprintf(f, "%3d", *tables++);
|
||||
if (i != 255) fprintf(f, ",");
|
||||
}
|
||||
(void)fprintf(f, ",\n\n");
|
||||
|
||||
(void)fprintf(f, "/* This table is a case flipping table. */\n\n");
|
||||
|
||||
(void)fprintf(f, " ");
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
||||
fprintf(f, "%3d", *tables++);
|
||||
if (i != 255) fprintf(f, ",");
|
||||
}
|
||||
(void)fprintf(f, ",\n\n");
|
||||
|
||||
(void)fprintf(f,
|
||||
"/* This table contains bit maps for various character classes. Each map is 32\n"
|
||||
"bytes long and the bits run from the least significant end of each byte. The\n"
|
||||
"classes that have their own maps are: space, xdigit, digit, upper, lower, word,\n"
|
||||
"graph, print, punct, and cntrl. Other classes are built from combinations. */\n\n");
|
||||
|
||||
(void)fprintf(f, " ");
|
||||
for (i = 0; i < cbit_length; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0)
|
||||
{
|
||||
if ((i & 31) == 0) (void)fprintf(f, "\n");
|
||||
if ((i & 24) == 8) (void)fprintf(f, " /* %s */", classlist[nclass++]);
|
||||
(void)fprintf(f, "\n ");
|
||||
}
|
||||
(void)fprintf(f, "0x%02x", *tables++);
|
||||
if (i != cbit_length - 1) (void)fprintf(f, ",");
|
||||
}
|
||||
(void)fprintf(f, ",\n\n");
|
||||
|
||||
(void)fprintf(f,
|
||||
"/* This table identifies various classes of character by individual bits:\n"
|
||||
" 0x%02x white space character\n"
|
||||
" 0x%02x letter\n"
|
||||
" 0x%02x lower case letter\n"
|
||||
" 0x%02x decimal digit\n"
|
||||
" 0x%02x word (alphanumeric or '_')\n*/\n\n",
|
||||
ctype_space, ctype_letter, ctype_lcletter, ctype_digit, ctype_word);
|
||||
|
||||
(void)fprintf(f, " ");
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0)
|
||||
{
|
||||
(void)fprintf(f, " /* ");
|
||||
if (isprint(i-8)) (void)fprintf(f, " %c -", i-8);
|
||||
else (void)fprintf(f, "%3d-", i-8);
|
||||
if (isprint(i-1)) (void)fprintf(f, " %c ", i-1);
|
||||
else (void)fprintf(f, "%3d", i-1);
|
||||
(void)fprintf(f, " */\n ");
|
||||
}
|
||||
(void)fprintf(f, "0x%02x", *tables++);
|
||||
if (i != 255) (void)fprintf(f, ",");
|
||||
}
|
||||
|
||||
(void)fprintf(f, "};/* ");
|
||||
if (isprint(i-8)) (void)fprintf(f, " %c -", i-8);
|
||||
else (void)fprintf(f, "%3d-", i-8);
|
||||
if (isprint(i-1)) (void)fprintf(f, " %c ", i-1);
|
||||
else (void)fprintf(f, "%3d", i-1);
|
||||
(void)fprintf(f, " */\n\n/* End of pcre2_chartables.c */\n");
|
||||
|
||||
fclose(f);
|
||||
free((void *)base_of_tables);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre2_dftables.c */
|
||||
367
3rd/pcre2/src/pcre2_error.c
Normal file
367
3rd/pcre2/src/pcre2_error.c
Normal file
@@ -0,0 +1,367 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
#define STRING(a) # a
|
||||
#define XSTRING(s) STRING(s)
|
||||
|
||||
/* The texts of compile-time error messages. Compile-time error numbers start
|
||||
at COMPILE_ERROR_BASE (100).
|
||||
|
||||
This used to be a table of strings, but in order to reduce the number of
|
||||
relocations needed when a shared library is loaded dynamically, it is now one
|
||||
long string. We cannot use a table of offsets, because the lengths of inserts
|
||||
such as XSTRING(MAX_NAME_SIZE) are not known. Instead,
|
||||
pcre2_get_error_message() counts through to the one it wants - this isn't a
|
||||
performance issue because these strings are used only when there is an error.
|
||||
|
||||
Each substring ends with \0 to insert a null character. This includes the final
|
||||
substring, so that the whole string ends with \0\0, which can be detected when
|
||||
counting through. */
|
||||
|
||||
static const unsigned char compile_error_texts[] =
|
||||
"no error\0"
|
||||
"\\ at end of pattern\0"
|
||||
"\\c at end of pattern\0"
|
||||
"unrecognized character follows \\\0"
|
||||
"numbers out of order in {} quantifier\0"
|
||||
/* 5 */
|
||||
"number too big in {} quantifier\0"
|
||||
"missing terminating ] for character class\0"
|
||||
"escape sequence is invalid in character class\0"
|
||||
"range out of order in character class\0"
|
||||
"quantifier does not follow a repeatable item\0"
|
||||
/* 10 */
|
||||
"internal error: unexpected repeat\0"
|
||||
"unrecognized character after (? or (?-\0"
|
||||
"POSIX named classes are supported only within a class\0"
|
||||
"POSIX collating elements are not supported\0"
|
||||
"missing closing parenthesis\0"
|
||||
/* 15 */
|
||||
"reference to non-existent subpattern\0"
|
||||
"pattern passed as NULL with non-zero length\0"
|
||||
"unrecognised compile-time option bit(s)\0"
|
||||
"missing ) after (?# comment\0"
|
||||
"parentheses are too deeply nested\0"
|
||||
/* 20 */
|
||||
"regular expression is too large\0"
|
||||
"failed to allocate heap memory\0"
|
||||
"unmatched closing parenthesis\0"
|
||||
"internal error: code overflow\0"
|
||||
"missing closing parenthesis for condition\0"
|
||||
/* 25 */
|
||||
"length of lookbehind assertion is not limited\0"
|
||||
"a relative value of zero is not allowed\0"
|
||||
"conditional subpattern contains more than two branches\0"
|
||||
"atomic assertion expected after (?( or (?(?C)\0"
|
||||
"digit expected after (?+ or (?-\0"
|
||||
/* 30 */
|
||||
"unknown POSIX class name\0"
|
||||
"internal error in pcre2_study(): should not occur\0"
|
||||
"this version of PCRE2 does not have Unicode support\0"
|
||||
"parentheses are too deeply nested (stack check)\0"
|
||||
"character code point value in \\x{} or \\o{} is too large\0"
|
||||
/* 35 */
|
||||
"lookbehind is too complicated\0"
|
||||
"\\C is not allowed in a lookbehind assertion in UTF-" XSTRING(PCRE2_CODE_UNIT_WIDTH) " mode\0"
|
||||
"PCRE2 does not support \\F, \\L, \\l, \\N{name}, \\U, or \\u\0"
|
||||
"number after (?C is greater than 255\0"
|
||||
"closing parenthesis for (?C expected\0"
|
||||
/* 40 */
|
||||
"invalid escape sequence in (*VERB) name\0"
|
||||
"unrecognized character after (?P\0"
|
||||
"syntax error in subpattern name (missing terminator?)\0"
|
||||
"two named subpatterns have the same name (PCRE2_DUPNAMES not set)\0"
|
||||
"subpattern name must start with a non-digit\0"
|
||||
/* 45 */
|
||||
"this version of PCRE2 does not have support for \\P, \\p, or \\X\0"
|
||||
"malformed \\P or \\p sequence\0"
|
||||
"unknown property after \\P or \\p\0"
|
||||
"subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " code units)\0"
|
||||
"too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"
|
||||
/* 50 */
|
||||
"invalid range in character class\0"
|
||||
"octal value is greater than \\377 in 8-bit non-UTF-8 mode\0"
|
||||
"internal error: overran compiling workspace\0"
|
||||
"internal error: previously-checked referenced subpattern not found\0"
|
||||
"DEFINE subpattern contains more than one branch\0"
|
||||
/* 55 */
|
||||
"missing opening brace after \\o\0"
|
||||
"internal error: unknown newline setting\0"
|
||||
"\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
|
||||
"(?R (recursive pattern call) must be followed by a closing parenthesis\0"
|
||||
/* "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0" */
|
||||
"obsolete error (should not occur)\0" /* Was the above */
|
||||
/* 60 */
|
||||
"(*VERB) not recognized or malformed\0"
|
||||
"subpattern number is too big\0"
|
||||
"subpattern name expected\0"
|
||||
"internal error: parsed pattern overflow\0"
|
||||
"non-octal character in \\o{} (closing brace missing?)\0"
|
||||
/* 65 */
|
||||
"different names for subpatterns of the same number are not allowed\0"
|
||||
"(*MARK) must have an argument\0"
|
||||
"non-hex character in \\x{} (closing brace missing?)\0"
|
||||
#ifndef EBCDIC
|
||||
"\\c must be followed by a printable ASCII character\0"
|
||||
#else
|
||||
"\\c must be followed by a letter or one of [\\]^_?\0"
|
||||
#endif
|
||||
"\\k is not followed by a braced, angle-bracketed, or quoted name\0"
|
||||
/* 70 */
|
||||
"internal error: unknown meta code in check_lookbehinds()\0"
|
||||
"\\N is not supported in a class\0"
|
||||
"callout string is too long\0"
|
||||
"disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
|
||||
"using UTF is disabled by the application\0"
|
||||
/* 75 */
|
||||
"using UCP is disabled by the application\0"
|
||||
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
|
||||
"character code point value in \\u.... sequence is too large\0"
|
||||
"digits missing after \\x or in \\x{} or \\o{} or \\N{U+}\0"
|
||||
"syntax error or number too big in (?(VERSION condition\0"
|
||||
/* 80 */
|
||||
"internal error: unknown opcode in auto_possessify()\0"
|
||||
"missing terminating delimiter for callout with string argument\0"
|
||||
"unrecognized string delimiter follows (?C\0"
|
||||
"using \\C is disabled by the application\0"
|
||||
"(?| and/or (?J: or (?x: parentheses are too deeply nested\0"
|
||||
/* 85 */
|
||||
"using \\C is disabled in this PCRE2 library\0"
|
||||
"regular expression is too complicated\0"
|
||||
"lookbehind assertion is too long\0"
|
||||
"pattern string is longer than the limit set by the application\0"
|
||||
"internal error: unknown code in parsed pattern\0"
|
||||
/* 90 */
|
||||
"internal error: bad code value in parsed_skip()\0"
|
||||
"PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode\0"
|
||||
"invalid option bits with PCRE2_LITERAL\0"
|
||||
"\\N{U+dddd} is supported only in Unicode (UTF) mode\0"
|
||||
"invalid hyphen in option setting\0"
|
||||
/* 95 */
|
||||
"(*alpha_assertion) not recognized\0"
|
||||
"script runs require Unicode support, which this version of PCRE2 does not have\0"
|
||||
"too many capturing groups (maximum 65535)\0"
|
||||
"octal digit missing after \\0 (PCRE2_EXTRA_NO_BS0 is set)\0"
|
||||
"\\K is not allowed in lookarounds (but see PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)\0"
|
||||
/* 100 */
|
||||
"branch too long in variable-length lookbehind assertion\0"
|
||||
"compiled pattern would be longer than the limit set by the application\0"
|
||||
"octal value given by \\ddd is greater than \\377 (forbidden by PCRE2_EXTRA_PYTHON_OCTAL)\0"
|
||||
"using callouts is disabled by the application\0"
|
||||
"PCRE2_EXTRA_TURKISH_CASING require Unicode (UTF or UCP) mode\0"
|
||||
/* 105 */
|
||||
"PCRE2_EXTRA_TURKISH_CASING requires UTF in 8-bit mode\0"
|
||||
"PCRE2_EXTRA_TURKISH_CASING and PCRE2_EXTRA_CASELESS_RESTRICT are not compatible\0"
|
||||
"extended character class nesting is too deep\0"
|
||||
"invalid operator in extended character class\0"
|
||||
"unexpected operator in extended character class (no preceding operand)\0"
|
||||
/* 110 */
|
||||
"expected operand after operator in extended character class\0"
|
||||
"square brackets needed to clarify operator precedence in extended character class\0"
|
||||
"missing terminating ] for extended character class (note '[' must be escaped under PCRE2_ALT_EXTENDED_CLASS)\0"
|
||||
"unexpected expression in extended character class (no preceding operator)\0"
|
||||
"empty expression in extended character class\0"
|
||||
/* 115 */
|
||||
"terminating ] with no following closing parenthesis in (?[...]\0"
|
||||
"unexpected character in (?[...]) extended character class\0"
|
||||
;
|
||||
|
||||
/* Match-time and UTF error texts are in the same format. */
|
||||
|
||||
static const unsigned char match_error_texts[] =
|
||||
"no error\0"
|
||||
"no match\0"
|
||||
"partial match\0"
|
||||
"UTF-8 error: 1 byte missing at end\0"
|
||||
"UTF-8 error: 2 bytes missing at end\0"
|
||||
/* 5 */
|
||||
"UTF-8 error: 3 bytes missing at end\0"
|
||||
"UTF-8 error: 4 bytes missing at end\0"
|
||||
"UTF-8 error: 5 bytes missing at end\0"
|
||||
"UTF-8 error: byte 2 top bits not 0x80\0"
|
||||
"UTF-8 error: byte 3 top bits not 0x80\0"
|
||||
/* 10 */
|
||||
"UTF-8 error: byte 4 top bits not 0x80\0"
|
||||
"UTF-8 error: byte 5 top bits not 0x80\0"
|
||||
"UTF-8 error: byte 6 top bits not 0x80\0"
|
||||
"UTF-8 error: 5-byte character is not allowed (RFC 3629)\0"
|
||||
"UTF-8 error: 6-byte character is not allowed (RFC 3629)\0"
|
||||
/* 15 */
|
||||
"UTF-8 error: code points greater than 0x10ffff are not defined\0"
|
||||
"UTF-8 error: code points 0xd800-0xdfff are not defined\0"
|
||||
"UTF-8 error: overlong 2-byte sequence\0"
|
||||
"UTF-8 error: overlong 3-byte sequence\0"
|
||||
"UTF-8 error: overlong 4-byte sequence\0"
|
||||
/* 20 */
|
||||
"UTF-8 error: overlong 5-byte sequence\0"
|
||||
"UTF-8 error: overlong 6-byte sequence\0"
|
||||
"UTF-8 error: isolated byte with 0x80 bit set\0"
|
||||
"UTF-8 error: illegal byte (0xfe or 0xff)\0"
|
||||
"UTF-16 error: missing low surrogate at end\0"
|
||||
/* 25 */
|
||||
"UTF-16 error: invalid low surrogate\0"
|
||||
"UTF-16 error: isolated low surrogate\0"
|
||||
"UTF-32 error: code points 0xd800-0xdfff are not defined\0"
|
||||
"UTF-32 error: code points greater than 0x10ffff are not defined\0"
|
||||
"bad data value\0"
|
||||
/* 30 */
|
||||
"patterns do not all use the same character tables\0"
|
||||
"magic number missing\0"
|
||||
"pattern compiled in wrong mode: 8/16/32-bit error\0"
|
||||
"bad offset value\0"
|
||||
"bad option value\0"
|
||||
/* 35 */
|
||||
"invalid replacement string\0"
|
||||
"bad offset into UTF string\0"
|
||||
"callout error code\0" /* Never returned by PCRE2 itself */
|
||||
"invalid data in workspace for DFA restart\0"
|
||||
"too much recursion for DFA matching\0"
|
||||
/* 40 */
|
||||
"backreference condition or recursion test is not supported for DFA matching\0"
|
||||
"function is not supported for DFA matching\0"
|
||||
"pattern contains an item that is not supported for DFA matching\0"
|
||||
"workspace size exceeded in DFA matching\0"
|
||||
"internal error - pattern overwritten?\0"
|
||||
/* 45 */
|
||||
"bad JIT option\0"
|
||||
"JIT stack limit reached\0"
|
||||
"match limit exceeded\0"
|
||||
"no more memory\0"
|
||||
"unknown substring\0"
|
||||
/* 50 */
|
||||
"non-unique substring name\0"
|
||||
"NULL argument passed with non-zero length\0"
|
||||
"nested recursion at the same subject position\0"
|
||||
"matching depth limit exceeded\0"
|
||||
"requested value is not available\0"
|
||||
/* 55 */
|
||||
"requested value is not set\0"
|
||||
"offset limit set without PCRE2_USE_OFFSET_LIMIT\0"
|
||||
"bad escape sequence in replacement string\0"
|
||||
"expected closing curly bracket in replacement string\0"
|
||||
"bad substitution in replacement string\0"
|
||||
/* 60 */
|
||||
"match with end before start or start moved backwards is not supported\0"
|
||||
"too many replacements (more than INT_MAX)\0"
|
||||
"bad serialized data\0"
|
||||
"heap limit exceeded\0"
|
||||
"invalid syntax\0"
|
||||
/* 65 */
|
||||
"internal error - duplicate substitution match\0"
|
||||
"PCRE2_MATCH_INVALID_UTF is not supported for DFA matching\0"
|
||||
"INTERNAL ERROR: invalid substring offset\0"
|
||||
"feature is not supported by the JIT compiler\0"
|
||||
"error performing replacement case transformation\0"
|
||||
/* 70 */
|
||||
"replacement too large (longer than PCRE2_SIZE)\0"
|
||||
;
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return error message *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies an error message into a buffer whose units are of an
|
||||
appropriate width. Error numbers are positive for compile-time errors, and
|
||||
negative for match-time errors (except for UTF errors), but the numbers are all
|
||||
distinct.
|
||||
|
||||
Arguments:
|
||||
enumber error number
|
||||
buffer where to put the message (zero terminated)
|
||||
size size of the buffer in code units
|
||||
|
||||
Returns: length of message if all is well
|
||||
negative on error
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, PCRE2_SIZE size)
|
||||
{
|
||||
const unsigned char *message;
|
||||
PCRE2_SIZE i;
|
||||
int n;
|
||||
|
||||
if (size == 0) return PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
if (enumber >= COMPILE_ERROR_BASE) /* Compile error */
|
||||
{
|
||||
message = compile_error_texts;
|
||||
n = enumber - COMPILE_ERROR_BASE;
|
||||
}
|
||||
else if (enumber < 0) /* Match or UTF error */
|
||||
{
|
||||
message = match_error_texts;
|
||||
n = -enumber;
|
||||
}
|
||||
else /* Invalid error number */
|
||||
{
|
||||
message = (const unsigned char *)"\0"; /* Empty message list */
|
||||
n = 1;
|
||||
}
|
||||
|
||||
for (; n > 0; n--)
|
||||
{
|
||||
while (*message++ != CHAR_NUL) {};
|
||||
if (*message == CHAR_NUL) return PCRE2_ERROR_BADDATA;
|
||||
}
|
||||
|
||||
for (i = 0; *message != 0; i++)
|
||||
{
|
||||
if (i >= size - 1)
|
||||
{
|
||||
buffer[i] = 0; /* Terminate partial message */
|
||||
return PCRE2_ERROR_NOMEMORY;
|
||||
}
|
||||
buffer[i] = *message++;
|
||||
}
|
||||
|
||||
buffer[i] = 0;
|
||||
return (int)i;
|
||||
}
|
||||
|
||||
/* End of pcre2_error.c */
|
||||
162
3rd/pcre2/src/pcre2_extuni.c
Normal file
162
3rd/pcre2/src/pcre2_extuni.c
Normal file
@@ -0,0 +1,162 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This module contains an internal function that is used to match a Unicode
|
||||
extended grapheme sequence. It is used by both pcre2_match() and
|
||||
pcre2_dfa_match(). However, it is called only when Unicode support is being
|
||||
compiled. Nevertheless, we provide a dummy function when there is no Unicode
|
||||
support, because some compilers do not like functionless source files. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
/* Dummy function */
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
PCRE2_SPTR
|
||||
PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,
|
||||
PCRE2_SPTR end_subject, BOOL utf, int *xcount)
|
||||
{
|
||||
(void)c;
|
||||
(void)eptr;
|
||||
(void)start_subject;
|
||||
(void)end_subject;
|
||||
(void)utf;
|
||||
(void)xcount;
|
||||
return NULL;
|
||||
}
|
||||
#else
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Match an extended grapheme sequence *
|
||||
*************************************************/
|
||||
|
||||
/* NOTE: The logic contained in this function is replicated in three special-
|
||||
purpose functions in the pcre2_jit_compile.c module. If the logic below is
|
||||
changed, they must be kept in step so that the interpreter and the JIT have the
|
||||
same behaviour.
|
||||
|
||||
Arguments:
|
||||
c the first character
|
||||
eptr pointer to next character
|
||||
start_subject pointer to start of subject
|
||||
end_subject pointer to end of subject
|
||||
utf TRUE if in UTF mode
|
||||
xcount pointer to count of additional characters,
|
||||
or NULL if count not needed
|
||||
|
||||
Returns: pointer after the end of the sequence
|
||||
*/
|
||||
|
||||
PCRE2_SPTR
|
||||
PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,
|
||||
PCRE2_SPTR end_subject, BOOL utf, int *xcount)
|
||||
{
|
||||
BOOL was_ep_ZWJ = FALSE;
|
||||
int lgb = UCD_GRAPHBREAK(c);
|
||||
|
||||
while (eptr < end_subject)
|
||||
{
|
||||
int rgb;
|
||||
int len = 1;
|
||||
if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
|
||||
rgb = UCD_GRAPHBREAK(c);
|
||||
if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
|
||||
|
||||
/* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
|
||||
preceded by Extended Pictographic. */
|
||||
|
||||
if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ)
|
||||
break;
|
||||
|
||||
/* Not breaking between Regional Indicators is allowed only if there
|
||||
are an even number of preceding RIs. */
|
||||
|
||||
if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
|
||||
{
|
||||
int ricount = 0;
|
||||
PCRE2_SPTR bptr = eptr - 1;
|
||||
if (utf) BACKCHAR(bptr);
|
||||
|
||||
/* bptr is pointing to the left-hand character */
|
||||
|
||||
while (bptr > start_subject)
|
||||
{
|
||||
bptr--;
|
||||
if (utf)
|
||||
{
|
||||
BACKCHAR(bptr);
|
||||
GETCHAR(c, bptr);
|
||||
}
|
||||
else
|
||||
c = *bptr;
|
||||
if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break;
|
||||
ricount++;
|
||||
}
|
||||
if ((ricount & 1) != 0) break; /* Grapheme break required */
|
||||
}
|
||||
|
||||
/* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in
|
||||
between; see next statement). */
|
||||
|
||||
was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ);
|
||||
|
||||
/* If Extend follows Extended_Pictographic, do not update lgb; this allows
|
||||
any number of them before a following ZWJ. */
|
||||
|
||||
if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic) lgb = rgb;
|
||||
|
||||
eptr += len;
|
||||
if (xcount != NULL) *xcount += 1;
|
||||
}
|
||||
|
||||
return eptr;
|
||||
}
|
||||
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* End of pcre2_extuni.c */
|
||||
220
3rd/pcre2/src/pcre2_find_bracket.c
Normal file
220
3rd/pcre2/src/pcre2_find_bracket.c
Normal file
@@ -0,0 +1,220 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains a single function that scans through a compiled pattern
|
||||
until it finds a capturing bracket with the given number, or, if the number is
|
||||
negative, an instance of OP_REVERSE or OP_VREVERSE for a lookbehind. The
|
||||
function is called from pcre2_compile.c and also from pcre2_study.c when
|
||||
finding the minimum matching length. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Scan compiled regex for specific bracket *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
code points to start of expression
|
||||
utf TRUE in UTF mode
|
||||
number the required bracket number or negative to find a lookbehind
|
||||
|
||||
Returns: pointer to the opcode for the bracket, or NULL if not found
|
||||
*/
|
||||
|
||||
PCRE2_SPTR
|
||||
PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
PCRE2_UCHAR c = *code;
|
||||
|
||||
if (c == OP_END) return NULL;
|
||||
|
||||
/* XCLASS is used for classes that cannot be represented just by a bit map.
|
||||
This includes negated single high-valued characters. ECLASS is used for
|
||||
classes that use set operations internally. CALLOUT_STR is used for
|
||||
callouts with string arguments. In each case the length in the table is
|
||||
zero; the actual length is stored in the compiled code. */
|
||||
|
||||
if (c == OP_XCLASS || c == OP_ECLASS) code += GET(code, 1);
|
||||
else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
|
||||
|
||||
/* Handle lookbehind */
|
||||
|
||||
else if (c == OP_REVERSE || c == OP_VREVERSE)
|
||||
{
|
||||
if (number < 0) return code;
|
||||
code += PRIV(OP_lengths)[c];
|
||||
}
|
||||
|
||||
/* Handle capturing bracket */
|
||||
|
||||
else if (c == OP_CBRA || c == OP_SCBRA ||
|
||||
c == OP_CBRAPOS || c == OP_SCBRAPOS)
|
||||
{
|
||||
int n = (int)GET2(code, 1+LINK_SIZE);
|
||||
if (n == number) return code;
|
||||
code += PRIV(OP_lengths)[c];
|
||||
}
|
||||
|
||||
/* Otherwise, we can get the item's length from the table, except that for
|
||||
repeated character types, we have to test for \p and \P, which have an extra
|
||||
two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
|
||||
must add in its length. */
|
||||
|
||||
else
|
||||
{
|
||||
switch(c)
|
||||
{
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEPOSPLUS:
|
||||
case OP_TYPEPOSQUERY:
|
||||
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
|
||||
break;
|
||||
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEEXACT:
|
||||
case OP_TYPEPOSUPTO:
|
||||
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
|
||||
code += 2;
|
||||
break;
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
code += code[1];
|
||||
break;
|
||||
}
|
||||
|
||||
/* Add in the fixed length from the table */
|
||||
|
||||
code += PRIV(OP_lengths)[c];
|
||||
|
||||
/* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
|
||||
followed by a multi-byte character. The length in the table is a minimum, so
|
||||
we have to arrange to skip the extra bytes. */
|
||||
|
||||
#ifdef MAYBE_UTF_MULTI
|
||||
if (utf) switch(c)
|
||||
{
|
||||
case OP_CHAR:
|
||||
case OP_CHARI:
|
||||
case OP_NOT:
|
||||
case OP_NOTI:
|
||||
case OP_EXACT:
|
||||
case OP_EXACTI:
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTEXACTI:
|
||||
case OP_UPTO:
|
||||
case OP_UPTOI:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTUPTOI:
|
||||
case OP_MINUPTO:
|
||||
case OP_MINUPTOI:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_NOTMINUPTOI:
|
||||
case OP_POSUPTO:
|
||||
case OP_POSUPTOI:
|
||||
case OP_NOTPOSUPTO:
|
||||
case OP_NOTPOSUPTOI:
|
||||
case OP_STAR:
|
||||
case OP_STARI:
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTSTARI:
|
||||
case OP_MINSTAR:
|
||||
case OP_MINSTARI:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTMINSTARI:
|
||||
case OP_POSSTAR:
|
||||
case OP_POSSTARI:
|
||||
case OP_NOTPOSSTAR:
|
||||
case OP_NOTPOSSTARI:
|
||||
case OP_PLUS:
|
||||
case OP_PLUSI:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTPLUSI:
|
||||
case OP_MINPLUS:
|
||||
case OP_MINPLUSI:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTMINPLUSI:
|
||||
case OP_POSPLUS:
|
||||
case OP_POSPLUSI:
|
||||
case OP_NOTPOSPLUS:
|
||||
case OP_NOTPOSPLUSI:
|
||||
case OP_QUERY:
|
||||
case OP_QUERYI:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTQUERYI:
|
||||
case OP_MINQUERY:
|
||||
case OP_MINQUERYI:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_NOTMINQUERYI:
|
||||
case OP_POSQUERY:
|
||||
case OP_POSQUERYI:
|
||||
case OP_NOTPOSQUERY:
|
||||
case OP_NOTPOSQUERYI:
|
||||
if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
|
||||
break;
|
||||
}
|
||||
#else
|
||||
(void)(utf); /* Keep compiler happy by referencing function argument */
|
||||
#endif /* MAYBE_UTF_MULTI */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_find_bracket.c */
|
||||
804
3rd/pcre2/src/pcre2_fuzzsupport.c
Normal file
804
3rd/pcre2/src/pcre2_fuzzsupport.c
Normal file
@@ -0,0 +1,804 @@
|
||||
/***************************************************************************
|
||||
Fuzzer driver for PCRE2. Given an arbitrary string of bytes and a length, it
|
||||
tries to compile and match it, deriving options from the string itself. If
|
||||
STANDALONE is defined, a main program that calls the driver with the contents
|
||||
of specified files is compiled, and commentary on what is happening is output.
|
||||
If an argument starts with '=' the rest of it is taken as a literal string
|
||||
rather than a file name. This allows easy testing of short strings.
|
||||
|
||||
Written by Philip Hazel, October 2016
|
||||
Updated February 2024 (Addison Crump added 16-bit/32-bit and JIT support)
|
||||
Further updates March/April/May 2024 by PH
|
||||
***************************************************************************/
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
/* stack size adjustment */
|
||||
#include <sys/time.h>
|
||||
#include <sys/resource.h>
|
||||
|
||||
#define STACK_SIZE_MB 256
|
||||
#define JIT_SIZE_LIMIT (200 * 1024)
|
||||
|
||||
#ifndef PCRE2_CODE_UNIT_WIDTH
|
||||
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||
#endif
|
||||
|
||||
#include "config.h"
|
||||
#include "pcre2.h"
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
#define MAX_MATCH_SIZE 1000
|
||||
|
||||
#define DFA_WORKSPACE_COUNT 100
|
||||
|
||||
/* When adding new compile or match options, remember to update the functions
|
||||
below that output them. */
|
||||
|
||||
#define ALLOWED_COMPILE_OPTIONS \
|
||||
(PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \
|
||||
PCRE2_ALT_EXTENDED_CLASS|PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT| \
|
||||
PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \
|
||||
PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_ENDANCHORED|PCRE2_EXTENDED| \
|
||||
PCRE2_EXTENDED_MORE|PCRE2_FIRSTLINE| \
|
||||
PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \
|
||||
PCRE2_NO_AUTO_CAPTURE| \
|
||||
PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \
|
||||
PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_USE_OFFSET_LIMIT| \
|
||||
PCRE2_UTF)
|
||||
|
||||
#define ALLOWED_MATCH_OPTIONS \
|
||||
(PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
|
||||
PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \
|
||||
PCRE2_PARTIAL_SOFT)
|
||||
|
||||
#define BASE_MATCH_OPTIONS \
|
||||
(PCRE2_NO_JIT|PCRE2_DISABLE_RECURSELOOP_CHECK)
|
||||
|
||||
|
||||
#if defined(SUPPORT_DIFF_FUZZ) || defined(STANDALONE)
|
||||
static void print_compile_options(FILE *stream, uint32_t compile_options)
|
||||
{
|
||||
fprintf(stream, "Compile options %s%.8x =",
|
||||
(compile_options == PCRE2_NEVER_BACKSLASH_C)? "(base) " : "",
|
||||
compile_options);
|
||||
|
||||
fprintf(stream, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
|
||||
((compile_options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
|
||||
((compile_options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
|
||||
((compile_options & PCRE2_ALT_EXTENDED_CLASS) != 0)? "alt_extended_class" : "",
|
||||
((compile_options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
|
||||
((compile_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
|
||||
((compile_options & PCRE2_ANCHORED) != 0)? " anchored" : "",
|
||||
((compile_options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
|
||||
((compile_options & PCRE2_CASELESS) != 0)? " caseless" : "",
|
||||
((compile_options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
|
||||
((compile_options & PCRE2_DOTALL) != 0)? " dotall" : "",
|
||||
((compile_options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
|
||||
((compile_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
|
||||
((compile_options & PCRE2_EXTENDED) != 0)? " extended" : "",
|
||||
((compile_options & PCRE2_EXTENDED_MORE) != 0)? " extended_more" : "",
|
||||
((compile_options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
|
||||
((compile_options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
|
||||
((compile_options & PCRE2_MULTILINE) != 0)? " multiline" : "",
|
||||
((compile_options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
|
||||
((compile_options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "",
|
||||
((compile_options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "",
|
||||
((compile_options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
|
||||
((compile_options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
|
||||
((compile_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "",
|
||||
((compile_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
|
||||
((compile_options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
|
||||
((compile_options & PCRE2_UCP) != 0)? " ucp" : "",
|
||||
((compile_options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
|
||||
((compile_options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
|
||||
((compile_options & PCRE2_UTF) != 0)? " utf" : "");
|
||||
}
|
||||
|
||||
static void print_match_options(FILE *stream, uint32_t match_options)
|
||||
{
|
||||
fprintf(stream, "Match options %s%.8x =",
|
||||
(match_options == BASE_MATCH_OPTIONS)? "(base) " : "", match_options);
|
||||
|
||||
fprintf(stream, "%s%s%s%s%s%s%s%s%s%s%s\n",
|
||||
((match_options & PCRE2_ANCHORED) != 0)? " anchored" : "",
|
||||
((match_options & PCRE2_DISABLE_RECURSELOOP_CHECK) != 0)? " disable_recurseloop_check" : "",
|
||||
((match_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
|
||||
((match_options & PCRE2_NO_JIT) != 0)? " no_jit" : "",
|
||||
((match_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
|
||||
((match_options & PCRE2_NOTBOL) != 0)? " notbol" : "",
|
||||
((match_options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
|
||||
((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
|
||||
((match_options & PCRE2_NOTEOL) != 0)? " noteol" : "",
|
||||
((match_options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
|
||||
((match_options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
|
||||
}
|
||||
|
||||
|
||||
/* This function can print an error message at all code unit widths. */
|
||||
|
||||
static void print_error(FILE *f, int errorcode, const char *text, ...)
|
||||
{
|
||||
PCRE2_UCHAR buffer[256];
|
||||
PCRE2_UCHAR *p = buffer;
|
||||
va_list ap;
|
||||
va_start(ap, text);
|
||||
vfprintf(f, text, ap);
|
||||
va_end(ap);
|
||||
pcre2_get_error_message(errorcode, buffer, 256);
|
||||
while (*p != 0) fprintf(f, "%c", *p++);
|
||||
printf("\n");
|
||||
}
|
||||
#endif /* defined(SUPPORT_DIFF_FUZZ) || defined(STANDALONE) */
|
||||
|
||||
|
||||
#ifdef SUPPORT_JIT
|
||||
#ifdef SUPPORT_DIFF_FUZZ
|
||||
/* Output the first "count" captured substrings of a match, hex encoded, one
per line. A substring that cannot be extracted is reported with its error
message instead.

Arguments:
  stream       where to write
  count        number of substrings to output, starting from number 0
  match_data   the match data block holding the results
*/

static void dump_matches(FILE *stream, int count, pcre2_match_data *match_data)
{
for (int index = 0; index < count; index++)
  {
  PCRE2_UCHAR *bufferptr = NULL;
  PCRE2_SIZE bufflen = 0;
  int errorcode = pcre2_substring_get_bynumber(match_data, (uint32_t)index,
    &bufferptr, &bufflen);

  if (errorcode < 0)
    {
    print_error(stream, errorcode, "Match %d failed: ", index);
    continue;
    }

  fprintf(stream, "Match %d (hex encoded): ", index);
  for (PCRE2_SIZE i = 0; i < bufflen; i++)
    {
    fprintf(stream, "%02x", bufferptr[i]);
    }
  fprintf(stream, "\n");

  /* The substring was returned in newly obtained memory; release it. */

  pcre2_substring_free(bufferptr);
  }
}
|
||||
|
||||
/* This function describes the current test case being evaluated, then aborts */
|
||||
|
||||
static void describe_failure(
|
||||
const char *task,
|
||||
const PCRE2_UCHAR *data,
|
||||
PCRE2_SIZE size,
|
||||
uint32_t compile_options,
|
||||
uint32_t match_options,
|
||||
int errorcode,
|
||||
int errorcode_jit,
|
||||
int matches,
|
||||
int matches_jit,
|
||||
pcre2_match_data *match_data,
|
||||
pcre2_match_data *match_data_jit
|
||||
) {
|
||||
|
||||
fprintf(stderr, "Encountered failure while performing %s; context:\n", task);
|
||||
|
||||
fprintf(stderr, "Pattern/sample string (hex encoded): ");
|
||||
for (size_t i = 0; i < size; i++)
|
||||
{
|
||||
fprintf(stderr, "%02x", data[i]);
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
print_compile_options(stderr, compile_options);
|
||||
print_match_options(stderr, match_options);
|
||||
|
||||
if (errorcode < 0)
|
||||
{
|
||||
print_error(stderr, errorcode, "Non-JIT'd operation emitted an error: ");
|
||||
}
|
||||
|
||||
if (matches >= 0)
|
||||
{
|
||||
fprintf(stderr, "Non-JIT'd operation did not emit an error.\n");
|
||||
if (match_data != NULL)
|
||||
{
|
||||
fprintf(stderr, "%d matches discovered by non-JIT'd regex:\n", matches);
|
||||
dump_matches(stderr, matches, match_data);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
if (errorcode_jit < 0)
|
||||
{
|
||||
print_error(stderr, errorcode_jit, "JIT'd operation emitted error %d:",
|
||||
errorcode_jit);
|
||||
}
|
||||
|
||||
if (matches_jit >= 0)
|
||||
{
|
||||
fprintf(stderr, "JIT'd operation did not emit an error.\n");
|
||||
if (match_data_jit != NULL)
|
||||
{
|
||||
fprintf(stderr, "%d matches discovered by JIT'd regex:\n", matches_jit);
|
||||
dump_matches(stderr, matches_jit, match_data_jit);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
abort();
|
||||
}
|
||||
#endif /* SUPPORT_DIFF_FUZZ */
|
||||
#endif /* SUPPORT_JIT */
|
||||
|
||||
/* This is the callout function. Its only purpose is to halt matching if there
|
||||
are more than 100 callouts, as one way of stopping too much time being spent on
|
||||
fruitless matches. The callout data is a pointer to the counter. */
|
||||
|
||||
static int callout_function(pcre2_callout_block *cb, void *callout_data)
{
/* callout_data points to a uint32_t that counts the callouts made so far.
Bump it, and ask for the match to be abandoned once it exceeds 100. */

uint32_t *counter = (uint32_t *)callout_data;

(void)cb;  /* Avoid unused parameter warning */

*counter += 1;
if (*counter > 100) return PCRE2_ERROR_CALLOUT;
return 0;
}
|
||||
|
||||
/* Putting in this apparently unnecessary prototype prevents gcc from giving a
|
||||
"no previous prototype" warning when compiling at high warning level. */
|
||||
|
||||
int LLVMFuzzerInitialize(int *, char ***);
|
||||
|
||||
int LLVMFuzzerTestOneInput(unsigned char *, size_t);
|
||||
|
||||
int LLVMFuzzerInitialize(int *argc, char ***argv)
|
||||
{
|
||||
int rc;
|
||||
struct rlimit rlim;
|
||||
getrlimit(RLIMIT_STACK, &rlim);
|
||||
rlim.rlim_cur = STACK_SIZE_MB * 1024 * 1024;
|
||||
if (rlim.rlim_cur > rlim.rlim_max)
|
||||
{
|
||||
fprintf(stderr, "Hard stack size limit is too small\n");
|
||||
_exit(1);
|
||||
}
|
||||
rc = setrlimit(RLIMIT_STACK, &rlim);
|
||||
if (rc != 0)
|
||||
{
|
||||
fprintf(stderr, "Failed to expand stack size\n");
|
||||
_exit(1);
|
||||
}
|
||||
|
||||
(void)argc; /* Avoid "unused parameter" warnings */
|
||||
(void)argv;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Here's the driving function. */
|
||||
|
||||
int LLVMFuzzerTestOneInput(unsigned char *data, size_t size)
|
||||
{
|
||||
PCRE2_UCHAR *wdata;
|
||||
PCRE2_UCHAR *newwdata = NULL;
|
||||
uint32_t compile_options;
|
||||
uint32_t match_options;
|
||||
uint64_t random_options;
|
||||
pcre2_match_data *match_data = NULL;
|
||||
#ifdef SUPPORT_JIT
|
||||
pcre2_match_data *match_data_jit = NULL;
|
||||
#endif
|
||||
pcre2_compile_context *compile_context = NULL;
|
||||
pcre2_match_context *match_context = NULL;
|
||||
size_t match_size;
|
||||
int dfa_workspace[DFA_WORKSPACE_COUNT];
|
||||
|
||||
if (size < sizeof(random_options)) return -1;
|
||||
|
||||
random_options = *(uint64_t *)(data);
|
||||
data += sizeof(random_options);
|
||||
wdata = (PCRE2_UCHAR *)data;
|
||||
size -= sizeof(random_options);
|
||||
size /= PCRE2_CODE_UNIT_WIDTH / 8;
|
||||
|
||||
/* PCRE2 compiles quantified groups by replicating them. In certain cases of
|
||||
very large quantifiers this can lead to unacceptably long JIT compile times. To
|
||||
get around this, we scan the data string for large quantifiers that follow a
|
||||
closing parenthesis, and reduce the value of the quantifier to 10, assuming
|
||||
that this will make minimal difference to the detection of bugs.
|
||||
|
||||
Do the same for quantifiers that follow a closing square bracket, because
|
||||
classes that contain a number of non-ascii characters can take a lot of time
|
||||
when matching.
|
||||
|
||||
We have to make a copy of the input because oss-fuzz complains if we overwrite
|
||||
the original. Start the scan at the second character so there can be a
|
||||
lookbehind for a backslash, and end it before the end so that the next
|
||||
character can be checked for an opening brace. */
|
||||
|
||||
if (size > 3)
|
||||
{
|
||||
newwdata = malloc(size * sizeof(PCRE2_UCHAR));
|
||||
memcpy(newwdata, wdata, size * sizeof(PCRE2_UCHAR));
|
||||
wdata = newwdata;
|
||||
|
||||
for (size_t i = 1; i < size - 2; i++)
|
||||
{
|
||||
size_t j;
|
||||
|
||||
if ((wdata[i] != ')' && wdata[i] != ']') || wdata[i-1] == '\\' ||
|
||||
wdata[i+1] != '{')
|
||||
continue;
|
||||
i++; /* Points to '{' */
|
||||
|
||||
/* Loop for two values in a quantifier. Offset i points to brace or comma
|
||||
at the start of the loop. */
|
||||
|
||||
for (int ii = 0; ii < 2; ii++)
|
||||
{
|
||||
int q = 0;
|
||||
|
||||
if (i >= size - 1) goto END_QSCAN; /* Can happen for , */
|
||||
|
||||
/* Ignore leading spaces. */
|
||||
|
||||
while (wdata[i+1] == ' ' || wdata[i+1] == '\t')
|
||||
{
|
||||
i++;
|
||||
if (i >= size - 1) goto END_QSCAN;
|
||||
}
|
||||
|
||||
/* Ignore non-significant leading zeros. */
|
||||
|
||||
while (wdata[i+1] == '0' && i+2 < size && wdata[i+2] >= '0' &&
|
||||
wdata[i+2] <= '9')
|
||||
{
|
||||
i++;
|
||||
if (i >= size - 1) goto END_QSCAN;
|
||||
}
|
||||
|
||||
/* Scan for a number ending in brace, or comma in the first iteration,
|
||||
optionally preceded by space. */
|
||||
|
||||
for (j = i + 1; j < size && j < i + 7; j++)
|
||||
{
|
||||
if (wdata[j] == ' ' || wdata[j] == '\t')
|
||||
{
|
||||
j++;
|
||||
while (j < size && (wdata[j] == ' ' || wdata[j] == '\t')) j++;
|
||||
if (j >= size) goto OUTERLOOP;
|
||||
if (wdata[j] != '}' && wdata[j] != ',') goto OUTERLOOP;
|
||||
}
|
||||
if (wdata[j] == '}' || (ii == 0 && wdata[j] == ',')) break;
|
||||
|
||||
if (wdata[j] < '0' || wdata[j] > '9')
|
||||
{
|
||||
j--; /* Ensure this character is checked next. The */
|
||||
goto OUTERLOOP; /* string might be (e.g.) "){9){234}" */
|
||||
}
|
||||
q = q * 10 + (wdata[j] - '0');
|
||||
}
|
||||
|
||||
if (j >= size) goto END_QSCAN; /* End of data */
|
||||
|
||||
/* Hit ',' or '}' or read 6 digits. Six digits is a number > 65536 which
|
||||
is the maximum quantifier. Leave such numbers alone. */
|
||||
|
||||
if (j >= i + 7 || q > 65535) goto OUTERLOOP;
|
||||
|
||||
/* Limit the quantifier size to 10 */
|
||||
|
||||
if (q > 10)
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
printf("Reduced quantifier value %d to 10.\n", q);
|
||||
#endif
|
||||
for (size_t k = i + 1; k < j; k++) wdata[k] = '0';
|
||||
wdata[j - 2] = '1';
|
||||
}
|
||||
|
||||
/* Advance to end of number and break if reached closing brace (continue
|
||||
after comma, which is only valid in the first time round this loop). */
|
||||
|
||||
i = j;
|
||||
if (wdata[i] == '}') break;
|
||||
}
|
||||
|
||||
/* Continue along the data string */
|
||||
|
||||
OUTERLOOP:
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
END_QSCAN:
|
||||
|
||||
/* Limiting the length of the subject for matching stops fruitless searches
|
||||
in large trees taking too much time. */
|
||||
|
||||
match_size = (size > MAX_MATCH_SIZE)? MAX_MATCH_SIZE : size;
|
||||
|
||||
/* Create a compile context, and set a limit on the size of the compiled
|
||||
pattern. This stops the fuzzer using vast amounts of memory. */
|
||||
|
||||
compile_context = pcre2_compile_context_create(NULL);
|
||||
if (compile_context == NULL)
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
fprintf(stderr, "** Failed to create compile context block\n");
|
||||
#endif
|
||||
abort();
|
||||
}
|
||||
pcre2_set_max_pattern_compiled_length(compile_context, 10*1024*1024);
|
||||
|
||||
/* Ensure that all undefined option bits are zero (waste of time trying them)
|
||||
and also that PCRE2_NO_UTF_CHECK is unset, as there is no guarantee that the
|
||||
input is valid UTF. Also unset PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as there is
|
||||
no reason to disallow UTF and UCP. Force PCRE2_NEVER_BACKSLASH_C to be set
|
||||
because \C in random patterns is highly likely to cause a crash. */
|
||||
|
||||
compile_options = ((random_options >> 32) & ALLOWED_COMPILE_OPTIONS) |
|
||||
PCRE2_NEVER_BACKSLASH_C;
|
||||
match_options = (((uint32_t)random_options) & ALLOWED_MATCH_OPTIONS) |
|
||||
BASE_MATCH_OPTIONS;
|
||||
|
||||
/* Discard partial matching if PCRE2_ENDANCHORED is set, because they are not
|
||||
allowed together and just give an immediate error return. */
|
||||
|
||||
if (((compile_options|match_options) & PCRE2_ENDANCHORED) != 0)
|
||||
match_options &= ~(PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT);
|
||||
|
||||
/* Do the compile with and without the options, and after a successful compile,
|
||||
likewise do the match with and without the options. */
|
||||
|
||||
for (int i = 0; i < 2; i++)
|
||||
{
|
||||
uint32_t callout_count;
|
||||
int errorcode;
|
||||
#ifdef SUPPORT_JIT
|
||||
int errorcode_jit;
|
||||
#ifdef SUPPORT_DIFF_FUZZ
|
||||
int matches = 0;
|
||||
int matches_jit = 0;
|
||||
#endif
|
||||
#endif
|
||||
PCRE2_SIZE erroroffset;
|
||||
pcre2_code *code;
|
||||
|
||||
#ifdef STANDALONE
|
||||
printf("\n");
|
||||
print_compile_options(stdout, compile_options);
|
||||
#endif
|
||||
|
||||
code = pcre2_compile((PCRE2_SPTR)wdata, (PCRE2_SIZE)size, compile_options,
|
||||
&errorcode, &erroroffset, compile_context);
|
||||
|
||||
/* Compilation succeeded */
|
||||
|
||||
if (code != NULL)
|
||||
{
|
||||
int j;
|
||||
uint32_t save_match_options = match_options;
|
||||
|
||||
/* Call JIT compile only if the compiled pattern is not too big. */
|
||||
|
||||
#ifdef SUPPORT_JIT
|
||||
int jit_ret = -1;
|
||||
if (((struct pcre2_real_code *)code)->blocksize <= JIT_SIZE_LIMIT)
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
printf("Compile succeeded; calling JIT compile\n");
|
||||
#endif
|
||||
jit_ret = pcre2_jit_compile(code, PCRE2_JIT_COMPLETE);
|
||||
#ifdef STANDALONE
|
||||
if (jit_ret < 0) printf("JIT compile error %d\n", jit_ret);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
printf("Not calling JIT: compiled pattern is too long "
|
||||
"(%ld bytes; limit=%d)\n",
|
||||
((struct pcre2_real_code *)code)->blocksize, JIT_SIZE_LIMIT);
|
||||
#endif
|
||||
}
|
||||
#endif /* SUPPORT_JIT */
|
||||
|
||||
/* Create match data and context blocks only when we first need them. Set
|
||||
low match and depth limits to avoid wasting too much searching large
|
||||
pattern trees. Almost all matches are going to fail. */
|
||||
|
||||
if (match_data == NULL)
|
||||
{
|
||||
match_data = pcre2_match_data_create(32, NULL);
|
||||
#ifdef SUPPORT_JIT
|
||||
match_data_jit = pcre2_match_data_create(32, NULL);
|
||||
if (match_data == NULL || match_data_jit == NULL)
|
||||
#else
|
||||
if (match_data == NULL)
|
||||
#endif
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
fprintf(stderr, "** Failed to create match data block\n");
|
||||
#endif
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
if (match_context == NULL)
|
||||
{
|
||||
match_context = pcre2_match_context_create(NULL);
|
||||
if (match_context == NULL)
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
fprintf(stderr, "** Failed to create match context block\n");
|
||||
#endif
|
||||
abort();
|
||||
}
|
||||
(void)pcre2_set_match_limit(match_context, 100);
|
||||
(void)pcre2_set_depth_limit(match_context, 100);
|
||||
(void)pcre2_set_callout(match_context, callout_function, &callout_count);
|
||||
}
|
||||
|
||||
/* Match twice, with and without options. */
|
||||
|
||||
#ifdef STANDALONE
|
||||
printf("\n");
|
||||
#endif
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
print_match_options(stdout, match_options);
|
||||
#endif
|
||||
|
||||
callout_count = 0;
|
||||
errorcode = pcre2_match(code, (PCRE2_SPTR)wdata, (PCRE2_SIZE)match_size, 0,
|
||||
match_options, match_data, match_context);
|
||||
|
||||
#ifdef STANDALONE
|
||||
if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
|
||||
print_error(stdout, errorcode, "Match failed: error %d: ", errorcode);
|
||||
#endif
|
||||
|
||||
/* If JIT is enabled, do a JIT match and, if appropriately compiled, compare
|
||||
with the interpreter. */
|
||||
|
||||
#ifdef SUPPORT_JIT
|
||||
if (jit_ret >= 0)
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
printf("Matching with JIT\n");
|
||||
#endif
|
||||
callout_count = 0;
|
||||
errorcode_jit = pcre2_match(code, (PCRE2_SPTR)wdata, (PCRE2_SIZE)match_size, 0,
|
||||
match_options & ~PCRE2_NO_JIT, match_data_jit, match_context);
|
||||
|
||||
#ifdef STANDALONE
|
||||
if (errorcode_jit >= 0)
|
||||
printf("Match returned %d\n", errorcode_jit);
|
||||
else
|
||||
print_error(stdout, errorcode_jit, "JIT match failed: error %d: ",
|
||||
errorcode_jit);
|
||||
#else
|
||||
(void)errorcode_jit; /* Avoid compiler warning */
|
||||
#endif /* STANDALONE */
|
||||
|
||||
/* With differential matching enabled, compare with interpreter. */
|
||||
|
||||
#ifdef SUPPORT_DIFF_FUZZ
|
||||
matches = errorcode;
|
||||
matches_jit = errorcode_jit;
|
||||
|
||||
if (errorcode_jit != errorcode)
|
||||
{
|
||||
if (!(errorcode < 0 && errorcode_jit < 0) &&
|
||||
errorcode != PCRE2_ERROR_MATCHLIMIT && errorcode != PCRE2_ERROR_CALLOUT &&
|
||||
errorcode_jit != PCRE2_ERROR_MATCHLIMIT && errorcode_jit != PCRE2_ERROR_JIT_STACKLIMIT && errorcode_jit != PCRE2_ERROR_CALLOUT)
|
||||
{
|
||||
describe_failure("match errorcode comparison", wdata, size, compile_options, match_options, errorcode, errorcode_jit, matches, matches_jit, match_data, match_data_jit);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int index = 0; index < errorcode; index++)
|
||||
{
|
||||
PCRE2_UCHAR *bufferptr, *bufferptr_jit;
|
||||
PCRE2_SIZE bufflen, bufflen_jit;
|
||||
|
||||
bufferptr = bufferptr_jit = NULL;
|
||||
bufflen = bufflen_jit = 0;
|
||||
|
||||
errorcode = pcre2_substring_get_bynumber(match_data, (uint32_t) index, &bufferptr, &bufflen);
|
||||
errorcode_jit = pcre2_substring_get_bynumber(match_data_jit, (uint32_t) index, &bufferptr_jit, &bufflen_jit);
|
||||
|
||||
if (errorcode != errorcode_jit)
|
||||
{
|
||||
describe_failure("match entry errorcode comparison", wdata, size,
|
||||
compile_options, match_options, errorcode, errorcode_jit,
|
||||
matches, matches_jit, match_data, match_data_jit);
|
||||
}
|
||||
|
||||
if (errorcode >= 0)
|
||||
{
|
||||
if (bufflen != bufflen_jit)
|
||||
{
|
||||
describe_failure("match entry length comparison", wdata, size,
|
||||
compile_options, match_options, errorcode, errorcode_jit,
|
||||
matches, matches_jit, match_data, match_data_jit);
|
||||
}
|
||||
|
||||
if (memcmp(bufferptr, bufferptr_jit, bufflen) != 0)
|
||||
{
|
||||
describe_failure("match entry content comparison", wdata, size,
|
||||
compile_options, match_options, errorcode, errorcode_jit,
|
||||
matches, matches_jit, match_data, match_data_jit);
|
||||
}
|
||||
}
|
||||
|
||||
pcre2_substring_free(bufferptr);
|
||||
pcre2_substring_free(bufferptr_jit);
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_DIFF_FUZZ */
|
||||
}
|
||||
#endif /* SUPPORT_JIT */
|
||||
|
||||
if (match_options == BASE_MATCH_OPTIONS) break; /* Don't do same twice */
|
||||
match_options = BASE_MATCH_OPTIONS; /* For second time */
|
||||
}
|
||||
|
||||
/* Match with DFA twice, with and without options, but remove options that
|
||||
are not allowed with DFA. */
|
||||
|
||||
match_options = save_match_options & ~BASE_MATCH_OPTIONS;
|
||||
|
||||
#ifdef STANDALONE
|
||||
printf("\n");
|
||||
#endif
|
||||
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
printf("DFA match options %.8x =", match_options);
|
||||
printf("%s%s%s%s%s%s%s%s%s\n",
|
||||
((match_options & PCRE2_ANCHORED) != 0)? " anchored" : "",
|
||||
((match_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
|
||||
((match_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
|
||||
((match_options & PCRE2_NOTBOL) != 0)? " notbol" : "",
|
||||
((match_options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
|
||||
((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
|
||||
((match_options & PCRE2_NOTEOL) != 0)? " noteol" : "",
|
||||
((match_options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
|
||||
((match_options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
|
||||
#endif
|
||||
|
||||
callout_count = 0;
|
||||
errorcode = pcre2_dfa_match(code, (PCRE2_SPTR)wdata,
|
||||
(PCRE2_SIZE)match_size, 0, match_options, match_data,
|
||||
match_context, dfa_workspace, DFA_WORKSPACE_COUNT);
|
||||
|
||||
#ifdef STANDALONE
|
||||
if (errorcode >= 0)
|
||||
printf("Match returned %d\n", errorcode);
|
||||
else
|
||||
print_error(stdout, errorcode, "DFA match failed: error %d: ", errorcode);
|
||||
#endif
|
||||
|
||||
if (match_options == 0) break; /* No point doing same twice */
|
||||
match_options = 0; /* For second time */
|
||||
}
|
||||
|
||||
match_options = save_match_options; /* Reset for the second compile */
|
||||
pcre2_code_free(code);
|
||||
}
|
||||
|
||||
/* Compilation failed */
|
||||
|
||||
else
|
||||
{
|
||||
#ifdef STANDALONE
|
||||
print_error(stdout, errorcode, "Error %d at offset %lu: ", errorcode,
|
||||
erroroffset);
|
||||
#else
|
||||
if (errorcode == PCRE2_ERROR_INTERNAL) abort();
|
||||
#endif
|
||||
}
|
||||
|
||||
if (compile_options == PCRE2_NEVER_BACKSLASH_C) break; /* Avoid same twice */
|
||||
compile_options = PCRE2_NEVER_BACKSLASH_C; /* For second time */
|
||||
}
|
||||
|
||||
/* Tidy up before exiting */
|
||||
|
||||
if (match_data != NULL) pcre2_match_data_free(match_data);
|
||||
#ifdef SUPPORT_JIT
|
||||
if (match_data_jit != NULL) pcre2_match_data_free(match_data_jit);
|
||||
#endif
|
||||
free(newwdata);
|
||||
if (match_context != NULL) pcre2_match_context_free(match_context);
|
||||
if (compile_context != NULL) pcre2_compile_context_free(compile_context);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Optional main program. */
|
||||
|
||||
#ifdef STANDALONE
|
||||
/* Run the fuzzer driver once for each command line argument. An argument
that starts with '=' supplies the input directly (the text after the '=');
any other argument names a file whose contents are the input. Problems with
an individual argument are reported on stdout and the remaining arguments are
still processed.

Returns: 0
*/

int main(int argc, char **argv)
{
LLVMFuzzerInitialize(&argc, &argv);

if (argc < 2)
  {
  printf("** No arguments given\n");
  return 0;
  }

for (int i = 1; i < argc; i++)
  {
  size_t filelen;
  size_t readsize;
  long filepos;
  unsigned char *buffer;
  FILE *f;

  /* Handle a literal string. Copy to an exact size buffer so that checks for
  overrunning work. */

  if (argv[i][0] == '=')
    {
    readsize = strlen(argv[i]) - 1;
    printf("------ <Literal> ------\n");
    printf("Length = %zu\n", readsize);
    printf("%.*s\n", (int)readsize, argv[i]+1);
    buffer = (unsigned char *)malloc(readsize);
    if (buffer == NULL)
      printf("** Failed to allocate %zu bytes of memory\n", readsize);
    else
      {
      memcpy(buffer, argv[i]+1, readsize);
      LLVMFuzzerTestOneInput(buffer, readsize);
      free(buffer);
      }
    continue;
    }

  /* Handle a string given in a file */

  f = fopen(argv[i], "rb");
  if (f == NULL)
    {
    printf("** Failed to open %s: %s\n", argv[i], strerror(errno));
    continue;
    }

  printf("------ %s ------\n", argv[i]);

  /* Find the file size by seeking to the end. ftell() returns -1 on failure,
  which must not be turned into an enormous unsigned allocation size. */

  if (fseek(f, 0, SEEK_END) != 0 || (filepos = ftell(f)) < 0 ||
      fseek(f, 0, SEEK_SET) != 0)
    {
    printf("** Failed to determine size of %s: %s\n", argv[i], strerror(errno));
    fclose(f);
    continue;
    }
  filelen = (size_t)filepos;

  buffer = (unsigned char *)malloc(filelen);
  if (buffer == NULL)
    {
    printf("** Failed to allocate %zu bytes of memory\n", filelen);
    fclose(f);
    continue;
    }

  readsize = fread(buffer, 1, filelen, f);
  fclose(f);

  if (readsize != filelen)
    printf("** File size is %zu but fread() returned %zu\n", filelen, readsize);
  else
    {
    printf("Length = %zu\n", filelen);
    LLVMFuzzerTestOneInput(buffer, filelen);
    }
  free(buffer);
  }

return 0;
}
|
||||
#endif /* STANDALONE */
|
||||
|
||||
/* End */
|
||||
2235
3rd/pcre2/src/pcre2_internal.h
Normal file
2235
3rd/pcre2/src/pcre2_internal.h
Normal file
@@ -0,0 +1,2235 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE2 is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef PCRE2_INTERNAL_H_IDEMPOTENT_GUARD
|
||||
#define PCRE2_INTERNAL_H_IDEMPOTENT_GUARD
|
||||
|
||||
/* We do not support both EBCDIC and Unicode at the same time. The "configure"
|
||||
script prevents both being selected, but not everybody uses "configure". EBCDIC
|
||||
is only supported for the 8-bit library, but the check for this has to be later
|
||||
in this file, because the first part is not width-dependent, and is included by
|
||||
pcre2test.c with CODE_UNIT_WIDTH == 0. */
|
||||
|
||||
#if defined EBCDIC && defined SUPPORT_UNICODE
|
||||
#error The use of both EBCDIC and SUPPORT_UNICODE is not supported.
|
||||
#endif
|
||||
|
||||
/* When compiling one of the libraries, the value of PCRE2_CODE_UNIT_WIDTH must
|
||||
be 8, 16, or 32. AutoTools and CMake ensure that this is always the case, but
|
||||
other building methods may not, so here is a check. It is cut out when
|
||||
building pcre2test, because that sets the value to zero. No other source should
|
||||
be including this file. There is no explicit way of forcing a compile to be
|
||||
abandoned, but trying to include a non-existent file seems cleanest. Otherwise
|
||||
there will be many irrelevant consequential errors. */
|
||||
|
||||
#if (!defined PCRE2_BUILDING_PCRE2TEST && !defined PCRE2_DFTABLES) && \
|
||||
(!defined PCRE2_CODE_UNIT_WIDTH || \
|
||||
(PCRE2_CODE_UNIT_WIDTH != 8 && \
|
||||
PCRE2_CODE_UNIT_WIDTH != 16 && \
|
||||
PCRE2_CODE_UNIT_WIDTH != 32))
|
||||
#error PCRE2_CODE_UNIT_WIDTH must be defined as 8, 16, or 32.
|
||||
#include <AbandonCompile>
|
||||
#endif
|
||||
|
||||
|
||||
/* Standard C headers */
|
||||
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Macros to make boolean values more obvious. The #ifndef is to pacify
|
||||
compiler warnings in environments where these macros are defined elsewhere.
|
||||
Unfortunately, there is no way to do the same for the typedef. */
|
||||
|
||||
typedef int BOOL;
|
||||
#ifndef FALSE
|
||||
#define FALSE 0
|
||||
#define TRUE 1
|
||||
#endif
|
||||
|
||||
/* Helper macro for static (compile-time) assertions. Can be used inside
|
||||
functions, or at the top-level of a file. */
|
||||
#define STATIC_ASSERT_JOIN(a,b) a ## b
|
||||
#define STATIC_ASSERT(cond, msg) \
|
||||
typedef int STATIC_ASSERT_JOIN(static_assertion_,msg)[(cond)?1:-1]
|
||||
|
||||
/* Valgrind (memcheck) support */
|
||||
|
||||
#ifdef SUPPORT_VALGRIND
|
||||
#include <valgrind/memcheck.h>
|
||||
#endif
|
||||
|
||||
/* The -ftrivial-auto-var-init compiler option supports initializing all local variables
|
||||
to avoid some classes of bug, but this can cause an unacceptable slowdown
|
||||
for large on-stack arrays in hot functions. This macro lets us annotate
|
||||
such arrays. */
|
||||
|
||||
#ifdef HAVE_ATTRIBUTE_UNINITIALIZED
|
||||
#define PCRE2_KEEP_UNINITIALIZED __attribute__((uninitialized))
|
||||
#else
|
||||
#define PCRE2_KEEP_UNINITIALIZED
|
||||
#endif
|
||||
|
||||
/* Older versions of MSVC lack snprintf(). This define allows for
|
||||
warning/error-free compilation and testing with MSVC compilers back to at least
|
||||
MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
|
||||
|
||||
#if defined(_MSC_VER) && (_MSC_VER < 1900)
|
||||
#define snprintf _snprintf
|
||||
#endif
|
||||
|
||||
/* When compiling a DLL for Windows, the exported symbols have to be declared
|
||||
using some MS magic. I found some useful information on this web page:
|
||||
http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the
|
||||
information there, using __declspec(dllexport) without "extern" we have a
|
||||
definition; with "extern" we have a declaration. The settings here override the
|
||||
setting in pcre2.h (which is included below); it defines only PCRE2_EXP_DECL,
|
||||
which is all that is needed for applications (they just import the symbols). We
|
||||
use:
|
||||
|
||||
PCRE2_EXP_DECL for declarations
|
||||
PCRE2_EXP_DEFN for definitions
|
||||
|
||||
The reason for wrapping this in #ifndef PCRE2_EXP_DECL is so that pcre2test,
|
||||
which is an application, but needs to import this file in order to "peek" at
|
||||
internals, can #include pcre2.h first to get an application's-eye view.
|
||||
|
||||
In principle, people compiling for non-Windows, non-Unix-like (i.e. uncommon,
|
||||
special-purpose environments) might want to stick other stuff in front of
|
||||
exported symbols. That's why, in the non-Windows case, we set PCRE2_EXP_DEFN
|
||||
only if it is not already set. */
|
||||
|
||||
#ifndef PCRE2_EXP_DECL
|
||||
# ifdef _WIN32
|
||||
# ifndef PCRE2_STATIC
|
||||
# define PCRE2_EXP_DECL extern __declspec(dllexport)
|
||||
# define PCRE2_EXP_DEFN __declspec(dllexport)
|
||||
# else
|
||||
# define PCRE2_EXP_DECL extern PCRE2_EXPORT
|
||||
# define PCRE2_EXP_DEFN
|
||||
# endif
|
||||
# else
|
||||
# ifdef __cplusplus
|
||||
# define PCRE2_EXP_DECL extern "C" PCRE2_EXPORT
|
||||
# else
|
||||
# define PCRE2_EXP_DECL extern PCRE2_EXPORT
|
||||
# endif
|
||||
# ifndef PCRE2_EXP_DEFN
|
||||
# define PCRE2_EXP_DEFN PCRE2_EXP_DECL
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Include the public PCRE2 header and the definitions of UCP character
|
||||
property values. This must follow the setting of PCRE2_EXP_DECL above. */
|
||||
|
||||
#include "pcre2.h"
|
||||
#include "pcre2_ucp.h"
|
||||
|
||||
/* When PCRE2 is compiled as a C++ library, the subject pointer can be replaced
|
||||
with a custom type. This makes it possible, for example, to allow pcre2_match()
|
||||
to process subject strings that are discontinuous by using a smart pointer
|
||||
class. It must always be possible to inspect all of the subject string in
|
||||
pcre2_match() because of the way it backtracks. */
|
||||
|
||||
/* WARNING: This is as yet untested for PCRE2. */
|
||||
|
||||
#ifdef CUSTOM_SUBJECT_PTR
|
||||
#undef PCRE2_SPTR
|
||||
#define PCRE2_SPTR CUSTOM_SUBJECT_PTR
|
||||
#endif
|
||||
|
||||
/* When checking for integer overflow, we need to handle large integers.
|
||||
If a 64-bit integer type is available, we can use that.
|
||||
Otherwise we have to cast to double, which of course requires floating point
|
||||
arithmetic. Handle this by defining a macro for the appropriate type. */
|
||||
|
||||
#if defined INT64_MAX || defined int64_t
|
||||
#define INT64_OR_DOUBLE int64_t
|
||||
#else
|
||||
#define INT64_OR_DOUBLE double
|
||||
#endif
|
||||
|
||||
/* External (in the C sense) functions and tables that are private to the
|
||||
libraries are always referenced using the PRIV macro. This makes it possible
|
||||
for pcre2test.c to include some of the source files from the libraries using a
|
||||
different PRIV definition to avoid name clashes. It also makes it clear in the
|
||||
code that a non-static object is being referenced. */
|
||||
|
||||
#ifndef PRIV
|
||||
#define PRIV(name) _pcre2_##name
|
||||
#endif
|
||||
|
||||
/* When compiling for use with the Virtual Pascal compiler, these functions
|
||||
need to have their names changed. PCRE2 must be compiled with the -DVPCOMPAT
|
||||
option on the command line. */
|
||||
|
||||
#ifdef VPCOMPAT
|
||||
#define strlen(s) _strlen(s)
|
||||
#define strncmp(s1,s2,m) _strncmp(s1,s2,m)
|
||||
#define memcmp(s,c,n) _memcmp(s,c,n)
|
||||
#define memcpy(d,s,n) _memcpy(d,s,n)
|
||||
#define memmove(d,s,n) _memmove(d,s,n)
|
||||
#define memset(s,c,n) _memset(s,c,n)
|
||||
#else /* VPCOMPAT */
|
||||
|
||||
/* Otherwise, to cope with SunOS4 and other systems that lack memmove(), define
|
||||
a macro that calls an emulating function. */
|
||||
|
||||
#ifndef HAVE_MEMMOVE
|
||||
#undef memmove /* Some systems may have a macro */
|
||||
#define memmove(a, b, c) PRIV(memmove)(a, b, c)
|
||||
#endif /* not HAVE_MEMMOVE */
|
||||
#endif /* not VPCOMPAT */
|
||||
|
||||
/* This is an unsigned int value that no UTF character can ever have, as
|
||||
Unicode doesn't go beyond 0x0010ffff. */
|
||||
|
||||
#define NOTACHAR 0xffffffff
|
||||
|
||||
/* This is the largest valid UTF/Unicode code point. */
|
||||
|
||||
#define MAX_UTF_CODE_POINT 0x10ffff
|
||||
|
||||
/* Compile-time positive error numbers (all except UTF errors, which are
|
||||
negative) start at this value. It should probably never be changed, in case
|
||||
some application is checking for specific numbers. There is a copy of this
|
||||
#define in pcre2posix.c (which now no longer includes this file). Ideally, a
|
||||
way of having a single definition should be found, but as the number is
|
||||
unlikely to change, this is not a pressing issue. The original reason for
|
||||
having a base other than 0 was to keep the absolute values of compile-time and
|
||||
run-time error numbers numerically different, but in the event the code does
|
||||
not rely on this. */
|
||||
|
||||
#define COMPILE_ERROR_BASE 100
|
||||
|
||||
/* The initial frames vector for remembering pcre2_match() backtracking points
|
||||
is allocated on the heap, of this size (bytes) or ten times the frame size if
|
||||
larger, unless the heap limit is smaller. Typical frame sizes are a few hundred
|
||||
bytes (it depends on the number of capturing parentheses) so 20KiB handles
|
||||
quite a few frames. A larger vector on the heap is obtained for matches that
|
||||
need more frames, subject to the heap limit. */
|
||||
|
||||
#define START_FRAMES_SIZE 20480
|
||||
|
||||
/* For DFA matching, an initial internal workspace vector is allocated on the
|
||||
stack. The heap is used only if this turns out to be too small. */
|
||||
|
||||
#define DFA_START_RWS_SIZE 30720
|
||||
|
||||
/* Define the default BSR convention. */
|
||||
|
||||
#ifdef BSR_ANYCRLF
|
||||
#define BSR_DEFAULT PCRE2_BSR_ANYCRLF
|
||||
#else
|
||||
#define BSR_DEFAULT PCRE2_BSR_UNICODE
|
||||
#endif
|
||||
|
||||
|
||||
/* ---------------- Basic UTF-8 macros ---------------- */
|
||||
|
||||
/* These UTF-8 macros are always defined because they are used in pcre2test for
|
||||
handling wide characters in 16-bit and 32-bit modes, even if an 8-bit library
|
||||
is not supported. */
|
||||
|
||||
/* Tests whether a UTF-8 code point needs extra bytes to decode. */
|
||||
|
||||
#define HASUTF8EXTRALEN(c) ((c) >= 0xc0)
|
||||
|
||||
/* The following macros were originally written in the form of loops that used
|
||||
data from the tables whose names start with PRIV(utf8_table). They were
|
||||
rewritten by a user so as not to use loops, because in some environments this
|
||||
gives a significant performance advantage, and it seems never to do any harm.
|
||||
*/
|
||||
|
||||
/* Base macro to pick up the remaining bytes of a UTF-8 character, not
advancing the pointer. On entry c is expected to hold the character's first
byte (the macro never reads eptr[0] itself); bits in c select how many
following bytes, from one to five, are read starting at eptr[1]. On exit c
holds the combined value. No validity checking is done. The arguments are
evaluated more than once, so they must be free of side effects, and c must be
a modifiable lvalue. The arguments are parenthesized in the expansion so that
eptr may be an expression such as "p + 1". */

#define GETUTF8(c, eptr) \
    { \
    if (((c) & 0x20u) == 0) \
      c = (((c) & 0x1fu) << 6) | ((eptr)[1] & 0x3fu); \
    else if (((c) & 0x10u) == 0) \
      c = (((c) & 0x0fu) << 12) | (((eptr)[1] & 0x3fu) << 6) | \
          ((eptr)[2] & 0x3fu); \
    else if (((c) & 0x08u) == 0) \
      c = (((c) & 0x07u) << 18) | (((eptr)[1] & 0x3fu) << 12) | \
          (((eptr)[2] & 0x3fu) << 6) | ((eptr)[3] & 0x3fu); \
    else if (((c) & 0x04u) == 0) \
      c = (((c) & 0x03u) << 24) | (((eptr)[1] & 0x3fu) << 18) | \
          (((eptr)[2] & 0x3fu) << 12) | (((eptr)[3] & 0x3fu) << 6) | \
          ((eptr)[4] & 0x3fu); \
    else \
      c = (((c) & 0x01u) << 30) | (((eptr)[1] & 0x3fu) << 24) | \
          (((eptr)[2] & 0x3fu) << 18) | (((eptr)[3] & 0x3fu) << 12) | \
          (((eptr)[4] & 0x3fu) << 6) | ((eptr)[5] & 0x3fu); \
    }
|
||||
|
||||
/* Base macro to pick up the remaining bytes of a UTF-8 character, advancing
the pointer. On entry c is expected to hold the character's first byte and
eptr to point at the byte that follows it (the first byte read is *eptr). Bits
in c select how many bytes, from one to five, are consumed. On exit c holds
the combined value and eptr has been moved past the bytes that were read. No
validity checking is done. Both c and eptr must be modifiable lvalues; they
are parenthesized in the expansion so that eptr may be an lvalue expression
such as "*pp". */

#define GETUTF8INC(c, eptr) \
    { \
    if (((c) & 0x20u) == 0) \
      c = (((c) & 0x1fu) << 6) | (*(eptr)++ & 0x3fu); \
    else if (((c) & 0x10u) == 0) \
      { \
      c = (((c) & 0x0fu) << 12) | ((*(eptr) & 0x3fu) << 6) | \
          ((eptr)[1] & 0x3fu); \
      (eptr) += 2; \
      } \
    else if (((c) & 0x08u) == 0) \
      { \
      c = (((c) & 0x07u) << 18) | ((*(eptr) & 0x3fu) << 12) | \
          (((eptr)[1] & 0x3fu) << 6) | ((eptr)[2] & 0x3fu); \
      (eptr) += 3; \
      } \
    else if (((c) & 0x04u) == 0) \
      { \
      c = (((c) & 0x03u) << 24) | ((*(eptr) & 0x3fu) << 18) | \
          (((eptr)[1] & 0x3fu) << 12) | (((eptr)[2] & 0x3fu) << 6) | \
          ((eptr)[3] & 0x3fu); \
      (eptr) += 4; \
      } \
    else \
      { \
      c = (((c) & 0x01u) << 30) | ((*(eptr) & 0x3fu) << 24) | \
          (((eptr)[1] & 0x3fu) << 18) | (((eptr)[2] & 0x3fu) << 12) | \
          (((eptr)[3] & 0x3fu) << 6) | ((eptr)[4] & 0x3fu); \
      (eptr) += 5; \
      } \
    }
|
||||
|
||||
/* Base macro to pick up the remaining bytes of a UTF-8 character, not
advancing the pointer, incrementing the length. On entry c is expected to hold
the character's first byte (the macro never reads eptr[0] itself); bits in c
select how many following bytes, from one to five, are read starting at
eptr[1]. On exit c holds the combined value and len has been increased by the
number of following bytes that were read. No validity checking is done. The
arguments are evaluated more than once; c and len must be modifiable lvalues.
All three are parenthesized in the expansion so that eptr may be an expression
such as "p + 1" and len an lvalue expression such as "*plen". */

#define GETUTF8LEN(c, eptr, len) \
    { \
    if (((c) & 0x20u) == 0) \
      { \
      c = (((c) & 0x1fu) << 6) | ((eptr)[1] & 0x3fu); \
      (len)++; \
      } \
    else if (((c) & 0x10u) == 0) \
      { \
      c = (((c) & 0x0fu) << 12) | (((eptr)[1] & 0x3fu) << 6) | \
          ((eptr)[2] & 0x3fu); \
      (len) += 2; \
      } \
    else if (((c) & 0x08u) == 0) \
      { \
      c = (((c) & 0x07u) << 18) | (((eptr)[1] & 0x3fu) << 12) | \
          (((eptr)[2] & 0x3fu) << 6) | ((eptr)[3] & 0x3fu); \
      (len) += 3; \
      } \
    else if (((c) & 0x04u) == 0) \
      { \
      c = (((c) & 0x03u) << 24) | (((eptr)[1] & 0x3fu) << 18) | \
          (((eptr)[2] & 0x3fu) << 12) | (((eptr)[3] & 0x3fu) << 6) | \
          ((eptr)[4] & 0x3fu); \
      (len) += 4; \
      } \
    else \
      { \
      c = (((c) & 0x01u) << 30) | (((eptr)[1] & 0x3fu) << 24) | \
          (((eptr)[2] & 0x3fu) << 18) | (((eptr)[3] & 0x3fu) << 12) | \
          (((eptr)[4] & 0x3fu) << 6) | ((eptr)[5] & 0x3fu); \
      (len) += 5; \
      } \
    }
|
||||
|
||||
/* --------------- Whitespace macros ---------------- */
|
||||
|
||||
/* Tests for Unicode horizontal and vertical whitespace characters must check a
|
||||
number of different values. Using a switch statement for this generates the
|
||||
fastest code (no loop, no memory access), and there are several places in the
|
||||
interpreter code where this happens. In order to ensure that all the case lists
|
||||
remain in step, we use macros so that there is only one place where the lists
|
||||
are defined.
|
||||
|
||||
These values are also required as lists in pcre2_compile.c when processing \h,
|
||||
\H, \v and \V in a character class. The lists are defined in pcre2_tables.c,
|
||||
but macros that define the values are here so that all the definitions are
|
||||
together. The lists must be in ascending character order, terminated by
|
||||
NOTACHAR (which is 0xffffffff).
|
||||
|
||||
Any changes should ensure that the various macros are kept in step with each
|
||||
other. NOTE: The values also appear in pcre2_jit_compile.c. */
|
||||
|
||||
/* -------------- ASCII/Unicode environments -------------- */
|
||||
|
||||
#ifndef EBCDIC
|
||||
|
||||
/* Character U+180E (Mongolian Vowel Separator) is not included in the list of
|
||||
spaces in the Unicode file PropList.txt, and Perl does not recognize it as a
|
||||
space. However, in many other sources it is listed as a space and has been in
|
||||
PCRE (both APIs) for a long time. */
|
||||
|
||||
#define HSPACE_LIST \
|
||||
CHAR_HT, CHAR_SPACE, CHAR_NBSP, \
|
||||
0x1680, 0x180e, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, \
|
||||
0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202f, 0x205f, 0x3000, \
|
||||
NOTACHAR
|
||||
|
||||
#define HSPACE_MULTIBYTE_CASES \
|
||||
case 0x1680: /* OGHAM SPACE MARK */ \
|
||||
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ \
|
||||
case 0x2000: /* EN QUAD */ \
|
||||
case 0x2001: /* EM QUAD */ \
|
||||
case 0x2002: /* EN SPACE */ \
|
||||
case 0x2003: /* EM SPACE */ \
|
||||
case 0x2004: /* THREE-PER-EM SPACE */ \
|
||||
case 0x2005: /* FOUR-PER-EM SPACE */ \
|
||||
case 0x2006: /* SIX-PER-EM SPACE */ \
|
||||
case 0x2007: /* FIGURE SPACE */ \
|
||||
case 0x2008: /* PUNCTUATION SPACE */ \
|
||||
case 0x2009: /* THIN SPACE */ \
|
||||
case 0x200A: /* HAIR SPACE */ \
|
||||
case 0x202f: /* NARROW NO-BREAK SPACE */ \
|
||||
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ \
|
||||
case 0x3000 /* IDEOGRAPHIC SPACE */
|
||||
|
||||
#define HSPACE_BYTE_CASES \
|
||||
case CHAR_HT: \
|
||||
case CHAR_SPACE: \
|
||||
case CHAR_NBSP
|
||||
|
||||
#define HSPACE_CASES \
|
||||
HSPACE_BYTE_CASES: \
|
||||
HSPACE_MULTIBYTE_CASES
|
||||
|
||||
#define VSPACE_LIST \
|
||||
CHAR_LF, CHAR_VT, CHAR_FF, CHAR_CR, CHAR_NEL, 0x2028, 0x2029, NOTACHAR
|
||||
|
||||
#define VSPACE_MULTIBYTE_CASES \
|
||||
case 0x2028: /* LINE SEPARATOR */ \
|
||||
case 0x2029 /* PARAGRAPH SEPARATOR */
|
||||
|
||||
#define VSPACE_BYTE_CASES \
|
||||
case CHAR_LF: \
|
||||
case CHAR_VT: \
|
||||
case CHAR_FF: \
|
||||
case CHAR_CR: \
|
||||
case CHAR_NEL
|
||||
|
||||
#define VSPACE_CASES \
|
||||
VSPACE_BYTE_CASES: \
|
||||
VSPACE_MULTIBYTE_CASES
|
||||
|
||||
/* -------------- EBCDIC environments -------------- */
|
||||
|
||||
#else
|
||||
#define HSPACE_LIST CHAR_HT, CHAR_SPACE, CHAR_NBSP, NOTACHAR
|
||||
|
||||
#define HSPACE_BYTE_CASES \
|
||||
case CHAR_HT: \
|
||||
case CHAR_SPACE: \
|
||||
case CHAR_NBSP
|
||||
|
||||
#define HSPACE_CASES HSPACE_BYTE_CASES
|
||||
|
||||
#ifdef EBCDIC_NL25
|
||||
#define VSPACE_LIST \
|
||||
CHAR_VT, CHAR_FF, CHAR_CR, CHAR_NEL, CHAR_LF, NOTACHAR
|
||||
#else
|
||||
#define VSPACE_LIST \
|
||||
CHAR_VT, CHAR_FF, CHAR_CR, CHAR_LF, CHAR_NEL, NOTACHAR
|
||||
#endif
|
||||
|
||||
#define VSPACE_BYTE_CASES \
|
||||
case CHAR_LF: \
|
||||
case CHAR_VT: \
|
||||
case CHAR_FF: \
|
||||
case CHAR_CR: \
|
||||
case CHAR_NEL
|
||||
|
||||
#define VSPACE_CASES VSPACE_BYTE_CASES
|
||||
#endif /* EBCDIC */
|
||||
|
||||
/* -------------- End of whitespace macros -------------- */
|
||||
|
||||
|
||||
/* PCRE2 is able to support several different kinds of newline (CR, LF, CRLF,
|
||||
"any" and "anycrlf" at present). The following macros are used to package up
|
||||
testing for newlines. NLBLOCK, PSSTART, and PSEND are defined in the various
|
||||
modules to indicate in which datablock the parameters exist, and what the
|
||||
start/end of string field names are. */
|
||||
|
||||
#define NLTYPE_FIXED 0 /* Newline is a fixed length string */
|
||||
#define NLTYPE_ANY 1 /* Newline is any Unicode line ending */
|
||||
#define NLTYPE_ANYCRLF 2 /* Newline is CR, LF, or CRLF */
|
||||
|
||||
/* This macro checks for a newline at the given position. It yields non-zero
when a newline starts at p. For a newline type other than NLTYPE_FIXED the
decision is delegated to PRIV(is_newline)(), provided p is before
NLBLOCK->PSEND. For NLTYPE_FIXED the code units at p are compared with
NLBLOCK->nl[0] and, unless NLBLOCK->nllen is 1, NLBLOCK->nl[1], after checking
that nllen units remain before PSEND. NLBLOCK, PSEND, and a variable named utf
must be in scope where the macro is used. The argument p is evaluated several
times and is parenthesized in the expansion so that it may be an expression. */

#define IS_NEWLINE(p) \
  ((NLBLOCK->nltype != NLTYPE_FIXED)? \
    ((p) < NLBLOCK->PSEND && \
     PRIV(is_newline)((p), NLBLOCK->nltype, NLBLOCK->PSEND, \
       &(NLBLOCK->nllen), utf)) \
    : \
    ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
     UCHAR21TEST(p) == NLBLOCK->nl[0] && \
     (NLBLOCK->nllen == 1 || UCHAR21TEST((p)+1) == NLBLOCK->nl[1]) \
    ) \
  )
|
||||
|
||||
/* This macro checks for a newline immediately preceding the given position.
It yields non-zero when a newline ends at p. For a newline type other than
NLTYPE_FIXED the decision is delegated to PRIV(was_newline)(), provided p is
after NLBLOCK->PSSTART. For NLTYPE_FIXED the code units starting nllen before p
are compared with NLBLOCK->nl[0] and, unless NLBLOCK->nllen is 1,
NLBLOCK->nl[1], after checking that at least nllen units precede p. NLBLOCK,
PSSTART, and a variable named utf must be in scope where the macro is used.
The argument p is evaluated several times and is parenthesized in the
expansion so that it may be an expression. */

#define WAS_NEWLINE(p) \
  ((NLBLOCK->nltype != NLTYPE_FIXED)? \
    ((p) > NLBLOCK->PSSTART && \
     PRIV(was_newline)((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \
       &(NLBLOCK->nllen), utf)) \
    : \
    ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
     UCHAR21TEST((p) - NLBLOCK->nllen) == NLBLOCK->nl[0] && \
     (NLBLOCK->nllen == 1 || \
       UCHAR21TEST((p) - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \
    ) \
  )
|
||||
|
||||
/* Private flags containing information about the compiled pattern. The first
|
||||
three must not be changed, because whichever is set is actually the number of
|
||||
bytes in a code unit in that mode. */
|
||||
|
||||
#define PCRE2_MODE8 0x00000001u /* compiled in 8 bit mode */
|
||||
#define PCRE2_MODE16 0x00000002u /* compiled in 16 bit mode */
|
||||
#define PCRE2_MODE32 0x00000004u /* compiled in 32 bit mode */
|
||||
#define PCRE2_FIRSTSET 0x00000010u /* first_code unit is set */
|
||||
#define PCRE2_FIRSTCASELESS 0x00000020u /* caseless first code unit */
|
||||
#define PCRE2_FIRSTMAPSET 0x00000040u /* bitmap of first code units is set */
|
||||
#define PCRE2_LASTSET 0x00000080u /* last code unit is set */
|
||||
#define PCRE2_LASTCASELESS 0x00000100u /* caseless last code unit */
|
||||
#define PCRE2_STARTLINE 0x00000200u /* start after \n for multiline */
|
||||
#define PCRE2_JCHANGED 0x00000400u /* j option used in pattern */
|
||||
#define PCRE2_HASCRORLF 0x00000800u /* explicit \r or \n in pattern */
|
||||
#define PCRE2_HASTHEN 0x00001000u /* pattern contains (*THEN) */
|
||||
#define PCRE2_MATCH_EMPTY 0x00002000u /* pattern can match empty string */
|
||||
#define PCRE2_BSR_SET 0x00004000u /* BSR was set in the pattern */
|
||||
#define PCRE2_NL_SET 0x00008000u /* newline was set in the pattern */
|
||||
#define PCRE2_NOTEMPTY_SET 0x00010000u /* (*NOTEMPTY) used ) keep */
|
||||
#define PCRE2_NE_ATST_SET 0x00020000u /* (*NOTEMPTY_ATSTART) used) together */
|
||||
#define PCRE2_DEREF_TABLES 0x00040000u /* release character tables */
|
||||
#define PCRE2_NOJIT 0x00080000u /* (*NOJIT) used */
|
||||
#define PCRE2_HASBKPORX 0x00100000u /* contains \P, \p, or \X */
|
||||
#define PCRE2_DUPCAPUSED 0x00200000u /* contains (?| */
|
||||
#define PCRE2_HASBKC 0x00400000u /* contains \C */
|
||||
#define PCRE2_HASACCEPT 0x00800000u /* contains (*ACCEPT) */
|
||||
|
||||
#define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)
|
||||
|
||||
/* Values for the matchedby field in a match data block. */
|
||||
|
||||
enum { PCRE2_MATCHEDBY_INTERPRETER, /* pcre2_match() */
|
||||
PCRE2_MATCHEDBY_DFA_INTERPRETER, /* pcre2_dfa_match() */
|
||||
PCRE2_MATCHEDBY_JIT }; /* pcre2_jit_match() */
|
||||
|
||||
/* Values for the flags field in a match data block. */
|
||||
|
||||
#define PCRE2_MD_COPIED_SUBJECT 0x01u
|
||||
|
||||
/* Magic number to provide a small check against being handed junk. */
|
||||
|
||||
#define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */
|
||||
|
||||
/* The maximum remaining length of subject we are prepared to search for a
|
||||
req_unit match from an anchored pattern. In 8-bit mode, memchr() is used and is
|
||||
much faster than the search loop that has to be used in 16-bit and 32-bit
|
||||
modes. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define REQ_CU_MAX 5000
|
||||
#else
|
||||
#define REQ_CU_MAX 2000
|
||||
#endif
|
||||
|
||||
/* The maximum nesting depth for Unicode character class sets.
|
||||
Currently fixed. Warning: the interpreter relies on this so it can encode
|
||||
the operand stack in a uint32_t. A nesting limit of 15 implies (15*2+1)=31
|
||||
stack operands required, due to the fact that we have two (and only two)
|
||||
levels of operator precedence. In the UTS#18 syntax, you can write 'x&&y[z]'
|
||||
and in Perl syntax you can write '(?[ x - y & (z) ])', both of which imply
|
||||
pushing the match results for x & y to the stack. */
|
||||
|
||||
#define ECLASS_NEST_LIMIT 15
|
||||
|
||||
/* Offsets for the bitmap tables in the cbits set of tables. Each table
|
||||
contains a set of bits for a class map. Some classes are built by combining
|
||||
these tables. */
|
||||
|
||||
#define cbit_space 0 /* [:space:] or \s */
|
||||
#define cbit_xdigit 32 /* [:xdigit:] */
|
||||
#define cbit_digit 64 /* [:digit:] or \d */
|
||||
#define cbit_upper 96 /* [:upper:] */
|
||||
#define cbit_lower 128 /* [:lower:] */
|
||||
#define cbit_word 160 /* [:word:] or \w */
|
||||
#define cbit_graph 192 /* [:graph:] */
|
||||
#define cbit_print 224 /* [:print:] */
|
||||
#define cbit_punct 256 /* [:punct:] */
|
||||
#define cbit_cntrl 288 /* [:cntrl:] */
|
||||
#define cbit_length 320 /* Length of the cbits table */
|
||||
|
||||
/* Bit definitions for entries in the ctypes table. Do not change these values
|
||||
without checking pcre2_jit_compile.c, which has an assertion to ensure that
|
||||
ctype_word has the value 16. */
|
||||
|
||||
#define ctype_space 0x01
|
||||
#define ctype_letter 0x02
|
||||
#define ctype_lcletter 0x04
|
||||
#define ctype_digit 0x08
|
||||
#define ctype_word 0x10 /* alphanumeric or '_' */
|
||||
|
||||
/* Offsets of the various tables from the base tables pointer, and
|
||||
total length of the tables. */
|
||||
|
||||
#define lcc_offset 0 /* Lower case */
|
||||
#define fcc_offset 256 /* Flip case */
|
||||
#define cbits_offset 512 /* Character classes */
|
||||
#define ctypes_offset (cbits_offset + cbit_length) /* Character types */
|
||||
#define TABLES_LENGTH (ctypes_offset + 256)
|
||||
|
||||
/* Private flags used in compile_context.optimization_flags */
|
||||
|
||||
#define PCRE2_OPTIM_AUTO_POSSESS 0x00000001u
|
||||
#define PCRE2_OPTIM_DOTSTAR_ANCHOR 0x00000002u
|
||||
#define PCRE2_OPTIM_START_OPTIMIZE 0x00000004u
|
||||
|
||||
#define PCRE2_OPTIMIZATION_ALL 0x00000007u
|
||||
|
||||
/* -------------------- Character and string names ------------------------ */
|
||||
|
||||
/* If PCRE2 is to support UTF-8 on EBCDIC platforms, we cannot use normal
|
||||
character constants like '*' because the compiler would emit their EBCDIC code,
|
||||
which is different from their ASCII/UTF-8 code. Instead we define macros for
|
||||
the characters so that they always use the ASCII/UTF-8 code when UTF-8 support
|
||||
is enabled. When UTF-8 support is not enabled, the definitions use character
|
||||
literals. Both character and string versions of each character are needed, and
|
||||
there are some longer strings as well.
|
||||
|
||||
This means that, on EBCDIC platforms, the PCRE2 library can handle either
|
||||
EBCDIC, or UTF-8, but not both. To support both in the same compiled library
|
||||
would need different lookups depending on whether PCRE2_UTF was set or not.
|
||||
This would make it impossible to use characters in switch/case statements,
|
||||
which would reduce performance. For a theoretical use (which nobody has asked
|
||||
for) in a minority area (EBCDIC platforms), this is not sensible. Any
|
||||
application that did need both could compile two versions of the library, using
|
||||
macros to give the functions distinct names. */
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
|
||||
/* UTF-8 support is not enabled; use the platform-dependent character literals
|
||||
so that PCRE2 works in both ASCII and EBCDIC environments, but only in non-UTF
|
||||
mode. Newline characters are problematic in EBCDIC. Though it has CR and LF
|
||||
characters, a common practice has been to use its NL (0x15) character as the
|
||||
line terminator in C-like processing environments. However, sometimes the LF
|
||||
(0x25) character is used instead, according to this Unicode document:
|
||||
|
||||
http://unicode.org/standard/reports/tr13/tr13-5.html
|
||||
|
||||
PCRE2 defaults EBCDIC NL to 0x15, but has a build-time option to select 0x25
|
||||
instead. Whichever is *not* chosen is defined as NEL.
|
||||
|
||||
In both ASCII and EBCDIC environments, CHAR_NL and CHAR_LF are synonyms for the
|
||||
same code point. */
|
||||
|
||||
#ifdef EBCDIC
|
||||
|
||||
#ifndef EBCDIC_NL25
|
||||
#define CHAR_NL '\x15'
|
||||
#define CHAR_NEL '\x25'
|
||||
#define STR_NL "\x15"
|
||||
#define STR_NEL "\x25"
|
||||
#else
|
||||
#define CHAR_NL '\x25'
|
||||
#define CHAR_NEL '\x15'
|
||||
#define STR_NL "\x25"
|
||||
#define STR_NEL "\x15"
|
||||
#endif
|
||||
|
||||
#define CHAR_LF CHAR_NL
|
||||
#define STR_LF STR_NL
|
||||
|
||||
#define CHAR_ESC '\047'
|
||||
#define CHAR_DEL '\007'
|
||||
#define CHAR_NBSP ((unsigned char)'\x41')
|
||||
#define STR_ESC "\047"
|
||||
#define STR_DEL "\007"
|
||||
|
||||
#else /* Not EBCDIC */
|
||||
|
||||
/* In ASCII/Unicode, linefeed is '\n' and we equate this to NL for
|
||||
compatibility. NEL is the Unicode newline character; make sure it is
|
||||
a positive value. */
|
||||
|
||||
#define CHAR_LF '\n'
|
||||
#define CHAR_NL CHAR_LF
|
||||
#define CHAR_NEL ((unsigned char)'\x85')
|
||||
#define CHAR_ESC '\033'
|
||||
#define CHAR_DEL '\177'
|
||||
#define CHAR_NBSP ((unsigned char)'\xa0')
|
||||
|
||||
#define STR_LF "\n"
|
||||
#define STR_NL STR_LF
|
||||
#define STR_NEL "\x85"
|
||||
#define STR_ESC "\033"
|
||||
#define STR_DEL "\177"
|
||||
|
||||
#endif /* EBCDIC */
|
||||
|
||||
/* The remaining definitions work in both environments. */
|
||||
|
||||
#define CHAR_NUL '\0'
|
||||
#define CHAR_HT '\t'
|
||||
#define CHAR_VT '\v'
|
||||
#define CHAR_FF '\f'
|
||||
#define CHAR_CR '\r'
|
||||
#define CHAR_BS '\b'
|
||||
#define CHAR_BEL '\a'
|
||||
|
||||
#define CHAR_SPACE ' '
|
||||
#define CHAR_EXCLAMATION_MARK '!'
|
||||
#define CHAR_QUOTATION_MARK '"'
|
||||
#define CHAR_NUMBER_SIGN '#'
|
||||
#define CHAR_DOLLAR_SIGN '$'
|
||||
#define CHAR_PERCENT_SIGN '%'
|
||||
#define CHAR_AMPERSAND '&'
|
||||
#define CHAR_APOSTROPHE '\''
|
||||
#define CHAR_LEFT_PARENTHESIS '('
|
||||
#define CHAR_RIGHT_PARENTHESIS ')'
|
||||
#define CHAR_ASTERISK '*'
|
||||
#define CHAR_PLUS '+'
|
||||
#define CHAR_COMMA ','
|
||||
#define CHAR_MINUS '-'
|
||||
#define CHAR_DOT '.'
|
||||
#define CHAR_SLASH '/'
|
||||
#define CHAR_0 '0'
|
||||
#define CHAR_1 '1'
|
||||
#define CHAR_2 '2'
|
||||
#define CHAR_3 '3'
|
||||
#define CHAR_4 '4'
|
||||
#define CHAR_5 '5'
|
||||
#define CHAR_6 '6'
|
||||
#define CHAR_7 '7'
|
||||
#define CHAR_8 '8'
|
||||
#define CHAR_9 '9'
|
||||
#define CHAR_COLON ':'
|
||||
#define CHAR_SEMICOLON ';'
|
||||
#define CHAR_LESS_THAN_SIGN '<'
|
||||
#define CHAR_EQUALS_SIGN '='
|
||||
#define CHAR_GREATER_THAN_SIGN '>'
|
||||
#define CHAR_QUESTION_MARK '?'
|
||||
#define CHAR_COMMERCIAL_AT '@'
|
||||
#define CHAR_A 'A'
|
||||
#define CHAR_B 'B'
|
||||
#define CHAR_C 'C'
|
||||
#define CHAR_D 'D'
|
||||
#define CHAR_E 'E'
|
||||
#define CHAR_F 'F'
|
||||
#define CHAR_G 'G'
|
||||
#define CHAR_H 'H'
|
||||
#define CHAR_I 'I'
|
||||
#define CHAR_J 'J'
|
||||
#define CHAR_K 'K'
|
||||
#define CHAR_L 'L'
|
||||
#define CHAR_M 'M'
|
||||
#define CHAR_N 'N'
|
||||
#define CHAR_O 'O'
|
||||
#define CHAR_P 'P'
|
||||
#define CHAR_Q 'Q'
|
||||
#define CHAR_R 'R'
|
||||
#define CHAR_S 'S'
|
||||
#define CHAR_T 'T'
|
||||
#define CHAR_U 'U'
|
||||
#define CHAR_V 'V'
|
||||
#define CHAR_W 'W'
|
||||
#define CHAR_X 'X'
|
||||
#define CHAR_Y 'Y'
|
||||
#define CHAR_Z 'Z'
|
||||
#define CHAR_LEFT_SQUARE_BRACKET '['
|
||||
#define CHAR_BACKSLASH '\\'
|
||||
#define CHAR_RIGHT_SQUARE_BRACKET ']'
|
||||
#define CHAR_CIRCUMFLEX_ACCENT '^'
|
||||
#define CHAR_UNDERSCORE '_'
|
||||
#define CHAR_GRAVE_ACCENT '`'
|
||||
#define CHAR_a 'a'
|
||||
#define CHAR_b 'b'
|
||||
#define CHAR_c 'c'
|
||||
#define CHAR_d 'd'
|
||||
#define CHAR_e 'e'
|
||||
#define CHAR_f 'f'
|
||||
#define CHAR_g 'g'
|
||||
#define CHAR_h 'h'
|
||||
#define CHAR_i 'i'
|
||||
#define CHAR_j 'j'
|
||||
#define CHAR_k 'k'
|
||||
#define CHAR_l 'l'
|
||||
#define CHAR_m 'm'
|
||||
#define CHAR_n 'n'
|
||||
#define CHAR_o 'o'
|
||||
#define CHAR_p 'p'
|
||||
#define CHAR_q 'q'
|
||||
#define CHAR_r 'r'
|
||||
#define CHAR_s 's'
|
||||
#define CHAR_t 't'
|
||||
#define CHAR_u 'u'
|
||||
#define CHAR_v 'v'
|
||||
#define CHAR_w 'w'
|
||||
#define CHAR_x 'x'
|
||||
#define CHAR_y 'y'
|
||||
#define CHAR_z 'z'
|
||||
#define CHAR_LEFT_CURLY_BRACKET '{'
|
||||
#define CHAR_VERTICAL_LINE '|'
|
||||
#define CHAR_RIGHT_CURLY_BRACKET '}'
|
||||
#define CHAR_TILDE '~'
|
||||
|
||||
#define STR_HT "\t"
|
||||
#define STR_VT "\v"
|
||||
#define STR_FF "\f"
|
||||
#define STR_CR "\r"
|
||||
#define STR_BS "\b"
|
||||
#define STR_BEL "\a"
|
||||
|
||||
#define STR_SPACE " "
|
||||
#define STR_EXCLAMATION_MARK "!"
|
||||
#define STR_QUOTATION_MARK "\""
|
||||
#define STR_NUMBER_SIGN "#"
|
||||
#define STR_DOLLAR_SIGN "$"
|
||||
#define STR_PERCENT_SIGN "%"
|
||||
#define STR_AMPERSAND "&"
|
||||
#define STR_APOSTROPHE "'"
|
||||
#define STR_LEFT_PARENTHESIS "("
|
||||
#define STR_RIGHT_PARENTHESIS ")"
|
||||
#define STR_ASTERISK "*"
|
||||
#define STR_PLUS "+"
|
||||
#define STR_COMMA ","
|
||||
#define STR_MINUS "-"
|
||||
#define STR_DOT "."
|
||||
#define STR_SLASH "/"
|
||||
#define STR_0 "0"
|
||||
#define STR_1 "1"
|
||||
#define STR_2 "2"
|
||||
#define STR_3 "3"
|
||||
#define STR_4 "4"
|
||||
#define STR_5 "5"
|
||||
#define STR_6 "6"
|
||||
#define STR_7 "7"
|
||||
#define STR_8 "8"
|
||||
#define STR_9 "9"
|
||||
#define STR_COLON ":"
|
||||
#define STR_SEMICOLON ";"
|
||||
#define STR_LESS_THAN_SIGN "<"
|
||||
#define STR_EQUALS_SIGN "="
|
||||
#define STR_GREATER_THAN_SIGN ">"
|
||||
#define STR_QUESTION_MARK "?"
|
||||
#define STR_COMMERCIAL_AT "@"
|
||||
#define STR_A "A"
|
||||
#define STR_B "B"
|
||||
#define STR_C "C"
|
||||
#define STR_D "D"
|
||||
#define STR_E "E"
|
||||
#define STR_F "F"
|
||||
#define STR_G "G"
|
||||
#define STR_H "H"
|
||||
#define STR_I "I"
|
||||
#define STR_J "J"
|
||||
#define STR_K "K"
|
||||
#define STR_L "L"
|
||||
#define STR_M "M"
|
||||
#define STR_N "N"
|
||||
#define STR_O "O"
|
||||
#define STR_P "P"
|
||||
#define STR_Q "Q"
|
||||
#define STR_R "R"
|
||||
#define STR_S "S"
|
||||
#define STR_T "T"
|
||||
#define STR_U "U"
|
||||
#define STR_V "V"
|
||||
#define STR_W "W"
|
||||
#define STR_X "X"
|
||||
#define STR_Y "Y"
|
||||
#define STR_Z "Z"
|
||||
#define STR_LEFT_SQUARE_BRACKET "["
|
||||
#define STR_BACKSLASH "\\"
|
||||
#define STR_RIGHT_SQUARE_BRACKET "]"
|
||||
#define STR_CIRCUMFLEX_ACCENT "^"
|
||||
#define STR_UNDERSCORE "_"
|
||||
#define STR_GRAVE_ACCENT "`"
|
||||
#define STR_a "a"
|
||||
#define STR_b "b"
|
||||
#define STR_c "c"
|
||||
#define STR_d "d"
|
||||
#define STR_e "e"
|
||||
#define STR_f "f"
|
||||
#define STR_g "g"
|
||||
#define STR_h "h"
|
||||
#define STR_i "i"
|
||||
#define STR_j "j"
|
||||
#define STR_k "k"
|
||||
#define STR_l "l"
|
||||
#define STR_m "m"
|
||||
#define STR_n "n"
|
||||
#define STR_o "o"
|
||||
#define STR_p "p"
|
||||
#define STR_q "q"
|
||||
#define STR_r "r"
|
||||
#define STR_s "s"
|
||||
#define STR_t "t"
|
||||
#define STR_u "u"
|
||||
#define STR_v "v"
|
||||
#define STR_w "w"
|
||||
#define STR_x "x"
|
||||
#define STR_y "y"
|
||||
#define STR_z "z"
|
||||
#define STR_LEFT_CURLY_BRACKET "{"
|
||||
#define STR_VERTICAL_LINE "|"
|
||||
#define STR_RIGHT_CURLY_BRACKET "}"
|
||||
#define STR_TILDE "~"
|
||||
|
||||
#define STRING_ACCEPT0 "ACCEPT\0"
|
||||
#define STRING_COMMIT0 "COMMIT\0"
|
||||
#define STRING_F0 "F\0"
|
||||
#define STRING_FAIL0 "FAIL\0"
|
||||
#define STRING_MARK0 "MARK\0"
|
||||
#define STRING_PRUNE0 "PRUNE\0"
|
||||
#define STRING_SKIP0 "SKIP\0"
|
||||
#define STRING_THEN "THEN"
|
||||
|
||||
#define STRING_atomic0 "atomic\0"
|
||||
#define STRING_pla0 "pla\0"
|
||||
#define STRING_plb0 "plb\0"
|
||||
#define STRING_napla0 "napla\0"
|
||||
#define STRING_naplb0 "naplb\0"
|
||||
#define STRING_nla0 "nla\0"
|
||||
#define STRING_nlb0 "nlb\0"
|
||||
#define STRING_scs0 "scs\0"
|
||||
#define STRING_sr0 "sr\0"
|
||||
#define STRING_asr0 "asr\0"
|
||||
#define STRING_positive_lookahead0 "positive_lookahead\0"
|
||||
#define STRING_positive_lookbehind0 "positive_lookbehind\0"
|
||||
#define STRING_non_atomic_positive_lookahead0 "non_atomic_positive_lookahead\0"
|
||||
#define STRING_non_atomic_positive_lookbehind0 "non_atomic_positive_lookbehind\0"
|
||||
#define STRING_negative_lookahead0 "negative_lookahead\0"
|
||||
#define STRING_negative_lookbehind0 "negative_lookbehind\0"
|
||||
#define STRING_script_run0 "script_run\0"
|
||||
#define STRING_atomic_script_run "atomic_script_run"
|
||||
#define STRING_scan_substring0 "scan_substring\0"
|
||||
|
||||
#define STRING_alpha0 "alpha\0"
|
||||
#define STRING_lower0 "lower\0"
|
||||
#define STRING_upper0 "upper\0"
|
||||
#define STRING_alnum0 "alnum\0"
|
||||
#define STRING_ascii0 "ascii\0"
|
||||
#define STRING_blank0 "blank\0"
|
||||
#define STRING_cntrl0 "cntrl\0"
|
||||
#define STRING_digit0 "digit\0"
|
||||
#define STRING_graph0 "graph\0"
|
||||
#define STRING_print0 "print\0"
|
||||
#define STRING_punct0 "punct\0"
|
||||
#define STRING_space0 "space\0"
|
||||
#define STRING_word0 "word\0"
|
||||
#define STRING_xdigit "xdigit"
|
||||
|
||||
#define STRING_DEFINE "DEFINE"
|
||||
#define STRING_VERSION "VERSION"
|
||||
#define STRING_WEIRD_STARTWORD "[:<:]]"
|
||||
#define STRING_WEIRD_ENDWORD "[:>:]]"
|
||||
|
||||
#define STRING_CR_RIGHTPAR "CR)"
|
||||
#define STRING_LF_RIGHTPAR "LF)"
|
||||
#define STRING_CRLF_RIGHTPAR "CRLF)"
|
||||
#define STRING_ANY_RIGHTPAR "ANY)"
|
||||
#define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)"
|
||||
#define STRING_NUL_RIGHTPAR "NUL)"
|
||||
#define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"
|
||||
#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"
|
||||
#define STRING_UTF8_RIGHTPAR "UTF8)"
|
||||
#define STRING_UTF16_RIGHTPAR "UTF16)"
|
||||
#define STRING_UTF32_RIGHTPAR "UTF32)"
|
||||
#define STRING_UTF_RIGHTPAR "UTF)"
|
||||
#define STRING_UCP_RIGHTPAR "UCP)"
|
||||
#define STRING_NO_AUTO_POSSESS_RIGHTPAR "NO_AUTO_POSSESS)"
|
||||
#define STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR "NO_DOTSTAR_ANCHOR)"
|
||||
#define STRING_NO_JIT_RIGHTPAR "NO_JIT)"
|
||||
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
|
||||
#define STRING_NOTEMPTY_RIGHTPAR "NOTEMPTY)"
|
||||
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR "NOTEMPTY_ATSTART)"
|
||||
#define STRING_CASELESS_RESTRICT_RIGHTPAR "CASELESS_RESTRICT)"
|
||||
#define STRING_TURKISH_CASING_RIGHTPAR "TURKISH_CASING)"
|
||||
#define STRING_LIMIT_HEAP_EQ "LIMIT_HEAP="
|
||||
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
|
||||
#define STRING_LIMIT_DEPTH_EQ "LIMIT_DEPTH="
|
||||
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
|
||||
#define STRING_MARK "MARK"
|
||||
|
||||
#define STRING_bc "bc"
|
||||
#define STRING_bidiclass "bidiclass"
|
||||
#define STRING_sc "sc"
|
||||
#define STRING_script "script"
|
||||
#define STRING_scriptextensions "scriptextensions"
|
||||
#define STRING_scx "scx"
|
||||
|
||||
#else /* SUPPORT_UNICODE */
|
||||
|
||||
/* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This
|
||||
works in both modes on non-EBCDIC platforms, and on EBCDIC platforms in UTF-8 mode
|
||||
only. */
|
||||
|
||||
#define CHAR_HT '\011'
|
||||
#define CHAR_VT '\013'
|
||||
#define CHAR_FF '\014'
|
||||
#define CHAR_CR '\015'
|
||||
#define CHAR_LF '\012'
|
||||
#define CHAR_NL CHAR_LF
|
||||
#define CHAR_NEL ((unsigned char)'\x85')
|
||||
#define CHAR_BS '\010'
|
||||
#define CHAR_BEL '\007'
|
||||
#define CHAR_ESC '\033'
|
||||
#define CHAR_DEL '\177'
|
||||
|
||||
#define CHAR_NUL '\0'
|
||||
#define CHAR_SPACE '\040'
|
||||
#define CHAR_EXCLAMATION_MARK '\041'
|
||||
#define CHAR_QUOTATION_MARK '\042'
|
||||
#define CHAR_NUMBER_SIGN '\043'
|
||||
#define CHAR_DOLLAR_SIGN '\044'
|
||||
#define CHAR_PERCENT_SIGN '\045'
|
||||
#define CHAR_AMPERSAND '\046'
|
||||
#define CHAR_APOSTROPHE '\047'
|
||||
#define CHAR_LEFT_PARENTHESIS '\050'
|
||||
#define CHAR_RIGHT_PARENTHESIS '\051'
|
||||
#define CHAR_ASTERISK '\052'
|
||||
#define CHAR_PLUS '\053'
|
||||
#define CHAR_COMMA '\054'
|
||||
#define CHAR_MINUS '\055'
|
||||
#define CHAR_DOT '\056'
|
||||
#define CHAR_SLASH '\057'
|
||||
#define CHAR_0 '\060'
|
||||
#define CHAR_1 '\061'
|
||||
#define CHAR_2 '\062'
|
||||
#define CHAR_3 '\063'
|
||||
#define CHAR_4 '\064'
|
||||
#define CHAR_5 '\065'
|
||||
#define CHAR_6 '\066'
|
||||
#define CHAR_7 '\067'
|
||||
#define CHAR_8 '\070'
|
||||
#define CHAR_9 '\071'
|
||||
#define CHAR_COLON '\072'
|
||||
#define CHAR_SEMICOLON '\073'
|
||||
#define CHAR_LESS_THAN_SIGN '\074'
|
||||
#define CHAR_EQUALS_SIGN '\075'
|
||||
#define CHAR_GREATER_THAN_SIGN '\076'
|
||||
#define CHAR_QUESTION_MARK '\077'
|
||||
#define CHAR_COMMERCIAL_AT '\100'
|
||||
#define CHAR_A '\101'
|
||||
#define CHAR_B '\102'
|
||||
#define CHAR_C '\103'
|
||||
#define CHAR_D '\104'
|
||||
#define CHAR_E '\105'
|
||||
#define CHAR_F '\106'
|
||||
#define CHAR_G '\107'
|
||||
#define CHAR_H '\110'
|
||||
#define CHAR_I '\111'
|
||||
#define CHAR_J '\112'
|
||||
#define CHAR_K '\113'
|
||||
#define CHAR_L '\114'
|
||||
#define CHAR_M '\115'
|
||||
#define CHAR_N '\116'
|
||||
#define CHAR_O '\117'
|
||||
#define CHAR_P '\120'
|
||||
#define CHAR_Q '\121'
|
||||
#define CHAR_R '\122'
|
||||
#define CHAR_S '\123'
|
||||
#define CHAR_T '\124'
|
||||
#define CHAR_U '\125'
|
||||
#define CHAR_V '\126'
|
||||
#define CHAR_W '\127'
|
||||
#define CHAR_X '\130'
|
||||
#define CHAR_Y '\131'
|
||||
#define CHAR_Z '\132'
|
||||
#define CHAR_LEFT_SQUARE_BRACKET '\133'
|
||||
#define CHAR_BACKSLASH '\134'
|
||||
#define CHAR_RIGHT_SQUARE_BRACKET '\135'
|
||||
#define CHAR_CIRCUMFLEX_ACCENT '\136'
|
||||
#define CHAR_UNDERSCORE '\137'
|
||||
#define CHAR_GRAVE_ACCENT '\140'
|
||||
#define CHAR_a '\141'
|
||||
#define CHAR_b '\142'
|
||||
#define CHAR_c '\143'
|
||||
#define CHAR_d '\144'
|
||||
#define CHAR_e '\145'
|
||||
#define CHAR_f '\146'
|
||||
#define CHAR_g '\147'
|
||||
#define CHAR_h '\150'
|
||||
#define CHAR_i '\151'
|
||||
#define CHAR_j '\152'
|
||||
#define CHAR_k '\153'
|
||||
#define CHAR_l '\154'
|
||||
#define CHAR_m '\155'
|
||||
#define CHAR_n '\156'
|
||||
#define CHAR_o '\157'
|
||||
#define CHAR_p '\160'
|
||||
#define CHAR_q '\161'
|
||||
#define CHAR_r '\162'
|
||||
#define CHAR_s '\163'
|
||||
#define CHAR_t '\164'
|
||||
#define CHAR_u '\165'
|
||||
#define CHAR_v '\166'
|
||||
#define CHAR_w '\167'
|
||||
#define CHAR_x '\170'
|
||||
#define CHAR_y '\171'
|
||||
#define CHAR_z '\172'
|
||||
#define CHAR_LEFT_CURLY_BRACKET '\173'
|
||||
#define CHAR_VERTICAL_LINE '\174'
|
||||
#define CHAR_RIGHT_CURLY_BRACKET '\175'
|
||||
#define CHAR_TILDE '\176'
|
||||
#define CHAR_NBSP ((unsigned char)'\xa0')
|
||||
|
||||
#define STR_HT "\011"
|
||||
#define STR_VT "\013"
|
||||
#define STR_FF "\014"
|
||||
#define STR_CR "\015"
|
||||
#define STR_NL "\012"
|
||||
#define STR_BS "\010"
|
||||
#define STR_BEL "\007"
|
||||
#define STR_ESC "\033"
|
||||
#define STR_DEL "\177"
|
||||
|
||||
#define STR_SPACE "\040"
|
||||
#define STR_EXCLAMATION_MARK "\041"
|
||||
#define STR_QUOTATION_MARK "\042"
|
||||
#define STR_NUMBER_SIGN "\043"
|
||||
#define STR_DOLLAR_SIGN "\044"
|
||||
#define STR_PERCENT_SIGN "\045"
|
||||
#define STR_AMPERSAND "\046"
|
||||
#define STR_APOSTROPHE "\047"
|
||||
#define STR_LEFT_PARENTHESIS "\050"
|
||||
#define STR_RIGHT_PARENTHESIS "\051"
|
||||
#define STR_ASTERISK "\052"
|
||||
#define STR_PLUS "\053"
|
||||
#define STR_COMMA "\054"
|
||||
#define STR_MINUS "\055"
|
||||
#define STR_DOT "\056"
|
||||
#define STR_SLASH "\057"
|
||||
#define STR_0 "\060"
|
||||
#define STR_1 "\061"
|
||||
#define STR_2 "\062"
|
||||
#define STR_3 "\063"
|
||||
#define STR_4 "\064"
|
||||
#define STR_5 "\065"
|
||||
#define STR_6 "\066"
|
||||
#define STR_7 "\067"
|
||||
#define STR_8 "\070"
|
||||
#define STR_9 "\071"
|
||||
#define STR_COLON "\072"
|
||||
#define STR_SEMICOLON "\073"
|
||||
#define STR_LESS_THAN_SIGN "\074"
|
||||
#define STR_EQUALS_SIGN "\075"
|
||||
#define STR_GREATER_THAN_SIGN "\076"
|
||||
#define STR_QUESTION_MARK "\077"
|
||||
#define STR_COMMERCIAL_AT "\100"
|
||||
#define STR_A "\101"
|
||||
#define STR_B "\102"
|
||||
#define STR_C "\103"
|
||||
#define STR_D "\104"
|
||||
#define STR_E "\105"
|
||||
#define STR_F "\106"
|
||||
#define STR_G "\107"
|
||||
#define STR_H "\110"
|
||||
#define STR_I "\111"
|
||||
#define STR_J "\112"
|
||||
#define STR_K "\113"
|
||||
#define STR_L "\114"
|
||||
#define STR_M "\115"
|
||||
#define STR_N "\116"
|
||||
#define STR_O "\117"
|
||||
#define STR_P "\120"
|
||||
#define STR_Q "\121"
|
||||
#define STR_R "\122"
|
||||
#define STR_S "\123"
|
||||
#define STR_T "\124"
|
||||
#define STR_U "\125"
|
||||
#define STR_V "\126"
|
||||
#define STR_W "\127"
|
||||
#define STR_X "\130"
|
||||
#define STR_Y "\131"
|
||||
#define STR_Z "\132"
|
||||
#define STR_LEFT_SQUARE_BRACKET "\133"
|
||||
#define STR_BACKSLASH "\134"
|
||||
#define STR_RIGHT_SQUARE_BRACKET "\135"
|
||||
#define STR_CIRCUMFLEX_ACCENT "\136"
|
||||
#define STR_UNDERSCORE "\137"
|
||||
#define STR_GRAVE_ACCENT "\140"
|
||||
#define STR_a "\141"
|
||||
#define STR_b "\142"
|
||||
#define STR_c "\143"
|
||||
#define STR_d "\144"
|
||||
#define STR_e "\145"
|
||||
#define STR_f "\146"
|
||||
#define STR_g "\147"
|
||||
#define STR_h "\150"
|
||||
#define STR_i "\151"
|
||||
#define STR_j "\152"
|
||||
#define STR_k "\153"
|
||||
#define STR_l "\154"
|
||||
#define STR_m "\155"
|
||||
#define STR_n "\156"
|
||||
#define STR_o "\157"
|
||||
#define STR_p "\160"
|
||||
#define STR_q "\161"
|
||||
#define STR_r "\162"
|
||||
#define STR_s "\163"
|
||||
#define STR_t "\164"
|
||||
#define STR_u "\165"
|
||||
#define STR_v "\166"
|
||||
#define STR_w "\167"
|
||||
#define STR_x "\170"
|
||||
#define STR_y "\171"
|
||||
#define STR_z "\172"
|
||||
#define STR_LEFT_CURLY_BRACKET "\173"
|
||||
#define STR_VERTICAL_LINE "\174"
|
||||
#define STR_RIGHT_CURLY_BRACKET "\175"
|
||||
#define STR_TILDE "\176"
|
||||
|
||||
#define STRING_ACCEPT0 STR_A STR_C STR_C STR_E STR_P STR_T "\0"
|
||||
#define STRING_COMMIT0 STR_C STR_O STR_M STR_M STR_I STR_T "\0"
|
||||
#define STRING_F0 STR_F "\0"
|
||||
#define STRING_FAIL0 STR_F STR_A STR_I STR_L "\0"
|
||||
#define STRING_MARK0 STR_M STR_A STR_R STR_K "\0"
|
||||
#define STRING_PRUNE0 STR_P STR_R STR_U STR_N STR_E "\0"
|
||||
#define STRING_SKIP0 STR_S STR_K STR_I STR_P "\0"
|
||||
#define STRING_THEN STR_T STR_H STR_E STR_N
|
||||
|
||||
#define STRING_atomic0 STR_a STR_t STR_o STR_m STR_i STR_c "\0"
|
||||
#define STRING_pla0 STR_p STR_l STR_a "\0"
|
||||
#define STRING_plb0 STR_p STR_l STR_b "\0"
|
||||
#define STRING_napla0 STR_n STR_a STR_p STR_l STR_a "\0"
|
||||
#define STRING_naplb0 STR_n STR_a STR_p STR_l STR_b "\0"
|
||||
#define STRING_nla0 STR_n STR_l STR_a "\0"
|
||||
#define STRING_nlb0 STR_n STR_l STR_b "\0"
|
||||
#define STRING_scs0 STR_s STR_c STR_s "\0"
|
||||
#define STRING_sr0 STR_s STR_r "\0"
|
||||
#define STRING_asr0 STR_a STR_s STR_r "\0"
|
||||
#define STRING_positive_lookahead0 STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
|
||||
#define STRING_positive_lookbehind0 STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
|
||||
#define STRING_non_atomic_positive_lookahead0 STR_n STR_o STR_n STR_UNDERSCORE STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
|
||||
#define STRING_non_atomic_positive_lookbehind0 STR_n STR_o STR_n STR_UNDERSCORE STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
|
||||
#define STRING_negative_lookahead0 STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
|
||||
#define STRING_negative_lookbehind0 STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
|
||||
#define STRING_script_run0 STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n "\0"
|
||||
#define STRING_atomic_script_run STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n
|
||||
#define STRING_scan_substring0 STR_s STR_c STR_a STR_n STR_UNDERSCORE STR_s STR_u STR_b STR_s STR_t STR_r STR_i STR_n STR_g "\0"
|
||||
|
||||
#define STRING_alpha0 STR_a STR_l STR_p STR_h STR_a "\0"
|
||||
#define STRING_lower0 STR_l STR_o STR_w STR_e STR_r "\0"
|
||||
#define STRING_upper0 STR_u STR_p STR_p STR_e STR_r "\0"
|
||||
#define STRING_alnum0 STR_a STR_l STR_n STR_u STR_m "\0"
|
||||
#define STRING_ascii0 STR_a STR_s STR_c STR_i STR_i "\0"
|
||||
#define STRING_blank0 STR_b STR_l STR_a STR_n STR_k "\0"
|
||||
#define STRING_cntrl0 STR_c STR_n STR_t STR_r STR_l "\0"
|
||||
#define STRING_digit0 STR_d STR_i STR_g STR_i STR_t "\0"
|
||||
#define STRING_graph0 STR_g STR_r STR_a STR_p STR_h "\0"
|
||||
#define STRING_print0 STR_p STR_r STR_i STR_n STR_t "\0"
|
||||
#define STRING_punct0 STR_p STR_u STR_n STR_c STR_t "\0"
|
||||
#define STRING_space0 STR_s STR_p STR_a STR_c STR_e "\0"
|
||||
#define STRING_word0 STR_w STR_o STR_r STR_d "\0"
|
||||
#define STRING_xdigit STR_x STR_d STR_i STR_g STR_i STR_t
|
||||
|
||||
#define STRING_DEFINE STR_D STR_E STR_F STR_I STR_N STR_E
|
||||
#define STRING_VERSION STR_V STR_E STR_R STR_S STR_I STR_O STR_N
|
||||
#define STRING_WEIRD_STARTWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_LESS_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
|
||||
#define STRING_WEIRD_ENDWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_GREATER_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
|
||||
|
||||
#define STRING_CR_RIGHTPAR STR_C STR_R STR_RIGHT_PARENTHESIS
|
||||
#define STRING_LF_RIGHTPAR STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_CRLF_RIGHTPAR STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_ANY_RIGHTPAR STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS
|
||||
#define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_NUL_RIGHTPAR STR_N STR_U STR_L STR_RIGHT_PARENTHESIS
|
||||
#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UTF16_RIGHTPAR STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UTF32_RIGHTPAR STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
|
||||
#define STRING_NO_AUTO_POSSESS_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_A STR_U STR_T STR_O STR_UNDERSCORE STR_P STR_O STR_S STR_S STR_E STR_S STR_S STR_RIGHT_PARENTHESIS
|
||||
#define STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_D STR_O STR_T STR_S STR_T STR_A STR_R STR_UNDERSCORE STR_A STR_N STR_C STR_H STR_O STR_R STR_RIGHT_PARENTHESIS
|
||||
#define STRING_NO_JIT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_J STR_I STR_T STR_RIGHT_PARENTHESIS
|
||||
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
|
||||
#define STRING_NOTEMPTY_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_RIGHT_PARENTHESIS
|
||||
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS
|
||||
#define STRING_CASELESS_RESTRICT_RIGHTPAR STR_C STR_A STR_S STR_E STR_L STR_E STR_S STR_S STR_UNDERSCORE STR_R STR_E STR_S STR_T STR_R STR_I STR_C STR_T STR_RIGHT_PARENTHESIS
|
||||
#define STRING_TURKISH_CASING_RIGHTPAR STR_T STR_U STR_R STR_K STR_I STR_S STR_H STR_UNDERSCORE STR_C STR_A STR_S STR_I STR_N STR_G STR_RIGHT_PARENTHESIS
|
||||
#define STRING_LIMIT_HEAP_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_H STR_E STR_A STR_P STR_EQUALS_SIGN
|
||||
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
|
||||
#define STRING_LIMIT_DEPTH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_D STR_E STR_P STR_T STR_H STR_EQUALS_SIGN
|
||||
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
|
||||
#define STRING_MARK STR_M STR_A STR_R STR_K
|
||||
|
||||
#define STRING_bc STR_b STR_c
|
||||
#define STRING_bidiclass STR_b STR_i STR_d STR_i STR_c STR_l STR_a STR_s STR_s
|
||||
#define STRING_sc STR_s STR_c
|
||||
#define STRING_script STR_s STR_c STR_r STR_i STR_p STR_t
|
||||
#define STRING_scriptextensions STR_s STR_c STR_r STR_i STR_p STR_t STR_e STR_x STR_t STR_e STR_n STR_s STR_i STR_o STR_n STR_s
|
||||
#define STRING_scx STR_s STR_c STR_x
|
||||
|
||||
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* -------------------- End of character and string names -------------------*/
|
||||
|
||||
/* -------------------- Definitions for compiled patterns -------------------*/
|
||||
|
||||
/* Codes for different types of Unicode property. If these definitions are
|
||||
changed, the autopossessifying table in pcre2_auto_possess.c must be updated to
|
||||
match. */
|
||||
|
||||
#define PT_LAMP 0 /* L& - the union of Lu, Ll, Lt */
|
||||
#define PT_GC 1 /* Specified general characteristic (e.g. L) */
|
||||
#define PT_PC 2 /* Specified particular characteristic (e.g. Lu) */
|
||||
#define PT_SC 3 /* Script only (e.g. Han) */
|
||||
#define PT_SCX 4 /* Script extensions (includes SC) */
|
||||
#define PT_ALNUM 5 /* Alphanumeric - the union of L and N */
|
||||
#define PT_SPACE 6 /* Perl space - general category Z plus 9,10,12,13 */
|
||||
#define PT_PXSPACE 7 /* POSIX space - Z plus 9,10,11,12,13 */
|
||||
#define PT_WORD 8 /* Word - L, N, Mn, or Pc */
|
||||
#define PT_CLIST 9 /* Pseudo-property: match character list */
|
||||
#define PT_UCNC 10 /* Universal Character nameable character */
|
||||
#define PT_BIDICL 11 /* Specified bidi class */
|
||||
#define PT_BOOL 12 /* Boolean property */
|
||||
#define PT_ANY 13 /* Must be the last entry!
|
||||
Any property - matches all chars */
|
||||
#define PT_TABSIZE PT_ANY /* Size of square table for autopossessify tests */
|
||||
|
||||
/* The following special properties are used only in XCLASS items, when POSIX
|
||||
classes are specified and PCRE2_UCP is set - in other words, for Unicode
|
||||
handling of these classes. They are not available via the \p or \P escapes like
|
||||
those in the above list, and so they do not take part in the autopossessifying
|
||||
table. */
|
||||
|
||||
#define PT_PXGRAPH 14 /* [:graph:] - characters that mark the paper */
|
||||
#define PT_PXPRINT 15 /* [:print:] - [:graph:] plus non-control spaces */
|
||||
#define PT_PXPUNCT 16 /* [:punct:] - punctuation characters */
|
||||
#define PT_PXXDIGIT 17 /* [:xdigit:] - hex digits */
|
||||
|
||||
/* This value is used when parsing \p and \P escapes to indicate that neither
|
||||
\p{script:...} nor \p{scx:...} has been encountered. */
|
||||
|
||||
#define PT_NOTSCRIPT 255
|
||||
|
||||
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
|
||||
contain characters with values greater than 255. */
|
||||
|
||||
#define XCL_NOT 0x01 /* Flag: this is a negative class */
|
||||
#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */
|
||||
#define XCL_HASPROP 0x04 /* Flag: property checks are present. */
|
||||
|
||||
#define XCL_END 0 /* Marks end of individual items */
|
||||
#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */
|
||||
#define XCL_RANGE 2 /* A range (two multibyte chars) follows */
|
||||
#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */
|
||||
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
|
||||
/* This value represents the beginning of character lists. The value
|
||||
is 16 bits long, and is stored as a high and low byte pair in 8-bit mode.
|
||||
The lower 12 bits contain information about character lists (see later). */
|
||||
#define XCL_LIST (sizeof(PCRE2_UCHAR) == 1 ? 0x10 : 0x1000)
|
||||
|
||||
/* When a character class contains many characters/ranges,
|
||||
they are stored in character lists. There are four character
|
||||
lists which contain characters/ranges within a given range.
|
||||
|
||||
The name, character range and item size for each list:
|
||||
Low16 [0x100 - 0x7fff] 16 bit items
|
||||
High16 [0x8000 - 0xffff] 16 bit items
|
||||
Low32 [0x10000 - 0x7fffffff] 32 bit items
|
||||
High32 [0x80000000 - 0xffffffff] 32 bit items
|
||||
|
||||
The Low32 character list is used only when utf encoding or 32 bit
|
||||
character width is enabled, and the High32 character list is used only
|
||||
when 32 bit character width is enabled.
|
||||
|
||||
Each character list contains items. The lowest bit indicates whether
|
||||
an item is the beginning of a range (bit is cleared), or not (bit
|
||||
is set). The other bits represent the character shifted left by
|
||||
one, so its highest bit is discarded. Due to the layout of character
|
||||
lists, the highest bit of a character is always known:
|
||||
|
||||
Low16 and Low32: the highest bit is always zero
|
||||
High16 and High32: the highest bit is always one
|
||||
|
||||
The items are ordered in increasing order, so binary search can be
|
||||
used to find the lower bound of an input character. The lower bound
|
||||
is the highest item whose value is less than or equal to the input
|
||||
character. If the lowest bit of the item is cleared, or the character
|
||||
stored in the item equals the input character, the input
|
||||
character is in the character list. */
|
||||
|
||||
/* Character list constants. */
|
||||
/* Low16 list: covers characters 0x100..0x7fff. The _ADD value equals the
highest bit that items of this list leave out (always zero here) - presumably
it is added back when an item is decoded; confirm against the users of these
macros. */
#define XCL_CHAR_LIST_LOW_16_START 0x100
|
||||
#define XCL_CHAR_LIST_LOW_16_END 0x7fff
|
||||
#define XCL_CHAR_LIST_LOW_16_ADD 0x0
|
||||
|
||||
/* High16 list: covers characters 0x8000..0xffff; the omitted highest bit is
always one, which matches the _ADD value 0x8000. */
#define XCL_CHAR_LIST_HIGH_16_START 0x8000
|
||||
#define XCL_CHAR_LIST_HIGH_16_END 0xffff
|
||||
#define XCL_CHAR_LIST_HIGH_16_ADD 0x8000
|
||||
|
||||
/* Low32 list: covers characters 0x10000..0x7fffffff; the omitted highest bit
is always zero. */
#define XCL_CHAR_LIST_LOW_32_START 0x10000
|
||||
#define XCL_CHAR_LIST_LOW_32_END 0x7fffffff
|
||||
#define XCL_CHAR_LIST_LOW_32_ADD 0x0
|
||||
|
||||
/* High32 list: covers characters 0x80000000..0xffffffff; the omitted highest
bit is always one, which matches the _ADD value 0x80000000. */
#define XCL_CHAR_LIST_HIGH_32_START 0x80000000
|
||||
#define XCL_CHAR_LIST_HIGH_32_END 0xffffffff
|
||||
#define XCL_CHAR_LIST_HIGH_32_ADD 0x80000000
|
||||
|
||||
/* Mask for getting the descriptors of character list ranges.
|
||||
Each descriptor has XCL_TYPE_BIT_LEN bits, and can be processed
|
||||
by XCL_BEGIN_WITH_RANGE and XCL_ITEM_COUNT_MASK macros. */
|
||||
#define XCL_TYPE_MASK 0xfff
|
||||
#define XCL_TYPE_BIT_LEN 3
|
||||
/* If this bit is set, the first item of the character list is the
|
||||
end of a range, which started before the starting character of the
|
||||
character list. */
|
||||
#define XCL_BEGIN_WITH_RANGE 0x4
|
||||
/* Number of items in the character list: 0, 1, or 2. The value 3
|
||||
indicates that the item count is stored at the beginning of the
|
||||
character list. The item count has the same width as the items
|
||||
in the character list (e.g. 16 bit for Low16 and High16 lists). */
|
||||
#define XCL_ITEM_COUNT_MASK 0x3
|
||||
/* Shift and flag for constructing character list items. The XCL_CHAR_END
|
||||
is set, when the item is not the beginning of a range. The XCL_CHAR_SHIFT
|
||||
can be used to encode / decode the character value stored in an item. */
|
||||
#define XCL_CHAR_END 0x1
|
||||
#define XCL_CHAR_SHIFT 1
|
||||
|
||||
/* Flag bits for an extended class (OP_ECLASS), which is used for complex
|
||||
character matches such as [\p{Greek} && \p{Ll}]. */
|
||||
|
||||
#define ECL_MAP 0x01 /* Flag: a 32-byte map is present */
|
||||
|
||||
/* Type tags for the items stored in an extended class (OP_ECLASS). These items
|
||||
follow the OP_ECLASS's flag char and bitmap, and represent a Reverse Polish
|
||||
Notation list of operands and operators manipulating a stack of bits. */
|
||||
|
||||
#define ECL_AND 1 /* Pop two from the stack, AND, and push result. */
|
||||
#define ECL_OR 2 /* Pop two from the stack, OR, and push result. */
|
||||
#define ECL_XOR 3 /* Pop two from the stack, XOR, and push result. */
|
||||
#define ECL_NOT 4 /* Pop one from the stack, NOT, and push result. */
|
||||
#define ECL_XCLASS 5 /* XCLASS nested within ECLASS; match and push result. */
|
||||
#define ECL_ANY 6 /* Temporary, only used during compilation. */
|
||||
#define ECL_NONE 7 /* Temporary, only used during compilation. */
|
||||
|
||||
/* These are escaped items that aren't just an encoding of a particular data
|
||||
value such as \n. They must have non-zero values, as check_escape() returns 0
|
||||
for a data character. In the escapes[] table in pcre2_compile.c their values
|
||||
are negated in order to distinguish them from data values.
|
||||
|
||||
They must appear here in the same order as in the opcode definitions below, up
|
||||
to ESC_z. There's a dummy for OP_ALLANY because it corresponds to "." in DOTALL
|
||||
mode rather than an escape sequence. It is also used for [^] in JavaScript
|
||||
compatibility mode, and for \C in non-utf mode. In non-DOTALL mode, "." behaves
|
||||
like \N.
|
||||
|
||||
ESC_ub is a special return from check_escape() when, in BSUX mode, \u{ is not
|
||||
followed by hex digits and }, in which case it should mean a literal "u"
|
||||
followed by a literal "{". This hack is necessary for cases like \u{ 12}
|
||||
because without it, this is interpreted as u{12} now that spaces are allowed in
|
||||
quantifiers.
|
||||
|
||||
Negative numbers are used to encode a backreference (\1, \2, \3, etc.) in
|
||||
check_escape(). There are tests in the code for an escape greater than ESC_b
|
||||
and less than ESC_Z to detect the types that may be repeated. These are the
|
||||
types that consume characters. If any new escapes are put in between that don't
|
||||
consume a character, that code will have to change. */
|
||||
|
||||
/* Escape codes, numbered from 1. ESC_A .. ESC_z (1 .. 24) line up one-for-one
with OP_SOD .. OP_EOD in the opcode enum; ESC_dum (13) is the placeholder that
sits opposite OP_ALLANY. */
enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
|
||||
ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,
|
||||
ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z,
|
||||
/* From here on (25 upwards) the values no longer correspond to opcodes. */
ESC_E, ESC_Q, ESC_g, ESC_k, ESC_ub };
|
||||
|
||||
|
||||
/********************** Opcode definitions ******************/
|
||||
|
||||
/****** NOTE NOTE NOTE ******
|
||||
|
||||
Starting from 1 (i.e. after OP_END), the values up to OP_EOD must correspond in
|
||||
order to the list of escapes immediately above. Furthermore, values up to
|
||||
OP_DOLLM must not be changed without adjusting the table called autoposstab in
|
||||
pcre2_auto_possess.c.
|
||||
|
||||
Whenever this list is updated, the two macro definitions that follow must be
|
||||
updated to match. The possessification table called "opcode_possessify" in
|
||||
pcre2_compile.c must also be updated, and also the tables called "coptable"
|
||||
and "poptable" in pcre2_dfa_match.c.
|
||||
|
||||
****** NOTE NOTE NOTE ******/
|
||||
|
||||
|
||||
/* The values between FIRST_AUTOTAB_OP and LAST_AUTOTAB_RIGHT_OP, inclusive,
|
||||
are used in a table for deciding whether a repeated character type can be
|
||||
auto-possessified. */
|
||||
|
||||
/* First opcode covered by the table: OP_NOT_DIGIT is opcode 6 (\D). */
#define FIRST_AUTOTAB_OP OP_NOT_DIGIT
|
||||
/* OP_EXTUNI is opcode 22 (\X). NOTE(review): presumably the last opcode that
can appear on the left-hand side of the table - confirm in
pcre2_auto_possess.c. */
#define LAST_AUTOTAB_LEFT_OP OP_EXTUNI
|
||||
/* OP_DOLLM is opcode 26, the inclusive upper bound of the table range.
NOTE(review): presumably the last right-hand side opcode - confirm. */
#define LAST_AUTOTAB_RIGHT_OP OP_DOLLM
|
||||
|
||||
/* Opcode values used in compiled patterns. The number in each comment is the
enumerator's value; the ordering constraints stated in the comments below are
relied on elsewhere, so entries must not be moved. OP_TABLE_LENGTH, the last
enumerator, is the total number of opcodes. */
enum {
|
||||
OP_END, /* 0 End of pattern */
|
||||
|
||||
/* Values corresponding to backslashed metacharacters */
|
||||
|
||||
OP_SOD, /* 1 Start of data: \A */
|
||||
OP_SOM, /* 2 Start of match (subject + offset): \G */
|
||||
OP_SET_SOM, /* 3 Set start of match (\K) */
|
||||
OP_NOT_WORD_BOUNDARY, /* 4 \B -- see also OP_NOT_UCP_WORD_BOUNDARY */
|
||||
OP_WORD_BOUNDARY, /* 5 \b -- see also OP_UCP_WORD_BOUNDARY */
|
||||
OP_NOT_DIGIT, /* 6 \D */
|
||||
OP_DIGIT, /* 7 \d */
|
||||
OP_NOT_WHITESPACE, /* 8 \S */
|
||||
OP_WHITESPACE, /* 9 \s */
|
||||
OP_NOT_WORDCHAR, /* 10 \W */
|
||||
OP_WORDCHAR, /* 11 \w */
|
||||
|
||||
OP_ANY, /* 12 Match any character except newline (\N) */
|
||||
OP_ALLANY, /* 13 Match any character */
|
||||
OP_ANYBYTE, /* 14 Match any byte (\C); different to OP_ANY for UTF-8 */
|
||||
OP_NOTPROP, /* 15 \P (not Unicode property) */
|
||||
OP_PROP, /* 16 \p (Unicode property) */
|
||||
OP_ANYNL, /* 17 \R (any newline sequence) */
|
||||
OP_NOT_HSPACE, /* 18 \H (not horizontal whitespace) */
|
||||
OP_HSPACE, /* 19 \h (horizontal whitespace) */
|
||||
OP_NOT_VSPACE, /* 20 \V (not vertical whitespace) */
|
||||
OP_VSPACE, /* 21 \v (vertical whitespace) */
|
||||
OP_EXTUNI, /* 22 \X (extended Unicode sequence) */
|
||||
OP_EODN, /* 23 End of data or \n at end of data (\Z) */
|
||||
OP_EOD, /* 24 End of data (\z) */
|
||||
|
||||
/* Line end assertions */
|
||||
|
||||
OP_DOLL, /* 25 End of line - not multiline */
|
||||
OP_DOLLM, /* 26 End of line - multiline */
|
||||
OP_CIRC, /* 27 Start of line - not multiline */
|
||||
OP_CIRCM, /* 28 Start of line - multiline */
|
||||
|
||||
/* Single characters; caseful must precede the caseless ones, and these
|
||||
must remain in this order, and adjacent. */
|
||||
|
||||
OP_CHAR, /* 29 Match one character, casefully */
|
||||
OP_CHARI, /* 30 Match one character, caselessly */
|
||||
OP_NOT, /* 31 Match one character, not the given one, casefully */
|
||||
OP_NOTI, /* 32 Match one character, not the given one, caselessly */
|
||||
|
||||
/* The following sets of 13 opcodes must always be kept in step because
|
||||
the offset from the first one is used to generate the others. */
|
||||
|
||||
/* Repeated characters; caseful must precede the caseless ones */
|
||||
|
||||
OP_STAR, /* 33 The maximizing and minimizing versions of */
|
||||
OP_MINSTAR, /* 34 these six opcodes must come in pairs, with */
|
||||
OP_PLUS, /* 35 the minimizing one second. */
|
||||
OP_MINPLUS, /* 36 */
|
||||
OP_QUERY, /* 37 */
|
||||
OP_MINQUERY, /* 38 */
|
||||
|
||||
OP_UPTO, /* 39 From 0 to n matches of one character, caseful*/
|
||||
OP_MINUPTO, /* 40 */
|
||||
OP_EXACT, /* 41 Exactly n matches */
|
||||
|
||||
OP_POSSTAR, /* 42 Possessified star, caseful */
|
||||
OP_POSPLUS, /* 43 Possessified plus, caseful */
|
||||
OP_POSQUERY, /* 44 Possessified query, caseful */
|
||||
OP_POSUPTO, /* 45 Possessified upto, caseful */
|
||||
|
||||
/* Repeated characters; caseless must follow the caseful ones */
|
||||
|
||||
OP_STARI, /* 46 */
|
||||
OP_MINSTARI, /* 47 */
|
||||
OP_PLUSI, /* 48 */
|
||||
OP_MINPLUSI, /* 49 */
|
||||
OP_QUERYI, /* 50 */
|
||||
OP_MINQUERYI, /* 51 */
|
||||
|
||||
OP_UPTOI, /* 52 From 0 to n matches of one character, caseless */
|
||||
OP_MINUPTOI, /* 53 */
|
||||
OP_EXACTI, /* 54 */
|
||||
|
||||
OP_POSSTARI, /* 55 Possessified star, caseless */
|
||||
OP_POSPLUSI, /* 56 Possessified plus, caseless */
|
||||
OP_POSQUERYI, /* 57 Possessified query, caseless */
|
||||
OP_POSUPTOI, /* 58 Possessified upto, caseless */
|
||||
|
||||
/* The negated ones must follow the non-negated ones, and match them */
|
||||
/* Negated repeated character, caseful; must precede the caseless ones */
|
||||
|
||||
OP_NOTSTAR, /* 59 The maximizing and minimizing versions of */
|
||||
OP_NOTMINSTAR, /* 60 these six opcodes must come in pairs, with */
|
||||
OP_NOTPLUS, /* 61 the minimizing one second. They must be in */
|
||||
OP_NOTMINPLUS, /* 62 exactly the same order as those above. */
|
||||
OP_NOTQUERY, /* 63 */
|
||||
OP_NOTMINQUERY, /* 64 */
|
||||
|
||||
OP_NOTUPTO, /* 65 From 0 to n matches, caseful */
|
||||
OP_NOTMINUPTO, /* 66 */
|
||||
OP_NOTEXACT, /* 67 Exactly n matches */
|
||||
|
||||
OP_NOTPOSSTAR, /* 68 Possessified versions, caseful */
|
||||
OP_NOTPOSPLUS, /* 69 */
|
||||
OP_NOTPOSQUERY, /* 70 */
|
||||
OP_NOTPOSUPTO, /* 71 */
|
||||
|
||||
/* Negated repeated character, caseless; must follow the caseful ones */
|
||||
|
||||
OP_NOTSTARI, /* 72 */
|
||||
OP_NOTMINSTARI, /* 73 */
|
||||
OP_NOTPLUSI, /* 74 */
|
||||
OP_NOTMINPLUSI, /* 75 */
|
||||
OP_NOTQUERYI, /* 76 */
|
||||
OP_NOTMINQUERYI, /* 77 */
|
||||
|
||||
OP_NOTUPTOI, /* 78 From 0 to n matches, caseless */
|
||||
OP_NOTMINUPTOI, /* 79 */
|
||||
OP_NOTEXACTI, /* 80 Exactly n matches */
|
||||
|
||||
OP_NOTPOSSTARI, /* 81 Possessified versions, caseless */
|
||||
OP_NOTPOSPLUSI, /* 82 */
|
||||
OP_NOTPOSQUERYI, /* 83 */
|
||||
OP_NOTPOSUPTOI, /* 84 */
|
||||
|
||||
/* Character types */
|
||||
|
||||
OP_TYPESTAR, /* 85 The maximizing and minimizing versions of */
|
||||
OP_TYPEMINSTAR, /* 86 these six opcodes must come in pairs, with */
|
||||
OP_TYPEPLUS, /* 87 the minimizing one second. These codes must */
|
||||
OP_TYPEMINPLUS, /* 88 be in exactly the same order as those above. */
|
||||
OP_TYPEQUERY, /* 89 */
|
||||
OP_TYPEMINQUERY, /* 90 */
|
||||
|
||||
OP_TYPEUPTO, /* 91 From 0 to n matches */
|
||||
OP_TYPEMINUPTO, /* 92 */
|
||||
OP_TYPEEXACT, /* 93 Exactly n matches */
|
||||
|
||||
OP_TYPEPOSSTAR, /* 94 Possessified versions */
|
||||
OP_TYPEPOSPLUS, /* 95 */
|
||||
OP_TYPEPOSQUERY, /* 96 */
|
||||
OP_TYPEPOSUPTO, /* 97 */
|
||||
|
||||
/* These are used for character classes and back references; only the
|
||||
first six are the same as the sets above. */
|
||||
|
||||
OP_CRSTAR, /* 98 The maximizing and minimizing versions of */
|
||||
OP_CRMINSTAR, /* 99 all these opcodes must come in pairs, with */
|
||||
OP_CRPLUS, /* 100 the minimizing one second. These codes must */
|
||||
OP_CRMINPLUS, /* 101 be in exactly the same order as those above. */
|
||||
OP_CRQUERY, /* 102 */
|
||||
OP_CRMINQUERY, /* 103 */
|
||||
|
||||
OP_CRRANGE, /* 104 These are different to the three sets above. */
|
||||
OP_CRMINRANGE, /* 105 */
|
||||
|
||||
OP_CRPOSSTAR, /* 106 Possessified versions */
|
||||
OP_CRPOSPLUS, /* 107 */
|
||||
OP_CRPOSQUERY, /* 108 */
|
||||
OP_CRPOSRANGE, /* 109 */
|
||||
|
||||
/* End of quantifier opcodes */
|
||||
|
||||
OP_CLASS, /* 110 Match a character class, chars < 256 only */
|
||||
OP_NCLASS, /* 111 Same, but the bitmap was created from a negative
|
||||
class - the difference is relevant only when a
|
||||
character > 255 is encountered. */
|
||||
OP_XCLASS, /* 112 Extended class for handling > 255 chars within the
|
||||
class. This does both positive and negative. */
|
||||
OP_ECLASS, /* 113 Really-extended class, for handling logical
|
||||
expressions computed over characters. */
|
||||
OP_REF, /* 114 Match a back reference, casefully */
|
||||
OP_REFI, /* 115 Match a back reference, caselessly */
|
||||
OP_DNREF, /* 116 Match a duplicate name backref, casefully */
|
||||
OP_DNREFI, /* 117 Match a duplicate name backref, caselessly */
|
||||
OP_RECURSE, /* 118 Match a numbered subpattern (possibly recursive) */
|
||||
OP_CALLOUT, /* 119 Call out to external function if provided */
|
||||
OP_CALLOUT_STR, /* 120 Call out with string argument */
|
||||
|
||||
OP_ALT, /* 121 Start of alternation */
|
||||
OP_KET, /* 122 End of group that doesn't have an unbounded repeat */
|
||||
OP_KETRMAX, /* 123 These two must remain together and in this */
|
||||
OP_KETRMIN, /* 124 order. They are for groups that repeat for ever. */
|
||||
OP_KETRPOS, /* 125 Possessive unlimited repeat. */
|
||||
|
||||
/* The assertions must come before BRA, CBRA, ONCE, and COND. */
|
||||
|
||||
OP_REVERSE, /* 126 Move pointer back - used in lookbehind assertions */
|
||||
OP_VREVERSE, /* 127 Move pointer back - variable */
|
||||
OP_ASSERT, /* 128 Positive lookahead */
|
||||
OP_ASSERT_NOT, /* 129 Negative lookahead */
|
||||
OP_ASSERTBACK, /* 130 Positive lookbehind */
|
||||
OP_ASSERTBACK_NOT, /* 131 Negative lookbehind */
|
||||
OP_ASSERT_NA, /* 132 Positive non-atomic lookahead */
|
||||
OP_ASSERTBACK_NA, /* 133 Positive non-atomic lookbehind */
|
||||
OP_ASSERT_SCS, /* 134 Scan substring */
|
||||
|
||||
/* ONCE, SCRIPT_RUN, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come
|
||||
immediately after the assertions, with ONCE first, as there's a test for >=
|
||||
ONCE for a subpattern that isn't an assertion. The POS versions must
|
||||
immediately follow the non-POS versions in each case. */
|
||||
|
||||
OP_ONCE, /* 135 Atomic group, contains captures */
|
||||
OP_SCRIPT_RUN, /* 136 Non-capture, but check characters' scripts */
|
||||
OP_BRA, /* 137 Start of non-capturing bracket */
|
||||
OP_BRAPOS, /* 138 Ditto, with unlimited, possessive repeat */
|
||||
OP_CBRA, /* 139 Start of capturing bracket */
|
||||
OP_CBRAPOS, /* 140 Ditto, with unlimited, possessive repeat */
|
||||
OP_COND, /* 141 Conditional group */
|
||||
|
||||
/* These five must follow the previous five, in the same order. There's a
|
||||
check for >= SBRA to distinguish the two sets. */
|
||||
|
||||
OP_SBRA, /* 142 Start of non-capturing bracket, check empty */
|
||||
OP_SBRAPOS, /* 143 Ditto, with unlimited, possessive repeat */
|
||||
OP_SCBRA, /* 144 Start of capturing bracket, check empty */
|
||||
OP_SCBRAPOS, /* 145 Ditto, with unlimited, possessive repeat */
|
||||
OP_SCOND, /* 146 Conditional group, check empty */
|
||||
|
||||
/* The next two pairs must (respectively) be kept together. */
|
||||
|
||||
OP_CREF, /* 147 Used to hold a capture number as condition */
|
||||
OP_DNCREF, /* 148 Used to point to duplicate names as a condition */
|
||||
OP_RREF, /* 149 Used to hold a recursion number as condition */
|
||||
OP_DNRREF, /* 150 Used to point to duplicate names as a condition */
|
||||
OP_FALSE, /* 151 Always false (used by DEFINE and VERSION) */
|
||||
OP_TRUE, /* 152 Always true (used by VERSION) */
|
||||
|
||||
OP_BRAZERO, /* 153 These two must remain together and in this */
|
||||
OP_BRAMINZERO, /* 154 order. */
|
||||
OP_BRAPOSZERO, /* 155 */
|
||||
|
||||
/* These are backtracking control verbs */
|
||||
|
||||
OP_MARK, /* 156 always has an argument */
|
||||
OP_PRUNE, /* 157 */
|
||||
OP_PRUNE_ARG, /* 158 same, but with argument */
|
||||
OP_SKIP, /* 159 */
|
||||
OP_SKIP_ARG, /* 160 same, but with argument */
|
||||
OP_THEN, /* 161 */
|
||||
OP_THEN_ARG, /* 162 same, but with argument */
|
||||
OP_COMMIT, /* 163 */
|
||||
OP_COMMIT_ARG, /* 164 same, but with argument */
|
||||
|
||||
/* These are forced failure and success verbs. FAIL and ACCEPT do accept an
|
||||
argument, but these cases can be compiled as, for example, (*MARK:X)(*FAIL)
|
||||
without the need for a special opcode. */
|
||||
|
||||
OP_FAIL, /* 165 */
|
||||
OP_ACCEPT, /* 166 */
|
||||
OP_ASSERT_ACCEPT, /* 167 Used inside assertions */
|
||||
OP_CLOSE, /* 168 Used before OP_ACCEPT to close open captures */
|
||||
|
||||
/* This is used to skip a subpattern with a {0} quantifier */
|
||||
|
||||
OP_SKIPZERO, /* 169 */
|
||||
|
||||
/* This is used to identify a DEFINE group during compilation so that it can
|
||||
be checked for having only one branch. It is changed to OP_FALSE before
|
||||
compilation finishes. */
|
||||
|
||||
OP_DEFINE, /* 170 */
|
||||
|
||||
/* These opcodes replace their normal counterparts in UCP mode when
|
||||
PCRE2_EXTRA_ASCII_BSW is not set. */
|
||||
|
||||
OP_NOT_UCP_WORD_BOUNDARY, /* 171 */
|
||||
OP_UCP_WORD_BOUNDARY, /* 172 */
|
||||
|
||||
/* This is not an opcode, but is used to check that tables indexed by opcode
|
||||
are the correct length, in order to catch updating errors - there have been
|
||||
some in the past. */
|
||||
|
||||
OP_TABLE_LENGTH
|
||||
|
||||
};
|
||||
|
||||
/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
|
||||
definitions that follow must also be updated to match. There are also tables
|
||||
called "opcode_possessify" in pcre2_compile.c and "coptable" and "poptable" in
|
||||
pcre2_dfa_match.c that must be updated. */
|
||||
|
||||
|
||||
/* This macro defines textual names for all the opcodes. These are used only
|
||||
for debugging, and some of them are only partial names. The macro is referenced
|
||||
only in pcre2_printint.c, which fills out the full names in many cases (and in
|
||||
some cases doesn't actually use these names at all). */
|
||||
|
||||
#define OP_NAME_LIST \
|
||||
"End", "\\A", "\\G", "\\K", "\\B", "\\b", "\\D", "\\d", \
|
||||
"\\S", "\\s", "\\W", "\\w", "Any", "AllAny", "Anybyte", \
|
||||
"notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v", \
|
||||
"extuni", "\\Z", "\\z", \
|
||||
"$", "$", "^", "^", "char", "chari", "not", "noti", \
|
||||
"*", "*?", "+", "+?", "?", "??", \
|
||||
"{", "{", "{", \
|
||||
"*+","++", "?+", "{", \
|
||||
"*", "*?", "+", "+?", "?", "??", \
|
||||
"{", "{", "{", \
|
||||
"*+","++", "?+", "{", \
|
||||
"*", "*?", "+", "+?", "?", "??", \
|
||||
"{", "{", "{", \
|
||||
"*+","++", "?+", "{", \
|
||||
"*", "*?", "+", "+?", "?", "??", \
|
||||
"{", "{", "{", \
|
||||
"*+","++", "?+", "{", \
|
||||
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
|
||||
"*+","++", "?+", "{", \
|
||||
"*", "*?", "+", "+?", "?", "??", "{", "{", \
|
||||
"*+","++", "?+", "{", \
|
||||
"class", "nclass", "xclass", "eclass", \
|
||||
"Ref", "Refi", "DnRef", "DnRefi", \
|
||||
"Recurse", "Callout", "CalloutStr", \
|
||||
"Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \
|
||||
"Reverse", "VReverse", "Assert", "Assert not", \
|
||||
"Assert back", "Assert back not", \
|
||||
"Non-atomic assert", "Non-atomic assert back", \
|
||||
"Scan substring", \
|
||||
"Once", \
|
||||
"Script run", \
|
||||
"Bra", "BraPos", "CBra", "CBraPos", \
|
||||
"Cond", \
|
||||
"SBra", "SBraPos", "SCBra", "SCBraPos", \
|
||||
"SCond", \
|
||||
"Capture ref", "Capture dnref", "Cond rec", "Cond dnrec", \
|
||||
"Cond false", "Cond true", \
|
||||
"Brazero", "Braminzero", "Braposzero", \
|
||||
"*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \
|
||||
"*THEN", "*THEN", "*COMMIT", "*COMMIT", "*FAIL", \
|
||||
"*ACCEPT", "*ASSERT_ACCEPT", \
|
||||
"Close", "Skip zero", "Define", "\\B (ucp)", "\\b (ucp)"
|
||||
|
||||
|
||||
/* This macro defines the length of fixed length operations in the compiled
|
||||
regex. The lengths are used when searching for specific things, and also in the
|
||||
debugging printing of a compiled regex. We use a macro so that it can be
|
||||
defined close to the definitions of the opcodes themselves.
|
||||
|
||||
As things have been extended, some of these are no longer fixed lengths, but are
|
||||
minima instead. For example, the length of a single-character repeat may vary
|
||||
in UTF-8 mode. The code that uses this table must know about such things. */
|
||||
|
||||
#define OP_LENGTHS \
|
||||
1, /* End */ \
|
||||
1, 1, 1, 1, 1, /* \A, \G, \K, \B, \b */ \
|
||||
1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ \
|
||||
1, 1, 1, /* Any, AllAny, Anybyte */ \
|
||||
3, 3, /* \P, \p */ \
|
||||
1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ \
|
||||
1, /* \X */ \
|
||||
1, 1, 1, 1, 1, 1, /* \Z, \z, $, $M ^, ^M */ \
|
||||
2, /* Char - the minimum length */ \
|
||||
2, /* Chari - the minimum length */ \
|
||||
2, /* not */ \
|
||||
2, /* noti */ \
|
||||
/* Positive single-char repeats ** These are */ \
|
||||
2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** minima in */ \
|
||||
2+IMM2_SIZE, 2+IMM2_SIZE, /* upto, minupto ** mode */ \
|
||||
2+IMM2_SIZE, /* exact */ \
|
||||
2, 2, 2, 2+IMM2_SIZE, /* *+, ++, ?+, upto+ */ \
|
||||
2, 2, 2, 2, 2, 2, /* *I, *?I, +I, +?I, ?I, ??I ** UTF-8 */ \
|
||||
2+IMM2_SIZE, 2+IMM2_SIZE, /* upto I, minupto I */ \
|
||||
2+IMM2_SIZE, /* exact I */ \
|
||||
2, 2, 2, 2+IMM2_SIZE, /* *+I, ++I, ?+I, upto+I */ \
|
||||
/* Negative single-char repeats - only for chars < 256 */ \
|
||||
2, 2, 2, 2, 2, 2, /* NOT *, *?, +, +?, ?, ?? */ \
|
||||
2+IMM2_SIZE, 2+IMM2_SIZE, /* NOT upto, minupto */ \
|
||||
2+IMM2_SIZE, /* NOT exact */ \
|
||||
2, 2, 2, 2+IMM2_SIZE, /* Possessive NOT *, +, ?, upto */ \
|
||||
2, 2, 2, 2, 2, 2, /* NOT *I, *?I, +I, +?I, ?I, ??I */ \
|
||||
2+IMM2_SIZE, 2+IMM2_SIZE, /* NOT upto I, minupto I */ \
|
||||
2+IMM2_SIZE, /* NOT exact I */ \
|
||||
2, 2, 2, 2+IMM2_SIZE, /* Possessive NOT *I, +I, ?I, upto I */ \
|
||||
/* Positive type repeats */ \
|
||||
2, 2, 2, 2, 2, 2, /* Type *, *?, +, +?, ?, ?? */ \
|
||||
2+IMM2_SIZE, 2+IMM2_SIZE, /* Type upto, minupto */ \
|
||||
2+IMM2_SIZE, /* Type exact */ \
|
||||
2, 2, 2, 2+IMM2_SIZE, /* Possessive *+, ++, ?+, upto+ */ \
|
||||
/* Character class & ref repeats */ \
|
||||
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ \
|
||||
1+2*IMM2_SIZE, 1+2*IMM2_SIZE, /* CRRANGE, CRMINRANGE */ \
|
||||
1, 1, 1, 1+2*IMM2_SIZE, /* Possessive *+, ++, ?+, CRPOSRANGE */ \
|
||||
1+(32/sizeof(PCRE2_UCHAR)), /* CLASS */ \
|
||||
1+(32/sizeof(PCRE2_UCHAR)), /* NCLASS */ \
|
||||
0, /* XCLASS - variable length */ \
|
||||
0, /* ECLASS - variable length */ \
|
||||
1+IMM2_SIZE, /* REF */ \
|
||||
1+IMM2_SIZE+1, /* REFI */ \
|
||||
1+2*IMM2_SIZE, /* DNREF */ \
|
||||
1+2*IMM2_SIZE+1, /* DNREFI */ \
|
||||
1+LINK_SIZE, /* RECURSE */ \
|
||||
1+2*LINK_SIZE+1, /* CALLOUT */ \
|
||||
0, /* CALLOUT_STR - variable length */ \
|
||||
1+LINK_SIZE, /* Alt */ \
|
||||
1+LINK_SIZE, /* Ket */ \
|
||||
1+LINK_SIZE, /* KetRmax */ \
|
||||
1+LINK_SIZE, /* KetRmin */ \
|
||||
1+LINK_SIZE, /* KetRpos */ \
|
||||
1+IMM2_SIZE, /* Reverse */ \
|
||||
1+2*IMM2_SIZE, /* VReverse */ \
|
||||
1+LINK_SIZE, /* Assert */ \
|
||||
1+LINK_SIZE, /* Assert not */ \
|
||||
1+LINK_SIZE, /* Assert behind */ \
|
||||
1+LINK_SIZE, /* Assert behind not */ \
|
||||
1+LINK_SIZE, /* NA Assert */ \
|
||||
1+LINK_SIZE, /* NA Assert behind */ \
|
||||
1+LINK_SIZE, /* Scan substring */ \
|
||||
1+LINK_SIZE, /* ONCE */ \
|
||||
1+LINK_SIZE, /* SCRIPT_RUN */ \
|
||||
1+LINK_SIZE, /* BRA */ \
|
||||
1+LINK_SIZE, /* BRAPOS */ \
|
||||
1+LINK_SIZE+IMM2_SIZE, /* CBRA */ \
|
||||
1+LINK_SIZE+IMM2_SIZE, /* CBRAPOS */ \
|
||||
1+LINK_SIZE, /* COND */ \
|
||||
1+LINK_SIZE, /* SBRA */ \
|
||||
1+LINK_SIZE, /* SBRAPOS */ \
|
||||
1+LINK_SIZE+IMM2_SIZE, /* SCBRA */ \
|
||||
1+LINK_SIZE+IMM2_SIZE, /* SCBRAPOS */ \
|
||||
1+LINK_SIZE, /* SCOND */ \
|
||||
1+IMM2_SIZE, 1+2*IMM2_SIZE, /* CREF, DNCREF */ \
|
||||
1+IMM2_SIZE, 1+2*IMM2_SIZE, /* RREF, DNRREF */ \
|
||||
1, 1, /* FALSE, TRUE */ \
|
||||
1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \
|
||||
3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \
|
||||
1, 3, /* SKIP, SKIP_ARG */ \
|
||||
1, 3, /* THEN, THEN_ARG */ \
|
||||
1, 3, /* COMMIT, COMMIT_ARG */ \
|
||||
1, 1, 1, /* FAIL, ACCEPT, ASSERT_ACCEPT */ \
|
||||
1+IMM2_SIZE, 1, /* CLOSE, SKIPZERO */ \
|
||||
1, /* DEFINE */ \
|
||||
1, 1 /* \B and \b in UCP mode */
|
||||
|
||||
/* A magic value for OP_RREF to indicate the "any recursion" condition. */
|
||||
|
||||
#define RREF_ANY 0xffff
|
||||
|
||||
/* Constants used by OP_REFI and OP_DNREFI to control matching behaviour. */
|
||||
|
||||
#define REFI_FLAG_CASELESS_RESTRICT 0x1
|
||||
#define REFI_FLAG_TURKISH_CASING 0x2
|
||||
|
||||
|
||||
/* ---------- Private structures that are mode-independent. ---------- */
|
||||
|
||||
/* Structure to hold data for custom memory management. */
|
||||
|
||||
/* Bundle of the two allocator callbacks plus one opaque pointer. */
typedef struct pcre2_memctl {
|
||||
/* Allocation callback: takes a size and a void *, returns the new block.
NOTE(review): the void * argument is presumably memory_data - confirm at the
call sites. */
void * (*malloc)(size_t, void *);
|
||||
/* Release callback: takes two void * arguments and returns nothing.
NOTE(review): presumably (block, memory_data) - confirm at the call sites. */
void (*free)(void *, void *);
|
||||
/* Opaque pointer stored alongside the two callbacks. */
void *memory_data;
|
||||
} pcre2_memctl;
|
||||
|
||||
/* Structure for building a chain of open capturing subpatterns during
|
||||
compiling, so that instructions to close them can be compiled when (*ACCEPT) is
|
||||
encountered. */
|
||||
|
||||
typedef struct open_capitem {
|
||||
struct open_capitem *next; /* Chain link */
|
||||
uint16_t number; /* Capture number */
|
||||
uint16_t assert_depth; /* Assertion depth when opened */
|
||||
} open_capitem;
|
||||
|
||||
/* Layout of the UCP type table that translates property names into types and
|
||||
codes. Each entry used to point directly to a name, but to reduce the number of
|
||||
relocations in shared libraries, it now has an offset into a single string
|
||||
instead. */
|
||||
|
||||
/* One entry of the property-name lookup table: three 16-bit fields. */
typedef struct {
|
||||
/* Offset of this entry's name within the single shared name string. */
uint16_t name_offset;
|
||||
/* NOTE(review): presumably the property type the name maps to - confirm
against the table generator. */
uint16_t type;
|
||||
/* NOTE(review): presumably the code/value within that type - confirm. */
uint16_t value;
|
||||
} ucp_type_table;
|
||||
|
||||
/* Unicode character database (UCD) record format */
|
||||
|
||||
typedef struct {
|
||||
uint8_t script; /* ucp_Arabic, etc. */
|
||||
uint8_t chartype; /* ucp_Cc, etc. (general categories) */
|
||||
uint8_t gbprop; /* ucp_gbControl, etc. (grapheme break property) */
|
||||
uint8_t caseset; /* offset to multichar other cases or zero */
|
||||
int32_t other_case; /* offset to other case, or zero if none */
|
||||
uint16_t scriptx_bidiclass; /* script extension (11 bit) and bidi class (5 bit) values */
|
||||
uint16_t bprops; /* binary properties offset */
|
||||
} ucd_record;
|
||||
|
||||
/* UCD access macros */
|
||||
|
||||
/* Number of code points that share one entry in the stage 1 table. */
#define UCD_BLOCK_SIZE 128

/* Two-stage lookup yielding a pointer into PRIV(ucd_records) for code point
ch. ucd_stage1 is indexed by ch / UCD_BLOCK_SIZE and gives a block number;
ucd_stage2 is indexed by block * UCD_BLOCK_SIZE + ch % UCD_BLOCK_SIZE and
gives the record index. There is no range check here, and ch is evaluated
twice, so pass an expression without side effects. */
#define REAL_GET_UCD(ch) (PRIV(ucd_records) + \
        PRIV(ucd_stage2)[PRIV(ucd_stage1)[(int)(ch) / UCD_BLOCK_SIZE] * \
        UCD_BLOCK_SIZE + (int)(ch) % UCD_BLOCK_SIZE])

/* GET_UCD is the form used by the accessor macros. In the 32-bit library a
value above MAX_UTF_CODE_POINT is mapped to PRIV(dummy_ucd_record) instead of
indexing the tables. The argument is parenthesized in the range test so that
an expression argument (for example "a & b") is compared as a whole. */
#if PCRE2_CODE_UNIT_WIDTH == 32
#define GET_UCD(ch) (((ch) > MAX_UTF_CODE_POINT)? \
  PRIV(dummy_ucd_record) : REAL_GET_UCD(ch))
#else
#define GET_UCD(ch) REAL_GET_UCD(ch)
#endif
|
||||
|
||||
/* Bit layout helpers for the two packed 16-bit fields of ucd_record. */
/* Keeps the low 10 bits of scriptx_bidiclass. NOTE(review): the field comment
in ucd_record describes the script extension value as 11 bits wide, but this
mask drops bit 10 - confirm which width is intended. */
#define UCD_SCRIPTX_MASK 0x3ff
|
||||
/* The bidi class is the part of scriptx_bidiclass above bit 10, i.e. the top
5 bits of the 16-bit field. */
#define UCD_BIDICLASS_SHIFT 11
|
||||
/* Keeps the low 12 bits of bprops. */
#define UCD_BPROPS_MASK 0xfff
|
||||
|
||||
/* Field extractors; prop is a pointer to a ucd_record. */
#define UCD_SCRIPTX_PROP(prop) ((prop)->scriptx_bidiclass & UCD_SCRIPTX_MASK)
|
||||
#define UCD_BIDICLASS_PROP(prop) ((prop)->scriptx_bidiclass >> UCD_BIDICLASS_SHIFT)
|
||||
#define UCD_BPROPS_PROP(prop) ((prop)->bprops & UCD_BPROPS_MASK)
|
||||
|
||||
/* Per-character accessors. Each one looks up the record for code point ch
with GET_UCD and returns one field of it (or a value derived from one field).
ch may be evaluated more than once, so it must not have side effects. */

/* Raw fields of the record. */
#define UCD_CHARTYPE(ch)    GET_UCD(ch)->chartype
#define UCD_SCRIPT(ch)      GET_UCD(ch)->script
/* The record's chartype translated through the PRIV(ucp_gentype) table. */
#define UCD_CATEGORY(ch)    PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
#define UCD_GRAPHBREAK(ch)  GET_UCD(ch)->gbprop
#define UCD_CASESET(ch)     GET_UCD(ch)->caseset
/* other_case is a signed offset that is added to the code point itself. The
argument is parenthesized before the cast so that an expression argument such
as "c ? a : b" is converted and added as a whole. */
#define UCD_OTHERCASE(ch)   ((uint32_t)((int)(ch) + (int)(GET_UCD(ch)->other_case)))
/* Packed fields, unpacked by the UCD_*_PROP macros. */
#define UCD_SCRIPTX(ch)     UCD_SCRIPTX_PROP(GET_UCD(ch))
#define UCD_BPROPS(ch)      UCD_BPROPS_PROP(GET_UCD(ch))
#define UCD_BIDICLASS(ch)   UCD_BIDICLASS_PROP(GET_UCD(ch))
|
||||
/* True for exactly U+0049 'I', U+0069 'i', U+0130 and U+0131: OR-ing with
0x20 maps 0x49 onto 0x69, and OR-ing with 1 maps 0x0130 onto 0x0131. ch may be
evaluated twice. */
#define UCD_ANY_I(ch) \
|
||||
/* match any of the four characters 'i', 'I', U+0130, U+0131 */ \
|
||||
(((uint32_t)(ch) | 0x20u) == 0x69u || ((uint32_t)(ch) | 1u) == 0x0131u)
|
||||
/* True for U+0069 'i' and U+0130 only. ch may be evaluated twice. */
#define UCD_DOTTED_I(ch) \
|
||||
((uint32_t)(ch) == 0x69u || (uint32_t)(ch) == 0x0130u)
|
||||
/* Maps U+0130 to U+0069 and U+0049 to U+0131; every other value is returned
unchanged (as uint32_t). ch may be evaluated up to three times. */
#define UCD_FOLD_I_TURKISH(ch) \
|
||||
((uint32_t)(ch) == 0x0130u ? 0x69u : \
|
||||
(uint32_t)(ch) == 0x49u ? 0x0131u : (uint32_t)(ch))
|
||||
|
||||
/* The "scriptx" and bprops fields contain offsets into vectors of 32-bit words
|
||||
that form a bitmap representing a list of scripts or boolean properties. These
|
||||
macros test or set a bit in the map by number. */
|
||||
|
||||
/* Test bit n of a map made of 32-bit words. The result is the masked word
(non-zero when the bit is set), not a normalized 0/1. n is evaluated twice and
must be non-negative. */
#define MAPBIT(map,n) ((map)[(n)/32]&(1u<<((n)%32)))
|
||||
/* Set bit n of the map in place; the value of the expression is the updated
word. n is evaluated twice and must be non-negative. */
#define MAPSET(map,n) ((map)[(n)/32]|=(1u<<((n)%32)))
|
||||
|
||||
/* Header for serialized pcre2 codes. */
|
||||
|
||||
typedef struct pcre2_serialized_data {
|
||||
uint32_t magic;
|
||||
uint32_t version;
|
||||
uint32_t config;
|
||||
int32_t number_of_codes;
|
||||
} pcre2_serialized_data;
|
||||
|
||||
|
||||
|
||||
/* ----------------- Items that need PCRE2_CODE_UNIT_WIDTH ----------------- */
|
||||
|
||||
/* When this file is included by pcre2test, PCRE2_CODE_UNIT_WIDTH is defined as
|
||||
0, so the following items are omitted. */
|
||||
|
||||
#if defined PCRE2_CODE_UNIT_WIDTH && PCRE2_CODE_UNIT_WIDTH != 0
|
||||
|
||||
/* EBCDIC is supported only for the 8-bit library. */
|
||||
|
||||
#if defined EBCDIC && PCRE2_CODE_UNIT_WIDTH != 8
|
||||
#error EBCDIC is not supported for the 16-bit or 32-bit libraries
|
||||
#endif
|
||||
|
||||
/* This is the largest non-UTF code point. */
|
||||
|
||||
#define MAX_NON_UTF_CHAR (0xffffffffU >> (32 - PCRE2_CODE_UNIT_WIDTH))
|
||||
|
||||
/* Internal shared data tables and variables. These are used by more than one
|
||||
of the exported public functions. They have to be "external" in the C sense,
|
||||
but are not part of the PCRE2 public API. Although the data for some of them is
|
||||
identical in all libraries, they must have different names so that multiple
|
||||
libraries can be simultaneously linked to a single application. However, UTF-8
|
||||
tables are needed only when compiling the 8-bit library. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
extern const int PRIV(utf8_table1)[];
|
||||
extern const int PRIV(utf8_table1_size);
|
||||
extern const int PRIV(utf8_table2)[];
|
||||
extern const int PRIV(utf8_table3)[];
|
||||
extern const uint8_t PRIV(utf8_table4)[];
|
||||
#endif
|
||||
|
||||
#define _pcre2_OP_lengths PCRE2_SUFFIX(_pcre2_OP_lengths_)
|
||||
#define _pcre2_callout_end_delims PCRE2_SUFFIX(_pcre2_callout_end_delims_)
|
||||
#define _pcre2_callout_start_delims PCRE2_SUFFIX(_pcre2_callout_start_delims_)
|
||||
#define _pcre2_default_compile_context PCRE2_SUFFIX(_pcre2_default_compile_context_)
|
||||
#define _pcre2_default_convert_context PCRE2_SUFFIX(_pcre2_default_convert_context_)
|
||||
#define _pcre2_default_match_context PCRE2_SUFFIX(_pcre2_default_match_context_)
|
||||
#define _pcre2_default_tables PCRE2_SUFFIX(_pcre2_default_tables_)
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
#define _pcre2_dummy_ucd_record PCRE2_SUFFIX(_pcre2_dummy_ucd_record_)
|
||||
#endif
|
||||
#define _pcre2_hspace_list PCRE2_SUFFIX(_pcre2_hspace_list_)
|
||||
#define _pcre2_vspace_list PCRE2_SUFFIX(_pcre2_vspace_list_)
|
||||
#define _pcre2_ucd_boolprop_sets PCRE2_SUFFIX(_pcre2_ucd_boolprop_sets_)
|
||||
#define _pcre2_ucd_caseless_sets PCRE2_SUFFIX(_pcre2_ucd_caseless_sets_)
|
||||
#define _pcre2_ucd_turkish_dotted_i_caseset PCRE2_SUFFIX(_pcre2_ucd_turkish_dotted_i_caseset_)
|
||||
#define _pcre2_ucd_nocase_ranges PCRE2_SUFFIX(_pcre2_ucd_nocase_ranges_)
|
||||
#define _pcre2_ucd_nocase_ranges_size PCRE2_SUFFIX(_pcre2_ucd_nocase_ranges_size_)
|
||||
#define _pcre2_ucd_digit_sets PCRE2_SUFFIX(_pcre2_ucd_digit_sets_)
|
||||
#define _pcre2_ucd_script_sets PCRE2_SUFFIX(_pcre2_ucd_script_sets_)
|
||||
#define _pcre2_ucd_records PCRE2_SUFFIX(_pcre2_ucd_records_)
|
||||
#define _pcre2_ucd_stage1 PCRE2_SUFFIX(_pcre2_ucd_stage1_)
|
||||
#define _pcre2_ucd_stage2 PCRE2_SUFFIX(_pcre2_ucd_stage2_)
|
||||
#define _pcre2_ucp_gbtable PCRE2_SUFFIX(_pcre2_ucp_gbtable_)
|
||||
#define _pcre2_ucp_gentype PCRE2_SUFFIX(_pcre2_ucp_gentype_)
|
||||
#define _pcre2_ucp_typerange PCRE2_SUFFIX(_pcre2_ucp_typerange_)
|
||||
#define _pcre2_unicode_version PCRE2_SUFFIX(_pcre2_unicode_version_)
|
||||
#define _pcre2_utt PCRE2_SUFFIX(_pcre2_utt_)
|
||||
#define _pcre2_utt_names PCRE2_SUFFIX(_pcre2_utt_names_)
|
||||
#define _pcre2_utt_size PCRE2_SUFFIX(_pcre2_utt_size_)
|
||||
|
||||
extern const uint8_t PRIV(OP_lengths)[];
|
||||
extern const uint32_t PRIV(callout_end_delims)[];
|
||||
extern const uint32_t PRIV(callout_start_delims)[];
|
||||
extern pcre2_compile_context PRIV(default_compile_context);
|
||||
extern pcre2_convert_context PRIV(default_convert_context);
|
||||
extern pcre2_match_context PRIV(default_match_context);
|
||||
extern const uint8_t PRIV(default_tables)[];
|
||||
extern const uint32_t PRIV(hspace_list)[];
|
||||
extern const uint32_t PRIV(vspace_list)[];
|
||||
extern const uint32_t PRIV(ucd_boolprop_sets)[];
|
||||
extern const uint32_t PRIV(ucd_caseless_sets)[];
|
||||
extern const uint32_t PRIV(ucd_turkish_dotted_i_caseset);
|
||||
extern const uint32_t PRIV(ucd_nocase_ranges)[];
|
||||
extern const uint32_t PRIV(ucd_nocase_ranges_size);
|
||||
extern const uint32_t PRIV(ucd_digit_sets)[];
|
||||
extern const uint32_t PRIV(ucd_script_sets)[];
|
||||
extern const ucd_record PRIV(ucd_records)[];
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
extern const ucd_record PRIV(dummy_ucd_record)[];
|
||||
#endif
|
||||
extern const uint16_t PRIV(ucd_stage1)[];
|
||||
extern const uint16_t PRIV(ucd_stage2)[];
|
||||
extern const uint32_t PRIV(ucp_gbtable)[];
|
||||
extern const uint32_t PRIV(ucp_gentype)[];
|
||||
#ifdef SUPPORT_JIT
|
||||
extern const int PRIV(ucp_typerange)[];
|
||||
#endif
|
||||
extern const char *PRIV(unicode_version);
|
||||
extern const ucp_type_table PRIV(utt)[];
|
||||
extern const char PRIV(utt_names)[];
|
||||
extern const size_t PRIV(utt_size);
|
||||
|
||||
/* Mode-dependent macros and hidden and private structures are defined in a
|
||||
separate file so that pcre2test can include them at all supported widths. When
|
||||
compiling the library, PCRE2_CODE_UNIT_WIDTH will be defined, and we can
|
||||
include them at the appropriate width, after setting up suffix macros for the
|
||||
private structures. */
|
||||
|
||||
#define branch_chain PCRE2_SUFFIX(branch_chain_)
|
||||
#define compile_block PCRE2_SUFFIX(compile_block_)
|
||||
#define dfa_match_block PCRE2_SUFFIX(dfa_match_block_)
|
||||
#define match_block PCRE2_SUFFIX(match_block_)
|
||||
#define named_group PCRE2_SUFFIX(named_group_)
|
||||
|
||||
#include "pcre2_intmodedep.h"
|
||||
|
||||
/* Private "external" functions. These are internal functions that are called
|
||||
from modules other than the one in which they are defined. They have to be
|
||||
"external" in the C sense, but are not part of the PCRE2 public API. They are
|
||||
not referenced from pcre2test, and must not be defined when no code unit width
|
||||
is available. */
|
||||
|
||||
#define _pcre2_auto_possessify PCRE2_SUFFIX(_pcre2_auto_possessify_)
|
||||
#define _pcre2_check_escape PCRE2_SUFFIX(_pcre2_check_escape_)
|
||||
#define _pcre2_extuni PCRE2_SUFFIX(_pcre2_extuni_)
|
||||
#define _pcre2_find_bracket PCRE2_SUFFIX(_pcre2_find_bracket_)
|
||||
#define _pcre2_is_newline PCRE2_SUFFIX(_pcre2_is_newline_)
|
||||
#define _pcre2_jit_free_rodata PCRE2_SUFFIX(_pcre2_jit_free_rodata_)
|
||||
#define _pcre2_jit_free PCRE2_SUFFIX(_pcre2_jit_free_)
|
||||
#define _pcre2_jit_get_size PCRE2_SUFFIX(_pcre2_jit_get_size_)
|
||||
#define _pcre2_jit_get_target PCRE2_SUFFIX(_pcre2_jit_get_target_)
|
||||
#define _pcre2_memctl_malloc PCRE2_SUFFIX(_pcre2_memctl_malloc_)
|
||||
#define _pcre2_ord2utf PCRE2_SUFFIX(_pcre2_ord2utf_)
|
||||
#define _pcre2_script_run PCRE2_SUFFIX(_pcre2_script_run_)
|
||||
#define _pcre2_strcmp PCRE2_SUFFIX(_pcre2_strcmp_)
|
||||
#define _pcre2_strcmp_c8 PCRE2_SUFFIX(_pcre2_strcmp_c8_)
|
||||
#define _pcre2_strcpy_c8 PCRE2_SUFFIX(_pcre2_strcpy_c8_)
|
||||
#define _pcre2_strlen PCRE2_SUFFIX(_pcre2_strlen_)
|
||||
#define _pcre2_strncmp PCRE2_SUFFIX(_pcre2_strncmp_)
|
||||
#define _pcre2_strncmp_c8 PCRE2_SUFFIX(_pcre2_strncmp_c8_)
|
||||
#define _pcre2_study PCRE2_SUFFIX(_pcre2_study_)
|
||||
#define _pcre2_valid_utf PCRE2_SUFFIX(_pcre2_valid_utf_)
|
||||
#define _pcre2_was_newline PCRE2_SUFFIX(_pcre2_was_newline_)
|
||||
#define _pcre2_xclass PCRE2_SUFFIX(_pcre2_xclass_)
|
||||
#define _pcre2_eclass PCRE2_SUFFIX(_pcre2_eclass_)
|
||||
|
||||
extern int _pcre2_auto_possessify(PCRE2_UCHAR *,
|
||||
const compile_block *);
|
||||
extern int _pcre2_check_escape(PCRE2_SPTR *, PCRE2_SPTR, uint32_t *,
|
||||
int *, uint32_t, uint32_t, uint32_t, BOOL, compile_block *);
|
||||
extern PCRE2_SPTR _pcre2_extuni(uint32_t, PCRE2_SPTR, PCRE2_SPTR, PCRE2_SPTR,
|
||||
BOOL, int *);
|
||||
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
|
||||
extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
|
||||
uint32_t *, BOOL);
|
||||
extern void _pcre2_jit_free_rodata(void *, void *);
|
||||
extern void _pcre2_jit_free(void *, pcre2_memctl *);
|
||||
extern size_t _pcre2_jit_get_size(void *);
|
||||
/* Takes no arguments and returns a pointer to constant characters. Declared
with an explicit "extern", like every other private "external" function in this
list; linkage is unchanged because function declarations are external by
default. */
extern const char * _pcre2_jit_get_target(void);
|
||||
extern void * _pcre2_memctl_malloc(size_t, pcre2_memctl *);
|
||||
extern unsigned int _pcre2_ord2utf(uint32_t, PCRE2_UCHAR *);
|
||||
extern BOOL _pcre2_script_run(PCRE2_SPTR, PCRE2_SPTR, BOOL);
|
||||
extern int _pcre2_strcmp(PCRE2_SPTR, PCRE2_SPTR);
|
||||
extern int _pcre2_strcmp_c8(PCRE2_SPTR, const char *);
|
||||
extern PCRE2_SIZE _pcre2_strcpy_c8(PCRE2_UCHAR *, const char *);
|
||||
extern PCRE2_SIZE _pcre2_strlen(PCRE2_SPTR);
|
||||
extern int _pcre2_strncmp(PCRE2_SPTR, PCRE2_SPTR, size_t);
|
||||
extern int _pcre2_strncmp_c8(PCRE2_SPTR, const char *, size_t);
|
||||
extern int _pcre2_study(pcre2_real_code *);
|
||||
extern int _pcre2_valid_utf(PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE *);
|
||||
extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
|
||||
uint32_t *, BOOL);
|
||||
extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, const uint8_t *, BOOL);
|
||||
extern BOOL _pcre2_eclass(uint32_t, PCRE2_SPTR, PCRE2_SPTR,
|
||||
const uint8_t *, BOOL);
|
||||
|
||||
/* This function is needed only when memmove() is not available. */
|
||||
|
||||
#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
|
||||
#define _pcre2_memmove PCRE2_SUFFIX(_pcre2_memmove)
|
||||
extern void * _pcre2_memmove(void *, const void *, size_t);
|
||||
#endif
|
||||
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH */
|
||||
|
||||
extern BOOL PRIV(ckd_smul)(PCRE2_SIZE *, int, int);
|
||||
|
||||
#include "pcre2_util.h"
|
||||
|
||||
#endif /* PCRE2_INTERNAL_H_IDEMPOTENT_GUARD */
|
||||
|
||||
/* End of pcre2_internal.h */
|
||||
973
3rd/pcre2/src/pcre2_intmodedep.h
Normal file
973
3rd/pcre2/src/pcre2_intmodedep.h
Normal file
@@ -0,0 +1,973 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains mode-dependent macro and structure definitions. The
|
||||
file is #included by pcre2_internal.h if PCRE2_CODE_UNIT_WIDTH is defined.
|
||||
These mode-dependent items are kept in a separate file so that they can also be
|
||||
#included multiple times for different code unit widths by pcre2test in order
|
||||
to have access to the hidden structures at all supported widths.
|
||||
|
||||
Some of the mode-dependent macros are required at different widths for
|
||||
different parts of the pcre2test code (in particular, the included
|
||||
pcre2_printint.c file). We undefine them here so that they can be re-defined for
|
||||
multiple inclusions. Not all of these are used in pcre2test, but it's easier
|
||||
just to undefine them all. */
|
||||
|
||||
#undef ACROSSCHAR
|
||||
#undef BACKCHAR
|
||||
#undef BYTES2CU
|
||||
#undef CHMAX_255
|
||||
#undef CU2BYTES
|
||||
#undef FORWARDCHAR
|
||||
#undef FORWARDCHARTEST
|
||||
#undef GET
|
||||
#undef GET2
|
||||
#undef GETCHAR
|
||||
#undef GETCHARINC
|
||||
#undef GETCHARINCTEST
|
||||
#undef GETCHARLEN
|
||||
#undef GETCHARLENTEST
|
||||
#undef GETCHARTEST
|
||||
#undef GET_EXTRALEN
|
||||
#undef HAS_EXTRALEN
|
||||
#undef IMM2_SIZE
|
||||
#undef MAX_255
|
||||
#undef MAX_MARK
|
||||
#undef MAX_PATTERN_SIZE
|
||||
#undef MAX_UTF_SINGLE_CU
|
||||
#undef NOT_FIRSTCU
|
||||
#undef PUT
|
||||
#undef PUT2
|
||||
#undef PUT2INC
|
||||
#undef PUTCHAR
|
||||
#undef PUTINC
|
||||
#undef TABLE_GET
|
||||
|
||||
|
||||
|
||||
/* -------------------------- MACROS ----------------------------- */
|
||||
|
||||
/* PCRE keeps offsets in its compiled code as at least 16-bit quantities
|
||||
(always stored in big-endian order in 8-bit mode) by default. These are used,
|
||||
for example, to link from the start of a subpattern to its alternatives and its
|
||||
end. The use of 16 bits per offset limits the size of an 8-bit compiled regex
|
||||
to around 64K, which is big enough for almost everybody. However, I received a
|
||||
request for an even bigger limit. For this reason, and also to make the code
|
||||
easier to maintain, the storing and loading of offsets from the compiled code
|
||||
unit string is now handled by the macros that are defined here.
|
||||
|
||||
The macros are controlled by the value of LINK_SIZE. This defaults to 2, but
|
||||
values of 3 or 4 are also supported. */
|
||||
|
||||
/* ------------------- 8-bit support ------------------ */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
|
||||
#if LINK_SIZE == 2
|
||||
/* LINK_SIZE 2, 8-bit code units: an offset occupies two code units, most
significant byte first. PUT(a,n,d) stores d at a[n] and a[n+1]; GET(a,n) reads
it back as an unsigned int. Every use of a macro parameter is parenthesized and
each expansion is a single parenthesized expression, so the macros are safe
with compound arguments (for example "code + 1") and in any expression context.
The arguments a and n are evaluated more than once and must be free of side
effects. */
#define PUT(a,n,d) \
  ((a)[n] = (PCRE2_UCHAR)((d) >> 8), \
   (a)[(n)+1] = (PCRE2_UCHAR)((d) & 255))
#define GET(a,n) \
  ((unsigned int)(((a)[n] << 8) | (a)[(n)+1]))
#define MAX_PATTERN_SIZE (1 << 16)
|
||||
|
||||
#elif LINK_SIZE == 3
|
||||
/* LINK_SIZE 3, 8-bit code units: an offset occupies three code units, most
significant byte first. PUT(a,n,d) stores d at a[n]..a[n+2]; GET(a,n) reads it
back as an unsigned int. Every use of a macro parameter is parenthesized and
each expansion is a single parenthesized expression. The arguments a and n are
evaluated more than once and must be free of side effects. */
#define PUT(a,n,d) \
  ((a)[n] = (PCRE2_UCHAR)((d) >> 16), \
   (a)[(n)+1] = (PCRE2_UCHAR)((d) >> 8), \
   (a)[(n)+2] = (PCRE2_UCHAR)((d) & 255))
#define GET(a,n) \
  ((unsigned int)(((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2]))
#define MAX_PATTERN_SIZE (1 << 24)
|
||||
|
||||
#elif LINK_SIZE == 4
|
||||
/* LINK_SIZE 4, 8-bit code units: an offset occupies four code units, most
significant byte first. PUT(a,n,d) stores d at a[n]..a[n+3]; GET(a,n) reads it
back as an unsigned int. In GET the leading byte is converted to unsigned int
before being shifted by 24: a code unit promotes to (signed) int, and shifting a
value of 0x80 or more into the sign bit would be undefined. Every use of a macro
parameter is parenthesized and each expansion is a single parenthesized
expression. The arguments a and n are evaluated more than once and must be free
of side effects. */
#define PUT(a,n,d) \
  ((a)[n] = (PCRE2_UCHAR)((d) >> 24), \
   (a)[(n)+1] = (PCRE2_UCHAR)((d) >> 16), \
   (a)[(n)+2] = (PCRE2_UCHAR)((d) >> 8), \
   (a)[(n)+3] = (PCRE2_UCHAR)((d) & 255))
#define GET(a,n) \
  ((unsigned int)(((unsigned int)(a)[n] << 24) | ((a)[(n)+1] << 16) | \
                  ((a)[(n)+2] << 8) | (a)[(n)+3]))
#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */
|
||||
|
||||
#else
|
||||
#error LINK_SIZE must be 2, 3, or 4
|
||||
#endif
|
||||
|
||||
|
||||
/* ------------------- 16-bit support ------------------ */
|
||||
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
|
||||
#if LINK_SIZE == 2
|
||||
#undef LINK_SIZE
|
||||
#define LINK_SIZE 1
|
||||
/* 16-bit code units with a single-unit link: PUT(a,n,d) stores d, converted to
PCRE2_UCHAR, in a[n]; GET(a,n) yields a[n]. The parameter a is parenthesized so
that compound arguments such as "code + 1" index the intended element. */
#define PUT(a,n,d) \
  ((a)[n] = (PCRE2_UCHAR)(d))
#define GET(a,n) \
  ((a)[n])
#define MAX_PATTERN_SIZE (1 << 16)
|
||||
|
||||
#elif LINK_SIZE == 3 || LINK_SIZE == 4
|
||||
#undef LINK_SIZE
|
||||
#define LINK_SIZE 2
|
||||
/* 16-bit code units with a two-unit link: PUT(a,n,d) stores the high 16 bits of
d in a[n] and the low 16 bits in a[n+1]; GET(a,n) reads them back as an unsigned
int. In GET the leading unit is converted to unsigned int before being shifted
by 16: a 16-bit code unit promotes to (signed) int, and shifting a value of
0x8000 or more into the sign bit would be undefined. Every use of a macro
parameter is parenthesized and each expansion is a single parenthesized
expression. The arguments a and n are evaluated more than once and must be free
of side effects. */
#define PUT(a,n,d) \
  ((a)[n] = (PCRE2_UCHAR)((d) >> 16), \
   (a)[(n)+1] = (PCRE2_UCHAR)((d) & 65535))
#define GET(a,n) \
  ((unsigned int)(((unsigned int)(a)[n] << 16) | (a)[(n)+1]))
#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */
|
||||
|
||||
#else
|
||||
#error LINK_SIZE must be 2, 3, or 4
|
||||
#endif
|
||||
|
||||
|
||||
/* ------------------- 32-bit support ------------------ */
|
||||
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 32
|
||||
#undef LINK_SIZE
|
||||
#define LINK_SIZE 1
|
||||
/* 32-bit code units: a link fits in one code unit, so PUT(a,n,d) is a plain
store to a[n] and GET(a,n) a plain load. The parameter a is parenthesized so
that compound arguments such as "code + 1" index the intended element. */
#define PUT(a,n,d) \
  ((a)[n] = (d))
#define GET(a,n) \
  ((a)[n])
#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */
|
||||
|
||||
#else
|
||||
#error Unsupported compiling mode
|
||||
#endif
|
||||
|
||||
|
||||
/* --------------- Other mode-specific macros ----------------- */
|
||||
|
||||
/* PCRE uses some other (at least) 16-bit quantities that do not change when
|
||||
the size of offsets changes. These are used for repeat counts and for other
|
||||
things such as capturing parenthesis numbers in back references.
|
||||
|
||||
Define the number of code units required to hold a 16-bit count/offset, and
|
||||
macros to load and store such a value. For reasons that I do not understand,
|
||||
the expression in the 8-bit GET2 macro is treated by gcc as a signed
|
||||
expression, even when a is declared as unsigned. It seems that any kind of
|
||||
arithmetic results in a signed value. Hence the cast. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
/* 8-bit code units: a 16-bit count/offset occupies two code units, most
significant byte first. GET2 casts the result to unsigned int; PUT2 stores the
high byte in a[n] and the low byte in a[n+1]. Every use of a macro parameter is
parenthesized and each expansion is a single parenthesized expression, so the
macros are safe with compound arguments and in any expression context. The
arguments a and n are evaluated more than once and must be free of side
effects. */
#define IMM2_SIZE 2
#define GET2(a,n) ((unsigned int)(((a)[n] << 8) | (a)[(n)+1]))
#define PUT2(a,n,d) ((a)[n] = (d) >> 8, (a)[(n)+1] = (d) & 255)
|
||||
|
||||
#else /* Code units are 16 or 32 bits */
|
||||
/* 16-bit and 32-bit code units: a 16-bit count/offset fits in one code unit, so
GET2 is a plain load of a[n] and PUT2 a plain store to a[n]. The parameters are
parenthesized so that compound arguments such as "code + 1" behave as
intended. */
#define IMM2_SIZE 1
#define GET2(a,n) ((a)[n])
#define PUT2(a,n,d) ((a)[n] = (d))
|
||||
#endif
|
||||
|
||||
/* Other macros that are different for 8-bit mode. The MAX_255 macro checks
|
||||
whether its argument, which is assumed to be one code unit, is less than 256.
|
||||
The CHMAX_255 macro does not assume one code unit. The maximum length of a MARK
|
||||
name must fit in one code unit; currently it is set to 255 or 65535. The
|
||||
TABLE_GET macro is used to access elements of tables containing exactly 256
|
||||
items. Its argument is a code unit. When code points can be greater than 255, a
|
||||
check is needed before accessing these tables. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define MAX_255(c) TRUE
|
||||
#define MAX_MARK ((1u << 8) - 1)
|
||||
#define TABLE_GET(c, table, default) ((table)[c])
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#define SUPPORT_WIDE_CHARS
|
||||
#define CHMAX_255(c) ((c) <= 255u)
|
||||
#else
|
||||
#define CHMAX_255(c) TRUE
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
#else /* Code units are 16 or 32 bits */
|
||||
#define CHMAX_255(c) ((c) <= 255u)
|
||||
#define MAX_255(c) ((c) <= 255u)
|
||||
#define MAX_MARK ((1u << 16) - 1)
|
||||
#define SUPPORT_WIDE_CHARS
|
||||
#define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
|
||||
#endif
|
||||
|
||||
|
||||
/* ----------------- Character-handling macros ----------------- */
|
||||
|
||||
/* There is a proposed future special "UTF-21" mode, in which only the lowest
|
||||
21 bits of a 32-bit character are interpreted as UTF, with the remaining 11
|
||||
high-order bits available to the application for other uses. In preparation for
|
||||
the future implementation of this mode, there are macros that load a data item
|
||||
and, if in this special mode, mask it to 21 bits. These macros all have names
|
||||
starting with UCHAR21. In all other modes, including the normal 32-bit
|
||||
library, the macros all have the same simple definitions. When the new mode is
|
||||
implemented, it is expected that these definitions will be varied appropriately
|
||||
using #ifdef when compiling the library that supports the special mode. */
|
||||
|
||||
#define UCHAR21(eptr) (*(eptr))
|
||||
#define UCHAR21TEST(eptr) (*(eptr))
|
||||
#define UCHAR21INC(eptr) (*(eptr)++)
|
||||
#define UCHAR21INCTEST(eptr) (*(eptr)++)
|
||||
|
||||
/* When UTF encoding is being used, a character is no longer just a single
|
||||
byte in 8-bit mode or a single short in 16-bit mode. The macros for character
|
||||
handling generate simple sequences when used in the basic mode, and more
|
||||
complicated ones for UTF characters. GETCHARLENTEST and other macros are not
|
||||
used when UTF is not supported. To make sure they can never even appear when
|
||||
UTF support is omitted, we don't even define them. */
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
|
||||
/* #define MAX_UTF_SINGLE_CU */
|
||||
/* #define HAS_EXTRALEN(c) */
|
||||
/* #define GET_EXTRALEN(c) */
|
||||
/* #define NOT_FIRSTCU(c) */
|
||||
/* Without Unicode support every character is exactly one code unit, so all of
these reduce to a single load (or store, for PUTCHAR). The pointer argument is
parenthesized so that compound arguments such as "ptr + 1" are dereferenced as a
whole. The GETCHAR* macros each expand to a complete statement, including the
terminating semicolon; PUTCHAR is an expression whose value is 1, the number of
code units written. */
#define GETCHAR(c, eptr) c = *(eptr);
#define GETCHARTEST(c, eptr) c = *(eptr);
#define GETCHARINC(c, eptr) c = *(eptr)++;
#define GETCHARINCTEST(c, eptr) c = *(eptr)++;
#define GETCHARLEN(c, eptr, len) c = *(eptr);
#define PUTCHAR(c, p) (*(p) = (c), 1)
|
||||
/* #define GETCHARLENTEST(c, eptr, len) */
|
||||
/* #define BACKCHAR(eptr) */
|
||||
/* #define FORWARDCHAR(eptr) */
|
||||
/* #define FORWARDCHARTEST(eptr,end) */
|
||||
/* #define ACROSSCHAR(condition, eptr, action) */
|
||||
|
||||
#else /* SUPPORT_UNICODE */
|
||||
|
||||
/* ------------------- 8-bit support ------------------ */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */
|
||||
|
||||
/* The largest UTF code point that can be encoded as a single code unit. */
|
||||
|
||||
#define MAX_UTF_SINGLE_CU 127
|
||||
|
||||
/* Tests whether the code point needs extra characters to decode. */
|
||||
|
||||
#define HAS_EXTRALEN(c) HASUTF8EXTRALEN(c)
|
||||
|
||||
/* Returns with the additional number of characters if HAS_EXTRALEN(c) is TRUE.
|
||||
Otherwise it has an undefined behaviour. */
|
||||
|
||||
#define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3fu])
|
||||
|
||||
/* Returns TRUE, if the given value is not the first code unit of a UTF
|
||||
sequence. */
|
||||
|
||||
#define NOT_FIRSTCU(c) (((c) & 0xc0u) == 0x80u)
|
||||
|
||||
/* Get the next UTF-8 character, not advancing the pointer. This is called when
|
||||
we know we are in UTF-8 mode. */
|
||||
|
||||
#define GETCHAR(c, eptr) \
|
||||
c = *eptr; \
|
||||
if (c >= 0xc0u) GETUTF8(c, eptr);
|
||||
|
||||
/* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the
|
||||
pointer. */
|
||||
|
||||
#define GETCHARTEST(c, eptr) \
|
||||
c = *eptr; \
|
||||
if (utf && c >= 0xc0u) GETUTF8(c, eptr);
|
||||
|
||||
/* Get the next UTF-8 character, advancing the pointer. This is called when we
|
||||
know we are in UTF-8 mode. */
|
||||
|
||||
#define GETCHARINC(c, eptr) \
|
||||
c = *eptr++; \
|
||||
if (c >= 0xc0u) GETUTF8INC(c, eptr);
|
||||
|
||||
/* Get the next character, testing for UTF-8 mode, and advancing the pointer.
|
||||
This is called when we don't know if we are in UTF-8 mode. */
|
||||
|
||||
#define GETCHARINCTEST(c, eptr) \
|
||||
c = *eptr++; \
|
||||
if (utf && c >= 0xc0u) GETUTF8INC(c, eptr);
|
||||
|
||||
/* Get the next UTF-8 character, not advancing the pointer, incrementing length
|
||||
if there are extra bytes. This is called when we know we are in UTF-8 mode. */
|
||||
|
||||
#define GETCHARLEN(c, eptr, len) \
|
||||
c = *eptr; \
|
||||
if (c >= 0xc0u) GETUTF8LEN(c, eptr, len);
|
||||
|
||||
/* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the
|
||||
pointer, incrementing length if there are extra bytes. This is called when we
|
||||
do not know if we are in UTF-8 mode. */
|
||||
|
||||
#define GETCHARLENTEST(c, eptr, len) \
|
||||
c = *eptr; \
|
||||
if (utf && c >= 0xc0u) GETUTF8LEN(c, eptr, len);
|
||||
|
||||
/* If the pointer is not at the start of a character, move it back until
|
||||
it is. This is called only in UTF-8 mode - we don't put a test within the macro
|
||||
because almost all calls are already within a block of UTF-8 only code. */
|
||||
|
||||
/* Step eptr backwards while it points at a code unit of the form 10xxxxxx. The
argument is parenthesized so that expressions such as "*pp" or "F->eptr" are
tested and updated as a whole. The caller supplies the terminating
semicolon. */
#define BACKCHAR(eptr) while((*(eptr) & 0xc0u) == 0x80u) (eptr)--

/* Same as above, just in the other direction. FORWARDCHARTEST additionally
stops when eptr reaches end. */
#define FORWARDCHAR(eptr) while((*(eptr) & 0xc0u) == 0x80u) (eptr)++
#define FORWARDCHARTEST(eptr,end) \
  while((eptr) < (end) && (*(eptr) & 0xc0u) == 0x80u) (eptr)++
|
||||
|
||||
/* Same as above, but it allows a fully customizable form. */
|
||||
#define ACROSSCHAR(condition, eptr, action) \
|
||||
while((condition) && ((*eptr) & 0xc0u) == 0x80u) action
|
||||
|
||||
/* Deposit a character into memory, returning the number of code units. */
|
||||
|
||||
/* When utf is true and c is greater than MAX_UTF_SINGLE_CU, the character is
written by PRIV(ord2utf) and its result is the value of the expression;
otherwise c is stored in *p and the value is 1. The parameters are parenthesized
so that compound arguments such as "buf + 1" are used as a whole. Note that c is
evaluated more than once and must be free of side effects. */
#define PUTCHAR(c, p) ((utf && (c) > MAX_UTF_SINGLE_CU)? \
  PRIV(ord2utf)(c,p) : (*(p) = (c), 1))
|
||||
|
||||
|
||||
/* ------------------- 16-bit support ------------------ */
|
||||
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
#define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */
|
||||
|
||||
/* The largest UTF code point that can be encoded as a single code unit. */
|
||||
|
||||
#define MAX_UTF_SINGLE_CU 65535
|
||||
|
||||
/* Tests whether the code point needs extra characters to decode. */
|
||||
|
||||
#define HAS_EXTRALEN(c) (((c) & 0xfc00u) == 0xd800u)
|
||||
|
||||
/* Returns with the additional number of characters if HAS_EXTRALEN(c) is TRUE.
|
||||
Otherwise it has an undefined behaviour. */
|
||||
|
||||
#define GET_EXTRALEN(c) 1
|
||||
|
||||
/* Returns TRUE, if the given value is not the first code unit of a UTF
|
||||
sequence. */
|
||||
|
||||
#define NOT_FIRSTCU(c) (((c) & 0xfc00u) == 0xdc00u)
|
||||
|
||||
/* Base macro to pick up the low surrogate of a UTF-16 character, not
|
||||
advancing the pointer. */
|
||||
|
||||
#define GETUTF16(c, eptr) \
|
||||
{ c = (((c & 0x3ffu) << 10) | (eptr[1] & 0x3ffu)) + 0x10000u; }
|
||||
|
||||
/* Get the next UTF-16 character, not advancing the pointer. This is called when
|
||||
we know we are in UTF-16 mode. */
|
||||
|
||||
#define GETCHAR(c, eptr) \
|
||||
c = *eptr; \
|
||||
if ((c & 0xfc00u) == 0xd800u) GETUTF16(c, eptr);
|
||||
|
||||
/* Get the next UTF-16 character, testing for UTF-16 mode, and not advancing the
|
||||
pointer. */
|
||||
|
||||
#define GETCHARTEST(c, eptr) \
|
||||
c = *eptr; \
|
||||
if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16(c, eptr);
|
||||
|
||||
/* Base macro to pick up the low surrogate of a UTF-16 character, advancing
|
||||
the pointer. */
|
||||
|
||||
#define GETUTF16INC(c, eptr) \
|
||||
{ c = (((c & 0x3ffu) << 10) | (*eptr++ & 0x3ffu)) + 0x10000u; }
|
||||
|
||||
/* Get the next UTF-16 character, advancing the pointer. This is called when we
|
||||
know we are in UTF-16 mode. */
|
||||
|
||||
#define GETCHARINC(c, eptr) \
|
||||
c = *eptr++; \
|
||||
if ((c & 0xfc00u) == 0xd800u) GETUTF16INC(c, eptr);
|
||||
|
||||
/* Get the next character, testing for UTF-16 mode, and advancing the pointer.
|
||||
This is called when we don't know if we are in UTF-16 mode. */
|
||||
|
||||
#define GETCHARINCTEST(c, eptr) \
|
||||
c = *eptr++; \
|
||||
if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16INC(c, eptr);
|
||||
|
||||
/* Base macro to pick up the low surrogate of a UTF-16 character, not
|
||||
advancing the pointer, incrementing the length. */
|
||||
|
||||
#define GETUTF16LEN(c, eptr, len) \
|
||||
{ c = (((c & 0x3ffu) << 10) | (eptr[1] & 0x3ffu)) + 0x10000u; len++; }
|
||||
|
||||
/* Get the next UTF-16 character, not advancing the pointer, incrementing
|
||||
length if there is a low surrogate. This is called when we know we are in
|
||||
UTF-16 mode. */
|
||||
|
||||
#define GETCHARLEN(c, eptr, len) \
|
||||
c = *eptr; \
|
||||
if ((c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len);
|
||||
|
||||
/* Get the next UTF-16 character, testing for UTF-16 mode, not advancing the
|
||||
pointer, incrementing length if there is a low surrogate. This is called when
|
||||
we do not know if we are in UTF-16 mode. */
|
||||
|
||||
#define GETCHARLENTEST(c, eptr, len) \
|
||||
c = *eptr; \
|
||||
if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len);
|
||||
|
||||
/* If the pointer is not at the start of a character, move it back until
|
||||
it is. This is called only in UTF-16 mode - we don't put a test within the
|
||||
macro because almost all calls are already within a block of UTF-16 only
|
||||
code. */
|
||||
|
||||
/* Step eptr back by one code unit if it points at a code unit in the range
0xdc00-0xdfff. The expansion is wrapped in do { } while (0), like the 32-bit
variants, so that it forms exactly one statement: a bare "if" would capture a
following "else" when the macro is used as the body of an unbraced if/else. The
argument is also parenthesized. The caller supplies the terminating
semicolon. */
#define BACKCHAR(eptr) \
  do { if ((*(eptr) & 0xfc00u) == 0xdc00u) (eptr)--; } while (0)

/* Same as above, just in the other direction. FORWARDCHARTEST additionally
requires eptr to be below end. */
#define FORWARDCHAR(eptr) \
  do { if ((*(eptr) & 0xfc00u) == 0xdc00u) (eptr)++; } while (0)
#define FORWARDCHARTEST(eptr,end) \
  do { if ((eptr) < (end) && (*(eptr) & 0xfc00u) == 0xdc00u) (eptr)++; } while (0)
|
||||
|
||||
/* Same as above, but it allows a fully customizable form. */
|
||||
#define ACROSSCHAR(condition, eptr, action) \
|
||||
if ((condition) && ((*eptr) & 0xfc00u) == 0xdc00u) action
|
||||
|
||||
/* Deposit a character into memory, returning the number of code units. */
|
||||
|
||||
/* When utf is true and c is greater than MAX_UTF_SINGLE_CU, the character is
written by PRIV(ord2utf) and its result is the value of the expression;
otherwise c is stored in *p and the value is 1. The parameters are parenthesized
so that compound arguments such as "buf + 1" are used as a whole. Note that c is
evaluated more than once and must be free of side effects. */
#define PUTCHAR(c, p) ((utf && (c) > MAX_UTF_SINGLE_CU)? \
  PRIV(ord2utf)(c,p) : (*(p) = (c), 1))
|
||||
|
||||
|
||||
/* ------------------- 32-bit support ------------------ */
|
||||
|
||||
#else
|
||||
|
||||
/* These are trivial for the 32-bit library, since all UTF-32 characters fit
|
||||
into one PCRE2_UCHAR unit. */
|
||||
|
||||
#define MAX_UTF_SINGLE_CU (0x10ffffu)
|
||||
#define HAS_EXTRALEN(c) (0)
|
||||
#define GET_EXTRALEN(c) (0)
|
||||
#define NOT_FIRSTCU(c) (0)
|
||||
|
||||
/* Get the next UTF-32 character, not advancing the pointer. This is called when
|
||||
we know we are in UTF-32 mode. */
|
||||
|
||||
#define GETCHAR(c, eptr) \
|
||||
c = *(eptr);
|
||||
|
||||
/* Get the next UTF-32 character, testing for UTF-32 mode, and not advancing the
|
||||
pointer. */
|
||||
|
||||
#define GETCHARTEST(c, eptr) \
|
||||
c = *(eptr);
|
||||
|
||||
/* Get the next UTF-32 character, advancing the pointer. This is called when we
|
||||
know we are in UTF-32 mode. */
|
||||
|
||||
#define GETCHARINC(c, eptr) \
|
||||
c = *((eptr)++);
|
||||
|
||||
/* Get the next character, testing for UTF-32 mode, and advancing the pointer.
|
||||
This is called when we don't know if we are in UTF-32 mode. */
|
||||
|
||||
#define GETCHARINCTEST(c, eptr) \
|
||||
c = *((eptr)++);
|
||||
|
||||
/* Get the next UTF-32 character, not advancing the pointer, not incrementing
|
||||
length (since all UTF-32 is of length 1). This is called when we know we are in
|
||||
UTF-32 mode. */
|
||||
|
||||
#define GETCHARLEN(c, eptr, len) \
|
||||
GETCHAR(c, eptr)
|
||||
|
||||
/* Get the next UTF-32 character, testing for UTF-32 mode, not advancing the
|
||||
pointer, not incrementing the length (since all UTF-32 is of length 1).
|
||||
This is called when we do not know if we are in UTF-32 mode. */
|
||||
|
||||
#define GETCHARLENTEST(c, eptr, len) \
|
||||
GETCHARTEST(c, eptr)
|
||||
|
||||
/* If the pointer is not at the start of a character, move it back until
|
||||
it is. This is called only in UTF-32 mode - we don't put a test within the
|
||||
macro because almost all calls are already within a block of UTF-32 only
|
||||
code.
|
||||
|
||||
These are all no-ops since all UTF-32 characters fit into one PCRE2_UCHAR. */
|
||||
|
||||
#define BACKCHAR(eptr) do { } while (0)
|
||||
|
||||
/* Same as above, just in the other direction. */
|
||||
|
||||
#define FORWARDCHAR(eptr) do { } while (0)
|
||||
#define FORWARDCHARTEST(eptr,end) do { } while (0)
|
||||
|
||||
/* Same as above, but it allows a fully customizable form. */
|
||||
|
||||
#define ACROSSCHAR(condition, eptr, action) do { } while (0)
|
||||
|
||||
/* Deposit a character into memory, returning the number of code units. */
|
||||
|
||||
/* Stores c in *p; the value of the expression is 1. The parameters are
parenthesized so that compound arguments such as "buf + 1" are used as a
whole. */
#define PUTCHAR(c, p) (*(p) = (c), 1)
|
||||
|
||||
#endif /* UTF-32 character handling */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
|
||||
/* Mode-dependent macros that have the same definition in all modes. */
|
||||
|
||||
/* CU2BYTES and BYTES2CU convert between a count of code units and a count of
bytes by multiplying or dividing by PCRE2_CODE_UNIT_WIDTH/8. PUTINC and PUT2INC
perform a PUT or PUT2 and then advance the pointer a by LINK_SIZE or IMM2_SIZE
code units respectively; each is a single parenthesized expression (its value is
the advanced pointer) so that it stays intact in any expression context. */
#define CU2BYTES(x) ((x)*((PCRE2_CODE_UNIT_WIDTH/8)))
#define BYTES2CU(x) ((x)/((PCRE2_CODE_UNIT_WIDTH/8)))
#define PUTINC(a,n,d) (PUT(a,n,d), (a) += LINK_SIZE)
#define PUT2INC(a,n,d) (PUT2(a,n,d), (a) += IMM2_SIZE)
|
||||
|
||||
|
||||
/* ----------------------- HIDDEN STRUCTURES ----------------------------- */
|
||||
|
||||
/* NOTE: All these structures *must* start with a pcre2_memctl structure. The
|
||||
code that uses them is simpler because it assumes this. */
|
||||
|
||||
/* The real general context structure. At present it holds only data for custom
|
||||
memory control. */
|
||||
|
||||
/* WARNING: if this is ever changed, code in pcre2_substitute.c will have to be
|
||||
changed because it builds a general context "by hand" in order to avoid the
|
||||
malloc() call in pcre2_general_context_create(). There is also code in
|
||||
pcre2_match.c that makes the same assumption. */
|
||||
|
||||
typedef struct pcre2_real_general_context {
|
||||
pcre2_memctl memctl; /* Only member: custom memory control data */
|
||||
} pcre2_real_general_context;
|
||||
|
||||
/* The real compile context structure */
|
||||
|
||||
typedef struct pcre2_real_compile_context {
|
||||
pcre2_memctl memctl; /* Memory control fields; must be the first member */
|
||||
int (*stack_guard)(uint32_t, void *); /* Callback: (uint32_t, void *) -> int */
|
||||
void *stack_guard_data; /* Presumably handed to stack_guard - TODO confirm */
|
||||
const uint8_t *tables; /* Character tables (cf. pcre2_real_code.tables) */
|
||||
PCRE2_SIZE max_pattern_length; /* Presumably a cap on pattern length - TODO confirm units */
|
||||
PCRE2_SIZE max_pattern_compiled_length; /* Presumably a cap on compiled size - TODO confirm units */
|
||||
uint16_t bsr_convention; /* What \R matches (cf. pcre2_real_code) */
|
||||
uint16_t newline_convention; /* What is a newline (cf. pcre2_real_code) */
|
||||
uint32_t parens_nest_limit; /* Presumably max parenthesis nesting - TODO confirm */
|
||||
uint32_t extra_options; /* pcre2_real_code.extra_options is taken from here */
|
||||
uint32_t max_varlookbehind; /* Presumably a variable-lookbehind limit - TODO confirm */
|
||||
uint32_t optimization_flags; /* Optimization flags (cf. pcre2_real_code) */
|
||||
} pcre2_real_compile_context;
|
||||
|
||||
/* The real match context structure. */
|
||||
|
||||
typedef struct pcre2_real_match_context {
|
||||
pcre2_memctl memctl; /* Memory control fields; must be the first member */
|
||||
#ifdef SUPPORT_JIT
|
||||
pcre2_jit_callback jit_callback; /* Present only when SUPPORT_JIT is defined */
|
||||
void *jit_callback_data; /* Present only when SUPPORT_JIT is defined */
|
||||
#endif
|
||||
int (*callout)(pcre2_callout_block *, void *); /* Callout function pointer */
|
||||
void *callout_data; /* Presumably handed to callout - TODO confirm */
|
||||
int (*substitute_callout)(pcre2_substitute_callout_block *, void *); /* Substitute callout function pointer */
|
||||
void *substitute_callout_data; /* Presumably handed to substitute_callout - TODO confirm */
|
||||
/* Function pointer returning a PCRE2_SIZE; its parameter list continues on the
next line. */
PCRE2_SIZE (*substitute_case_callout)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *,
|
||||
PCRE2_SIZE, int, void *);
|
||||
void *substitute_case_callout_data; /* Presumably handed to substitute_case_callout - TODO confirm */
|
||||
PCRE2_SIZE offset_limit; /* NOTE(review): semantics not visible here - confirm */
|
||||
uint32_t heap_limit; /* Presumably the match-time heap limit - TODO confirm */
|
||||
uint32_t match_limit; /* Presumably the match-time match limit - TODO confirm */
|
||||
uint32_t depth_limit; /* Presumably the match-time depth limit - TODO confirm */
|
||||
} pcre2_real_match_context;
|
||||
|
||||
/* The real convert context structure. */
|
||||
|
||||
typedef struct pcre2_real_convert_context {
|
||||
pcre2_memctl memctl; /* Memory control fields; must be the first member */
|
||||
uint32_t glob_separator; /* Presumably the glob path separator character - TODO confirm */
|
||||
uint32_t glob_escape; /* Presumably the glob escape character - TODO confirm */
|
||||
} pcre2_real_convert_context;
|
||||
|
||||
/* The real compiled code structure. The type for the blocksize field is
|
||||
defined specially because it is required in pcre2_serialize_decode() when
|
||||
copying the size from possibly unaligned memory into a variable of the same
|
||||
type. Use a macro rather than a typedef to avoid compiler warnings when this
|
||||
file is included multiple times by pcre2test. LOOKBEHIND_MAX specifies the
|
||||
largest lookbehind that is supported. (OP_REVERSE and OP_VREVERSE in a pattern
|
||||
have 16-bit arguments in 8-bit and 16-bit modes, so we need no more than a
|
||||
16-bit field here.) */
|
||||
|
||||
#undef CODE_BLOCKSIZE_TYPE
|
||||
#define CODE_BLOCKSIZE_TYPE PCRE2_SIZE
|
||||
|
||||
#undef LOOKBEHIND_MAX
|
||||
#define LOOKBEHIND_MAX UINT16_MAX
|
||||
|
||||
typedef struct pcre2_real_code {
|
||||
pcre2_memctl memctl; /* Memory control fields */
|
||||
const uint8_t *tables; /* The character tables */
|
||||
void *executable_jit; /* Pointer to JIT code */
|
||||
uint8_t start_bitmap[32]; /* Bitmap for starting code unit < 256 */
|
||||
CODE_BLOCKSIZE_TYPE blocksize; /* Total (bytes) that was malloc-ed */
|
||||
CODE_BLOCKSIZE_TYPE code_start; /* Byte code start offset */
|
||||
uint32_t magic_number; /* Paranoid and endianness check */
|
||||
uint32_t compile_options; /* Options passed to pcre2_compile() */
|
||||
uint32_t overall_options; /* Options after processing the pattern */
|
||||
uint32_t extra_options; /* Taken from compile_context */
|
||||
uint32_t flags; /* Various state flags */
|
||||
uint32_t limit_heap; /* Limit set in the pattern */
|
||||
uint32_t limit_match; /* Limit set in the pattern */
|
||||
uint32_t limit_depth; /* Limit set in the pattern */
|
||||
uint32_t first_codeunit; /* Starting code unit */
|
||||
uint32_t last_codeunit; /* This codeunit must be seen */
|
||||
uint16_t bsr_convention; /* What \R matches */
|
||||
uint16_t newline_convention; /* What is a newline? */
|
||||
uint16_t max_lookbehind; /* Longest lookbehind (characters) */
|
||||
uint16_t minlength; /* Minimum length of match */
|
||||
uint16_t top_bracket; /* Highest numbered group */
|
||||
uint16_t top_backref; /* Highest numbered back reference */
|
||||
uint16_t name_entry_size; /* Size (code units) of table entries */
|
||||
uint16_t name_count; /* Number of name entries in the table */
|
||||
uint32_t optimization_flags; /* Optimizations enabled at compile time */
|
||||
} pcre2_real_code;
|
||||
|
||||
/* The real match data structure. Define ovector as large as it can ever
|
||||
actually be so that array bound checkers don't grumble. Memory for this
|
||||
structure is obtained by calling pcre2_match_data_create(), which sets the size
|
||||
as the offset of ovector plus a pair of elements for each capturable string, so
|
||||
the size varies from call to call. As the maximum number of capturing
|
||||
subpatterns is 65535 we must allow for 65536 strings to include the overall
|
||||
match. (See also the heapframe structure below.) */
|
||||
|
||||
struct heapframe; /* Forward reference */
|
||||
|
||||
/* Match data block. ovector is declared at its maximum size, but (per the
comment preceding this definition) only the offset of ovector plus one pair per
capturable string is actually allocated. */
typedef struct pcre2_real_match_data {
|
||||
pcre2_memctl memctl; /* Memory control fields */
|
||||
const pcre2_real_code *code; /* The pattern used for the match */
|
||||
PCRE2_SPTR subject; /* The subject that was matched */
|
||||
PCRE2_SPTR mark; /* Pointer to last mark */
|
||||
struct heapframe *heapframes; /* Backtracking frames heap memory */
|
||||
PCRE2_SIZE heapframes_size; /* Malloc-ed size of heapframes */
|
||||
PCRE2_SIZE subject_length; /* Subject length */
|
||||
PCRE2_SIZE leftchar; /* Offset to leftmost code unit */
|
||||
PCRE2_SIZE rightchar; /* Offset to rightmost code unit */
|
||||
PCRE2_SIZE startchar; /* Offset to starting code unit */
|
||||
uint8_t matchedby; /* Type of match (normal, JIT, DFA) */
|
||||
uint8_t flags; /* Various flags */
|
||||
uint16_t oveccount; /* Number of offset pairs in ovector */
|
||||
int rc; /* The return code from the match */
|
||||
PCRE2_SIZE ovector[131072]; /* 65536 pairs declared; must be last in the structure */
|
||||
} pcre2_real_match_data;
|
||||
|
||||
|
||||
/* ----------------------- PRIVATE STRUCTURES ----------------------------- */
|
||||
|
||||
/* These structures are not needed for pcre2test. */
|
||||
|
||||
#ifndef PCRE2_PCRE2TEST
|
||||
|
||||
/* Structures for checking for mutual function recursion when scanning compiled
|
||||
or parsed code. */
|
||||
|
||||
/* One link in a chain of groups, used when checking for mutual recursion. */
typedef struct recurse_check {
|
||||
struct recurse_check *prev; /* Link to another entry in the chain (presumably the enclosing one) */
|
||||
PCRE2_SPTR group; /* Group this entry refers to; NOTE(review): presumably a pointer into compiled code - confirm */
|
||||
} recurse_check;
|
||||
|
||||
/* Counterpart of recurse_check that refers to a group through a uint32_t
pointer; NOTE(review): presumably into the parsed pattern - confirm. */
typedef struct parsed_recurse_check {
|
||||
struct parsed_recurse_check *prev; /* Link to another entry in the chain */
|
||||
uint32_t *groupptr; /* Group this entry refers to */
|
||||
} parsed_recurse_check;
|
||||
|
||||
/* Structure for building a cache when filling in pattern recursion offsets. */
|
||||
|
||||
/* One cache entry: a group pointer paired with a group number. */
typedef struct recurse_cache {
|
||||
PCRE2_SPTR group; /* Cached group pointer */
|
||||
int groupnumber; /* Number of that group */
|
||||
} recurse_cache;
|
||||
|
||||
/* Structure for maintaining a chain of pointers to the currently incomplete
|
||||
branches, for testing for left recursion while compiling. */
|
||||
|
||||
/* One link in the chain of currently incomplete branches. */
typedef struct branch_chain {
|
||||
struct branch_chain *outer; /* Next link outwards (presumably the enclosing branch) */
|
||||
PCRE2_UCHAR *current_branch; /* The incomplete branch this link describes */
|
||||
} branch_chain;
|
||||
|
||||
/* Structure for building a list of named groups during the first pass of
|
||||
compiling. */
|
||||
|
||||
/* One named group found during the first compile pass. The name is not
copied: it is referenced in place inside the pattern. */
typedef struct named_group {
|
||||
PCRE2_SPTR name; /* Points to the name in the pattern */
|
||||
uint32_t number; /* Group number */
|
||||
uint16_t length; /* Length of the name */
|
||||
uint16_t isdup; /* TRUE if a duplicate (boolean held in 16 bits) */
|
||||
} named_group;
|
||||
|
||||
/* Structure for caching sorted ranges. This improves the performance
|
||||
of translating META code to byte code. */
|
||||
|
||||
/* Header of one cached, sorted set of class ranges. Entries form a singly
linked list through `next`; the range data follows the header in memory. */
typedef struct class_ranges {
|
||||
struct class_ranges *next; /* Next class ranges */
|
||||
size_t char_lists_size; /* Total size of encoded char lists */
|
||||
size_t char_lists_start; /* Start offset of encoded char lists */
|
||||
uint16_t range_list_size; /* Size of ranges array */
|
||||
uint16_t char_lists_types; /* The XCL_LIST header of char lists */
|
||||
/* Followed by the list of ranges (start/end pairs) */
|
||||
} class_ranges;
|
||||
|
||||
/* Two overlapping views of the same 32 bytes (256 bits): byte-wise and as
32-bit words. */
typedef union class_bits_storage {
|
||||
uint8_t classbits[32]; /* Byte view */
|
||||
uint32_t classwords[8]; /* Word view of the same storage */
|
||||
} class_bits_storage;
|
||||
|
||||
/* Structure for passing "static" information around between the functions
|
||||
doing the compiling, so that they are thread-safe. */
|
||||
|
||||
/* Per-compile state that is handed between the compiling functions instead of
being kept in static variables. */
typedef struct compile_block {
|
||||
pcre2_real_compile_context *cx; /* Points to the compile context */
|
||||
const uint8_t *lcc; /* Points to lower casing table */
|
||||
const uint8_t *fcc; /* Points to case-flipping table */
|
||||
const uint8_t *cbits; /* Points to character type table */
|
||||
const uint8_t *ctypes; /* Points to table of type maps */
|
||||
PCRE2_UCHAR *start_workspace; /* The start of working space */
|
||||
PCRE2_UCHAR *start_code; /* The start of the compiled code */
|
||||
PCRE2_SPTR start_pattern; /* The start of the pattern */
|
||||
PCRE2_SPTR end_pattern; /* The end of the pattern */
|
||||
PCRE2_UCHAR *name_table; /* The name/number table */
|
||||
PCRE2_SIZE workspace_size; /* Size of workspace */
|
||||
PCRE2_SIZE small_ref_offset[10]; /* Offsets for \1 to \9 (10 slots; slot 0 presumably unused) */
|
||||
PCRE2_SIZE erroroffset; /* Offset of error in pattern */
|
||||
class_bits_storage classbits; /* Temporary store for classbits */
|
||||
uint16_t names_found; /* Number of entries so far */
|
||||
uint16_t name_entry_size; /* Size of each entry */
|
||||
uint16_t parens_depth; /* Depth of nested parentheses */
|
||||
uint16_t assert_depth; /* Depth of nested assertions */
|
||||
named_group *named_groups; /* Points to vector in pre-compile */
|
||||
uint32_t named_group_list_size; /* Number of entries in the list */
|
||||
uint32_t external_options; /* External (initial) options */
|
||||
uint32_t external_flags; /* External flag bits to be set */
|
||||
uint32_t bracount; /* Count of capturing parentheses */
|
||||
uint32_t lastcapture; /* Last capture encountered */
|
||||
uint32_t *parsed_pattern; /* Parsed pattern buffer */
|
||||
uint32_t *parsed_pattern_end; /* Parsed pattern should not get here */
|
||||
uint32_t *groupinfo; /* Group info vector */
|
||||
uint32_t top_backref; /* Maximum back reference */
|
||||
uint32_t backref_map; /* Bitmap of low back refs */
|
||||
uint32_t nltype; /* Newline type */
|
||||
uint32_t nllen; /* Newline string length */
|
||||
PCRE2_UCHAR nl[4]; /* Newline string when fixed length */
|
||||
uint8_t class_op_used[ECLASS_NEST_LIMIT]; /* Operation used for
|
||||
extended classes */
|
||||
uint32_t req_varyopt; /* "After variable item" flag for reqbyte */
|
||||
uint32_t max_varlookbehind; /* Limit for variable lookbehinds */
|
||||
int max_lookbehind; /* Maximum lookbehind encountered (characters) */
|
||||
BOOL had_accept; /* (*ACCEPT) encountered */
|
||||
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
|
||||
BOOL had_recurse; /* Had a pattern recursion or subroutine call */
|
||||
BOOL dupnames; /* Duplicate names exist */
|
||||
/* The remaining fields exist only in builds with SUPPORT_WIDE_CHARS. */
#ifdef SUPPORT_WIDE_CHARS
|
||||
class_ranges *cranges; /* First class range. */
|
||||
class_ranges *next_cranges; /* Next class range. */
|
||||
size_t char_lists_size; /* Current size of character lists */
|
||||
#endif
|
||||
} compile_block;
|
||||
|
||||
/* Structure for keeping the properties of the in-memory stack used
|
||||
by the JIT matcher. */
|
||||
|
||||
/* Descriptor of the in-memory stack used by the JIT matcher. */
typedef struct pcre2_real_jit_stack {
|
||||
pcre2_memctl memctl; /* Memory control fields */
|
||||
void* stack; /* Opaque handle to the stack itself; its real type is not visible here */
|
||||
} pcre2_real_jit_stack;
|
||||
|
||||
/* Structure for items in a linked list that represents an explicit recursive
|
||||
call within the pattern when running pcre2_dfa_match(). */
|
||||
|
||||
/* One active explicit recursion in pcre2_dfa_match(); entries are linked
through prevrec. */
typedef struct dfa_recursion_info {
|
||||
struct dfa_recursion_info *prevrec; /* Previous entry in the linked list */
|
||||
PCRE2_SPTR subject_position; /* Subject position recorded for this recursion */
|
||||
PCRE2_SPTR last_used_ptr; /* NOTE(review): presumably the saved "latest consulted character" - confirm */
|
||||
uint32_t group_num; /* Number of the group being recursed into */
|
||||
} dfa_recursion_info;
|
||||
|
||||
/* Structure for "stack" frames that are used for remembering backtracking
|
||||
positions during matching. As these are used in a vector, with the ovector item
|
||||
being extended, the size of the structure must be a multiple of PCRE2_SIZE. The
|
||||
only way to check this at compile time is to force an error by generating an
|
||||
array with a negative size. By putting this in a typedef (which is never used),
|
||||
we don't generate any code when all is well. */
|
||||
|
||||
/* Backtracking frame. The field order and the padding below are deliberate:
sizeof(heapframe) is checked after the definition to be a multiple of
sizeof(PCRE2_SIZE), so do not reorder or resize fields casually. */
typedef struct heapframe {
|
||||
|
||||
/* The first set of fields are variables that have to be preserved over calls
to RRMATCH(), but which do not need to be copied to new frames. */
|
||||
|
||||
PCRE2_SPTR ecode; /* The current position in the pattern */
|
||||
PCRE2_SPTR temp_sptr[2]; /* Used for short-term PCRE2_SPTR values */
|
||||
PCRE2_SIZE length; /* Used for character, string, or code lengths */
|
||||
PCRE2_SIZE back_frame; /* Amount to subtract on RRETURN */
|
||||
PCRE2_SIZE temp_size; /* Used for short-term PCRE2_SIZE values */
|
||||
uint32_t rdepth; /* Function "recursion" depth within pcre2_match() */
|
||||
uint32_t group_frame_type; /* Type information for group frames */
|
||||
uint32_t temp_32[4]; /* Used for short-term 32-bit or BOOL values */
|
||||
uint8_t return_id; /* Where to go on in internal "return" */
|
||||
uint8_t op; /* Processing opcode */
|
||||
|
||||
/* At this point, the structure is 16-bit aligned. On most architectures
the alignment requirement for a pointer will ensure that the eptr field below
is 32-bit or 64-bit aligned. However, on m68k it is fine to have a pointer
that is 16-bit aligned. We must therefore ensure that what comes between here
and eptr is an odd multiple of 16 bits so as to get back into 32-bit
alignment. This happens naturally when PCRE2_UCHAR is 8 bits wide, but needs
fudges in the other cases. In the 32-bit case the padding comes first so that
the occu field itself is 32-bit aligned. Without the padding, this structure
is no longer a multiple of PCRE2_SIZE on m68k, and the check below fails.
Each variant below occupies 6 bytes (6x1, 2x2+2, or 2+1x4), i.e. three 16-bit
units. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
PCRE2_UCHAR occu[6]; /* Used for other case code units */
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
PCRE2_UCHAR occu[2]; /* Used for other case code units */
|
||||
uint8_t unused[2]; /* Ensure 32-bit alignment (see above) */
|
||||
#else
|
||||
uint8_t unused[2]; /* Ensure 32-bit alignment (see above) */
|
||||
PCRE2_UCHAR occu[1]; /* Used for other case code units */
|
||||
#endif
|
||||
|
||||
/* The rest have to be copied from the previous frame whenever a new frame
becomes current. The final field is specified as a large vector so that
runtime array bound checks don't catch references to it. However, for any
specific call to pcre2_match() the memory allocated for each frame structure
allows for exactly the right size ovector for the number of capturing
parentheses. (See also the comment for pcre2_real_match_data above.) */
|
||||
|
||||
PCRE2_SPTR eptr; /* MUST BE FIRST (of the fields copied to a new frame) */
|
||||
PCRE2_SPTR start_match; /* Can be adjusted by \K */
|
||||
PCRE2_SPTR mark; /* Most recent mark on the success path */
|
||||
PCRE2_SPTR recurse_last_used; /* Last character used at time of pattern recursion */
|
||||
uint32_t current_recurse; /* Group number of current (deepest) pattern recursion */
|
||||
uint32_t capture_last; /* Most recent capture */
|
||||
PCRE2_SIZE last_group_offset; /* Saved offset to most recent group frame */
|
||||
PCRE2_SIZE offset_top; /* Offset after highest capture */
|
||||
PCRE2_SIZE ovector[131072]; /* Must be last in the structure */
|
||||
} heapframe;
|
||||
|
||||
/* Assert that the size of the heapframe structure is a multiple of PCRE2_SIZE.
|
||||
See various comments above. */
|
||||
|
||||
STATIC_ASSERT((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0, heapframe_size);
|
||||
|
||||
/* Structure for computing the alignment of heapframe. */
|
||||
|
||||
/* Never instantiated for its data: a single char followed by a heapframe, so
that offsetof(heapframe_align, frame) yields the alignment of heapframe. */
typedef struct heapframe_align {
|
||||
char unalign; /* Completely unalign the current offset */
|
||||
heapframe frame; /* Offset is its alignment */
|
||||
} heapframe_align;
|
||||
|
||||
/* This define is the minimum alignment required for a heapframe, in bytes. */
|
||||
|
||||
#define HEAPFRAME_ALIGNMENT offsetof(heapframe_align, frame)
|
||||
|
||||
/* Structure for passing "static" information around between the functions
|
||||
doing traditional NFA matching (pcre2_match() and friends). */
|
||||
|
||||
/* Per-match state shared by the functions that implement pcre2_match(). */
typedef struct match_block {
|
||||
pcre2_memctl memctl; /* For general use */
|
||||
uint32_t heap_limit; /* Heap limit for this match (units not visible here) */
|
||||
uint32_t match_limit; /* Match limit; presumably bounds match_call_count - confirm */
|
||||
uint32_t match_limit_depth; /* Depth limit for this match */
|
||||
uint32_t match_call_count; /* Number of times a new frame is created */
|
||||
BOOL hitend; /* Hit the end of the subject at some point */
|
||||
BOOL hasthen; /* Pattern contains (*THEN) */
|
||||
BOOL allowemptypartial; /* Allow empty hard partial */
|
||||
const uint8_t *lcc; /* Points to lower casing table */
|
||||
const uint8_t *fcc; /* Points to case-flipping table */
|
||||
const uint8_t *ctypes; /* Points to table of type maps */
|
||||
PCRE2_SIZE start_offset; /* The start offset value */
|
||||
PCRE2_SIZE end_offset_top; /* Highwater mark at end of match */
|
||||
uint16_t partial; /* PARTIAL options */
|
||||
uint16_t bsr_convention; /* \R interpretation */
|
||||
uint16_t name_count; /* Number of names in name table */
|
||||
uint16_t name_entry_size; /* Size of entry in names table */
|
||||
PCRE2_SPTR name_table; /* Table of group names */
|
||||
PCRE2_SPTR start_code; /* For use in pattern recursion */
|
||||
PCRE2_SPTR start_subject; /* Start of the subject string */
|
||||
PCRE2_SPTR check_subject; /* Where UTF-checked from */
|
||||
PCRE2_SPTR end_subject; /* Usable end of the subject string */
|
||||
PCRE2_SPTR true_end_subject; /* Actual end of the subject string */
|
||||
PCRE2_SPTR end_match_ptr; /* Subject position at end match */
|
||||
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
|
||||
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
|
||||
PCRE2_SPTR mark; /* Mark pointer to pass back on success */
|
||||
PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */
|
||||
PCRE2_SPTR verb_ecode_ptr; /* For passing back info */
|
||||
PCRE2_SPTR verb_skip_ptr; /* For passing back a (*SKIP) name */
|
||||
uint32_t verb_current_recurse; /* Current recursion group when (*VERB) happens */
|
||||
uint32_t moptions; /* Match options */
|
||||
uint32_t poptions; /* Pattern options */
|
||||
uint32_t skip_arg_count; /* For counting SKIP_ARGs */
|
||||
uint32_t ignore_skip_arg; /* For re-run when SKIP arg name not found */
|
||||
uint32_t nltype; /* Newline type */
|
||||
uint32_t nllen; /* Newline string length */
|
||||
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
|
||||
pcre2_callout_block *cb; /* Points to a callout block */
|
||||
void *callout_data; /* To pass back to callouts */
|
||||
int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */
|
||||
} match_block;
|
||||
|
||||
/* A similar structure is used for the same purpose by the DFA matching
|
||||
functions. */
|
||||
|
||||
/* Per-match state shared by the DFA matching functions; the counterpart of
match_block. */
typedef struct dfa_match_block {
|
||||
pcre2_memctl memctl; /* For general use */
|
||||
PCRE2_SPTR start_code; /* Start of the compiled pattern */
|
||||
PCRE2_SPTR start_subject ; /* Start of the subject string */
|
||||
PCRE2_SPTR end_subject; /* End of subject string */
|
||||
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
|
||||
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
|
||||
const uint8_t *tables; /* Character tables */
|
||||
PCRE2_SIZE start_offset; /* The start offset value */
|
||||
uint32_t heap_limit; /* Heap limit for this match (units not visible here) */
|
||||
PCRE2_SIZE heap_used; /* Heap used so far; presumably compared against heap_limit - confirm */
|
||||
uint32_t match_limit; /* Match limit; presumably bounds match_call_count - confirm */
|
||||
uint32_t match_limit_depth; /* Depth limit for this match */
|
||||
uint32_t match_call_count; /* Number of calls of internal function */
|
||||
uint32_t moptions; /* Match options */
|
||||
uint32_t poptions; /* Pattern options */
|
||||
uint32_t nltype; /* Newline type */
|
||||
uint32_t nllen; /* Newline string length */
|
||||
BOOL allowemptypartial; /* Allow empty hard partial */
|
||||
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
|
||||
uint16_t bsr_convention; /* \R interpretation */
|
||||
pcre2_callout_block *cb; /* Points to a callout block */
|
||||
void *callout_data; /* To pass back to callouts */
|
||||
int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */
|
||||
dfa_recursion_info *recursive; /* Linked list of pattern recursion data */
|
||||
} dfa_match_block;
|
||||
|
||||
#endif /* PCRE2_PCRE2TEST */
|
||||
|
||||
/* End of pcre2_intmodedep.h */
|
||||
2280
3rd/pcre2/src/pcre2_jit_char_inc.h
Normal file
2280
3rd/pcre2/src/pcre2_jit_char_inc.h
Normal file
@@ -0,0 +1,2280 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
This module by Zoltan Herczeg
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* XClass matching code. */
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
|
||||
/* Alternative names for STACK_TOP and STACK_LIMIT. NOTE(review): presumably
these registers are repurposed while extended classes are matched - confirm
against the users of these names. */
#define ECLASS_CHAR_DATA STACK_TOP
|
||||
#define ECLASS_STACK_DATA STACK_LIMIT
|
||||
|
||||
/* Changes the bias applied to TMP1 from the current `charoffset` to `value`.
When the two differ, one ADD or SUB of the difference is emitted on TMP1;
`charoffset` is then updated to `value` in every case.

Requirements at the point of use: a modifiable variable named `charoffset`
must be in scope, and `value` is evaluated several times, so it must have no
side effects.

The body is wrapped in do { } while (0) so that the macro expands to a single
statement. Previously the trailing assignment was a second statement, so
`if (cond) SET_CHAR_OFFSET(x);` updated charoffset even when cond was false.
Invoke it as a statement followed by a semicolon. */

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
|
||||
|
||||
/* Reads the next item of an encoded character list into `destination` and
advances the read position. Lists 0 and 1 (list_ind <= 1) hold 16-bit items,
later lists hold 32-bit items.

Requirements at the point of use: `list_ind` (index of the list being read)
and `next_char` (a const uint8_t pointer to the next item) must be in scope;
`next_char` is advanced by 2 or 4 bytes.

The body is wrapped in do { } while (0) so that the macro is one statement.
The previous bare if/else captured a following `else` and did not compile as
the body of an unbraced `if ... else`. Invoke it as a statement followed by a
semicolon. */

#define READ_FROM_CHAR_LIST(destination) \
  do \
    { \
    if (list_ind <= 1) \
      { \
      destination = *(const uint16_t*)next_char; \
      next_char += 2; \
      } \
    else \
      { \
      destination = *(const uint32_t*)next_char; \
      next_char += 4; \
      } \
    } \
  while (0)
|
||||
|
||||
/* Capacity of the buffers embedded in xclass_ranges: room for 32 start/end
pairs and a 5-entry stack. 5 == log2(32); keep the two values in step. */
#define XCLASS_LOCAL_RANGES_SIZE 32
|
||||
#define XCLASS_LOCAL_RANGES_LOG2_SIZE 5
|
||||
|
||||
/* Element of the stack held in xclass_ranges. NOTE(review): the code that
fills these in is outside this view; the field notes below are inferred from
the names and must be confirmed. */
typedef struct xclass_stack_item {
|
||||
sljit_u32 first_item; /* Presumably the first index of a pending sub-range */
|
||||
sljit_u32 last_item; /* Presumably the last index of that sub-range */
|
||||
struct sljit_jump *jump; /* Jump associated with this entry */
|
||||
} xclass_stack_item;
|
||||
|
||||
/* Range list of one extended class, as filled in by xclass_compute_ranges().
Small lists use the embedded buffers; for larger ones xclass_compute_ranges()
allocates a single block and points both `stack` and `ranges` into it. */
typedef struct xclass_ranges {
|
||||
size_t range_count; /* Number of uint32_t entries used in ranges (two per start/end pair) */
|
||||
/* Pointer to ranges. A stack area is provided when a small buffer is enough. */
|
||||
uint32_t *ranges;
|
||||
uint32_t local_ranges[XCLASS_LOCAL_RANGES_SIZE * 2]; /* Room for XCLASS_LOCAL_RANGES_SIZE pairs */
|
||||
/* Stack size must be log2(ranges / 2). */
|
||||
xclass_stack_item *stack;
|
||||
xclass_stack_item local_stack[XCLASS_LOCAL_RANGES_LOG2_SIZE];
|
||||
} xclass_ranges;
|
||||
|
||||
/* Expands the character data of an extended class into start/end pairs.

cc points at the first item after any bitmap/property data: either a run of
XCL_SINGLE / XCL_RANGE items ended by XCL_END, or an XCL_LIST header that
refers to encoded character lists stored before common->start.

The pairs are written to ranges->ranges and ranges->range_count is set to the
number of uint32_t entries written (two per pair). For an XCL_LIST whose
estimated pair count exceeds XCLASS_LOCAL_RANGES_SIZE, one block holding both
the stack and the ranges is allocated and ranges->stack / ranges->ranges are
pointed into it; otherwise the pointers are used as the caller left them
(presumably aimed at the local buffers - confirm in the caller). If the
allocation fails, a compiler memory error is recorded and ranges->ranges is
set to NULL.

The locals `utf`, `list_ind`, `next_char` and `compiler` are referenced
implicitly by macros and must keep these names. */
static void xclass_compute_ranges(compiler_common *common, PCRE2_SPTR cc, xclass_ranges *ranges)
{
DEFINE_COMPILER;
const uint32_t no_start = ~(uint32_t)0;
size_t out_pos = 0;
size_t est_pairs = 0;
size_t stack_depth, remaining;
uint32_t type, list_ind;
uint32_t pending_type;
uint32_t item_count;
uint32_t char_list_add, range_start, range_end;
BOOL starts_with_range;
const uint8_t *next_char;
const uint8_t *scan_ptr;
#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
BOOL utf = common->utf;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */

if (*cc == XCL_SINGLE || *cc == XCL_RANGE)
  {
  /* Short form: the items are stored inline. */
  do
    {
    type = *cc++;
    SLJIT_ASSERT(type == XCL_SINGLE || type == XCL_RANGE);

    GETCHARINCTEST(range_start, cc);
    range_end = range_start;

    if (type == XCL_RANGE)
      {
      GETCHARINCTEST(range_end, cc);
      }

    ranges->ranges[out_pos] = range_start;
    ranges->ranges[out_pos + 1] = range_end;
    out_pos += 2;
    }
  while (*cc != XCL_END);

  SLJIT_ASSERT(out_pos <= XCLASS_LOCAL_RANGES_SIZE);
  ranges->range_count = out_pos;
  return;
  }

/* List form: read the header that describes the lists. */
SLJIT_ASSERT(cc[0] >= XCL_LIST);
#if PCRE2_CODE_UNIT_WIDTH == 8
type = (uint32_t)(cc[0] << 8) | cc[1];
cc += 2;
#else
type = cc[0];
cc++;
#endif /* CODE_UNIT_WIDTH */

/* The list data lives before common->start; the stored offset is in 16-bit units. */
next_char = (const uint8_t*)common->start - (GET(cc, 0) << 1);
type &= XCL_TYPE_MASK;

/* First pass: walk the list headers only, to get an upper bound on the number of pairs. */
for (pending_type = type, scan_ptr = next_char, list_ind = 0;
     pending_type > 0;
     pending_type >>= XCL_TYPE_BIT_LEN, list_ind++)
  {
  const BOOL narrow = (list_ind <= 1);

  item_count = pending_type & XCL_ITEM_COUNT_MASK;

  if (item_count == XCL_ITEM_COUNT_MASK)
    {
    /* The count did not fit into the header: it is stored as the first item. */
    if (narrow)
      {
      item_count = *(const uint16_t*)scan_ptr;
      scan_ptr += 2;
      }
    else
      {
      item_count = *(const uint32_t*)scan_ptr;
      scan_ptr += 4;
      }
    }

  scan_ptr += (size_t)item_count << (narrow ? 1 : 2);
  est_pairs += item_count + 1;
  }

if (est_pairs > XCLASS_LOCAL_RANGES_SIZE)
  {
  /* Number of bits needed for est_pairs - 1, used as the stack size. */
  stack_depth = 0;
  for (remaining = est_pairs - 1; remaining > 0; remaining >>= 1)
    stack_depth++;

  /* One block: the stack items first, then the range pairs. */
  ranges->stack = (xclass_stack_item*)SLJIT_MALLOC((sizeof(xclass_stack_item) * stack_depth)
    + ((sizeof(uint32_t) << 1) * (size_t)est_pairs), compiler->allocator_data);

  if (ranges->stack == NULL)
    {
    sljit_set_compiler_memory_error(compiler);
    ranges->ranges = NULL;
    return;
    }

  ranges->ranges = (uint32_t*)(ranges->stack + stack_depth);
  }

/* Second pass: decode the items. no_start means that no range is open. */
char_list_add = XCL_CHAR_LIST_LOW_16_ADD;
list_ind = 0;
range_start = no_start;

if ((type & XCL_BEGIN_WITH_RANGE) != 0)
  range_start = XCL_CHAR_LIST_LOW_16_START;

while (type > 0)
  {
  item_count = type & XCL_ITEM_COUNT_MASK;

  if (item_count == XCL_ITEM_COUNT_MASK)
    {
    READ_FROM_CHAR_LIST(item_count);
    SLJIT_ASSERT(item_count >= XCL_ITEM_COUNT_MASK);
    }

  for (; item_count > 0; item_count--)
    {
    READ_FROM_CHAR_LIST(range_end);

    if ((range_end & XCL_CHAR_END) == 0)
      {
      /* Not an end marker: this item opens a range. */
      range_start = char_list_add + (range_end >> XCL_CHAR_SHIFT);
      continue;
      }

    /* End marker: closes the open range, or is a single character. */
    range_end = char_list_add + (range_end >> XCL_CHAR_SHIFT);

    if (range_start == no_start)
      range_start = range_end;

    ranges->ranges[out_pos] = range_start;
    ranges->ranges[out_pos + 1] = range_end;
    out_pos += 2;
    range_start = no_start;
    }

  /* Move on to the next list; from here on `type` describes that list. */
  list_ind++;
  type >>= XCL_TYPE_BIT_LEN;
  starts_with_range = (type & XCL_BEGIN_WITH_RANGE) != 0;

  if (range_start == no_start)
    {
    if (starts_with_range)
      {
      /* The next list opens with a range that starts at its lowest value. */
      switch (list_ind)
        {
        case 1:
        range_start = XCL_CHAR_LIST_HIGH_16_START;
        break;

#if PCRE2_CODE_UNIT_WIDTH == 32
        case 2:
        range_start = XCL_CHAR_LIST_LOW_32_START;
        break;

        default:
        range_start = XCL_CHAR_LIST_HIGH_32_START;
        break;
#else
        default:
        range_start = XCL_CHAR_LIST_LOW_32_START;
        break;
#endif
        }
      }
    }
  else if (!starts_with_range)
    {
    /* A range is still open but is not continued by the next list:
    close it at the highest value of the list just finished. */
    switch (list_ind)
      {
      case 1:
      range_end = XCL_CHAR_LIST_LOW_16_END;
      break;

      case 2:
      range_end = XCL_CHAR_LIST_HIGH_16_END;
      break;

#if PCRE2_CODE_UNIT_WIDTH == 32
      case 3:
      range_end = XCL_CHAR_LIST_LOW_32_END;
      break;

      default:
      range_end = XCL_CHAR_LIST_HIGH_32_END;
      break;
#else
      default:
      range_end = XCL_CHAR_LIST_LOW_32_END;
      break;
#endif
      }

    ranges->ranges[out_pos] = range_start;
    ranges->ranges[out_pos + 1] = range_end;
    out_pos += 2;
    range_start = no_start;
    }

  /* Base value added to the items of the next list. */
  switch (list_ind)
    {
    case 1:
    char_list_add = XCL_CHAR_LIST_HIGH_16_ADD;
    break;

#if PCRE2_CODE_UNIT_WIDTH == 32
    case 2:
    char_list_add = XCL_CHAR_LIST_LOW_32_ADD;
    break;

    default:
    char_list_add = XCL_CHAR_LIST_HIGH_32_ADD;
    break;
#else
    default:
    char_list_add = XCL_CHAR_LIST_LOW_32_ADD;
    break;
#endif
    }
  }

SLJIT_ASSERT(out_pos > 0 && out_pos <= (est_pairs << 1));
SLJIT_ASSERT(next_char <= (const uint8_t*)common->start);
ranges->range_count = out_pos;
}
|
||||
|
||||
/* Emits the test of the value in TMP1 against a 32-byte bitmap.

Emitted code: values above 255 jump past everything emitted here. Otherwise
either optimize_class() emits the test, or the generic sequence below loads
byte (TMP1 >> 3) of the bitmap and tests bit (TMP1 & 7), jumping to `found`
when it is set. Values up to 255 that reach the end of the test jump to
`backtracks`. TMP1 and TMP2 are overwritten by the generic sequence. */
static void xclass_check_bitset(compiler_common *common, const sljit_u8 *bitset, jump_list **found, jump_list **backtracks)
{
DEFINE_COMPILER;
struct sljit_jump *above_255;
BOOL top_bit_set;
BOOL handled;

/* Bit 255 of the map is the top bit of its last byte. */
top_bit_set = (bitset[31] & 0x80) != 0;

above_255 = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);

handled = optimize_class(common, bitset, top_bit_set, TRUE, found);
if (!handled)
  {
  /* TMP2 = bit index within the byte, TMP1 = the byte of the map. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)bitset);
  /* TMP2 = 1 << bit index, then test it against the byte. */
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
  add_jump(compiler, found, JUMP(SLJIT_NOT_ZERO));
  }

add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
JUMPHERE(above_255);
}
|
||||
|
||||
#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
|
||||
|
||||
/* Widens [*min_ptr, *max_ptr] so that it covers every character listed in the
extended class data at cc (inline XCL_SINGLE / XCL_RANGE items, or an XCL_LIST
header referring to lists stored before common->start).

*min_ptr is lowered to the smallest listed character if that is smaller, and
*max_ptr is raised to the largest one if that is larger. Only meaningful in
UTF mode (asserted); `utf` is a local that GETCHARINCTEST refers to by name. */
static void xclass_update_min_max(compiler_common *common, PCRE2_SPTR cc, sljit_u32 *min_ptr, sljit_u32 *max_ptr)
{
sljit_u32 lo = *min_ptr;
sljit_u32 hi = *max_ptr;
uint32_t type, list_ind, ch;
uint32_t count, last_item;
uint32_t char_list_add;
const uint8_t *next_char;
const uint8_t *first_item;
BOOL utf = TRUE;

/* This function is pointless without utf 8/16. */
SLJIT_ASSERT(common->utf);

if (*cc == XCL_SINGLE || *cc == XCL_RANGE)
  {
  /* Short form: inspect every inline item. */
  do
    {
    type = *cc++;
    SLJIT_ASSERT(type == XCL_SINGLE || type == XCL_RANGE);

    GETCHARINCTEST(ch, cc);
    if (ch < lo)
      lo = ch;

    if (type == XCL_RANGE)
      {
      GETCHARINCTEST(ch, cc);
      }

    if (ch > hi)
      hi = ch;
    }
  while (*cc != XCL_END);
  }
else
  {
  SLJIT_ASSERT(cc[0] >= XCL_LIST);
#if PCRE2_CODE_UNIT_WIDTH == 8
  type = (uint32_t)(cc[0] << 8) | cc[1];
  cc += 2;
#else
  type = cc[0];
  cc++;
#endif /* CODE_UNIT_WIDTH */

  /* The list data lives before common->start; the stored offset is in 16-bit units. */
  next_char = (const uint8_t*)common->start - (GET(cc, 0) << 1);
  type &= XCL_TYPE_MASK;

  SLJIT_ASSERT(type != 0);

  /* Minimum: find the first list that contains anything. */
  for (list_ind = 0; (type & (XCL_BEGIN_WITH_RANGE | XCL_ITEM_COUNT_MASK)) == 0; list_ind++)
    type >>= XCL_TYPE_BIT_LEN;

  SLJIT_ASSERT(list_ind <= 2);
  if (list_ind == 0)
    {
    char_list_add = XCL_CHAR_LIST_LOW_16_ADD;
    ch = XCL_CHAR_LIST_LOW_16_START;
    }
  else if (list_ind == 1)
    {
    char_list_add = XCL_CHAR_LIST_HIGH_16_ADD;
    ch = XCL_CHAR_LIST_HIGH_16_START;
    }
  else
    {
    char_list_add = XCL_CHAR_LIST_LOW_32_ADD;
    ch = XCL_CHAR_LIST_LOW_32_START;
    }

  if ((type & XCL_BEGIN_WITH_RANGE) == 0)
    {
    /* The list does not open with a range, so its first item is the
    minimum. A stored item count, when present, precedes that item. */
    first_item = next_char;
    if ((type & XCL_ITEM_COUNT_MASK) == XCL_ITEM_COUNT_MASK)
      first_item += (list_ind <= 1) ? 2 : 4;

    if (list_ind <= 1)
      ch = *(const uint16_t*)first_item;
    else
      ch = *(const uint32_t*)first_item;

    ch = char_list_add + (ch >> XCL_CHAR_SHIFT);
    }

  if (ch < lo)
    lo = ch;

  /* Maximum: step over the data of every remaining list, so that next_char
  ends up just after the final item of the final list. */
  for (;;)
    {
    count = type & XCL_ITEM_COUNT_MASK;

    if (count == XCL_ITEM_COUNT_MASK)
      {
      /* Stored count: step over the count itself as well. */
      if (list_ind <= 1)
        count = *(const uint16_t*)next_char;
      else
        count = *(const uint32_t*)next_char;
      count++;
      }

    next_char += count << (list_ind <= 1 ? 1 : 2);

    if ((type >> XCL_TYPE_BIT_LEN) == 0)
      break;

    list_ind++;
    type >>= XCL_TYPE_BIT_LEN;
    }

  SLJIT_ASSERT(list_ind <= 2 && type != 0);
  if (list_ind == 0)
    {
    char_list_add = XCL_CHAR_LIST_LOW_16_ADD;
    ch = XCL_CHAR_LIST_LOW_16_END;
    }
  else if (list_ind == 1)
    {
    char_list_add = XCL_CHAR_LIST_HIGH_16_ADD;
    ch = XCL_CHAR_LIST_HIGH_16_END;
    }
  else
    {
    char_list_add = XCL_CHAR_LIST_LOW_32_ADD;
    ch = XCL_CHAR_LIST_LOW_32_END;
    }

  if ((type & XCL_ITEM_COUNT_MASK) != 0)
    {
    /* Look at the final item. If it is an end marker it is the maximum;
    otherwise it opens a range and the list's end value set above stands. */
    if (list_ind <= 1)
      last_item = *(const uint16_t*)(next_char - 2);
    else
      last_item = *(const uint32_t*)(next_char - 4);

    if (last_item & XCL_CHAR_END)
      ch = char_list_add + (last_item >> XCL_CHAR_SHIFT);
    }

  if (ch > hi)
    hi = ch;
  }

SLJIT_ASSERT(lo <= MAX_UTF_CODE_POINT && hi <= MAX_UTF_CODE_POINT && lo <= hi);
*min_ptr = lo;
*max_ptr = hi;
}
|
||||
|
||||
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
|
||||
|
||||
/* Bit flags for the `status` argument of compile_xclass_matchingpath(). Each
flag is a distinct bit. Apart from XCLASS_IS_ECLASS they exist only with
SUPPORT_UNICODE; compile_xclass_matchingpath() ORs some of them into `status`
itself while scanning the class (e.g. XCLASS_HAS_SCRIPT_EXTENSION). */
#define XCLASS_IS_ECLASS 0x001
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#define XCLASS_SAVE_CHAR 0x002
|
||||
#define XCLASS_HAS_TYPE 0x004
|
||||
#define XCLASS_HAS_SCRIPT 0x008
|
||||
#define XCLASS_HAS_SCRIPT_EXTENSION 0x010
|
||||
#define XCLASS_HAS_BOOL 0x020
|
||||
#define XCLASS_HAS_BIDICL 0x040
|
||||
/* Union of all the XCLASS_HAS_* flags above. */
#define XCLASS_NEEDS_UCD (XCLASS_HAS_TYPE | XCLASS_HAS_SCRIPT | XCLASS_HAS_SCRIPT_EXTENSION | XCLASS_HAS_BOOL | XCLASS_HAS_BIDICL)
|
||||
#define XCLASS_SCRIPT_EXTENSION_NOTPROP 0x080
|
||||
#define XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR 0x100
|
||||
#define XCLASS_SCRIPT_EXTENSION_RESTORE_LOCAL0 0x200
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
|
||||
|
||||
/* Emits the matching code of an extended class (XCLASS).

cc          points to the XCL_* flags code unit of the class
backtracks  collects the jumps taken when the class does not match
status      XCLASS_* flags; when XCLASS_IS_ECLASS is set the character is
            not read here, and a match is recorded by setting the lowest
            bit of ECLASS_STACK_DATA

TMP3 must be preserved because it is used by compile_iterator_matchingpath. */
|
||||
static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, sljit_u32 status)
|
||||
{
|
||||
DEFINE_COMPILER;
|
||||
jump_list *found = NULL;
|
||||
jump_list *check_result = NULL;
|
||||
jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
|
||||
sljit_uw c, charoffset;
|
||||
sljit_u32 max = READ_CHAR_MAX, min = 0;
|
||||
struct sljit_jump *jump = NULL;
|
||||
PCRE2_UCHAR flags;
|
||||
PCRE2_SPTR ccbegin;
|
||||
sljit_u32 compares, invertcmp, depth;
|
||||
sljit_u32 first_item, last_item, mid_item;
|
||||
sljit_u32 range_start, range_end;
|
||||
xclass_ranges ranges;
|
||||
BOOL has_cmov, last_range_set;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
sljit_u32 category_list = 0;
|
||||
sljit_u32 items;
|
||||
int typereg = TMP1;
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
SLJIT_ASSERT(common->locals_size >= SSIZE_OF(sw));
|
||||
/* Scanning the necessary info. */
|
||||
flags = *cc++;
|
||||
ccbegin = cc;
|
||||
compares = 0;
|
||||
|
||||
if (flags & XCL_MAP)
|
||||
cc += 32 / sizeof(PCRE2_UCHAR);
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
/* First pass over the property items: count the comparisons and record
which UCD fields are needed. General category tests are merged into the
category_list bit set and are counted as a single comparison later. */
while (*cc == XCL_PROP || *cc == XCL_NOTPROP)
|
||||
{
|
||||
compares++;
|
||||
cc++;
|
||||
|
||||
items = 0;
|
||||
|
||||
switch(*cc)
|
||||
{
|
||||
case PT_LAMP:
|
||||
items = UCPCAT3(ucp_Lu, ucp_Ll, ucp_Lt);
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
items = UCPCAT_RANGE(PRIV(ucp_typerange)[(int)cc[1] * 2], PRIV(ucp_typerange)[(int)cc[1] * 2 + 1]);
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
items = UCPCAT(cc[1]);
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
items = UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N;
|
||||
break;
|
||||
|
||||
case PT_ALNUM:
|
||||
items = UCPCAT_L | UCPCAT_N;
|
||||
break;
|
||||
|
||||
case PT_SCX:
|
||||
status |= XCLASS_HAS_SCRIPT_EXTENSION;
|
||||
if (cc[-1] == XCL_NOTPROP)
|
||||
{
|
||||
status |= XCLASS_SCRIPT_EXTENSION_NOTPROP;
|
||||
break;
|
||||
}
|
||||
compares++;
|
||||
/* Fall through */
|
||||
|
||||
case PT_SC:
|
||||
status |= XCLASS_HAS_SCRIPT;
|
||||
break;
|
||||
|
||||
case PT_SPACE:
|
||||
case PT_PXSPACE:
|
||||
case PT_PXGRAPH:
|
||||
case PT_PXPRINT:
|
||||
case PT_PXPUNCT:
|
||||
status |= XCLASS_SAVE_CHAR | XCLASS_HAS_TYPE;
|
||||
break;
|
||||
|
||||
case PT_UCNC:
|
||||
case PT_PXXDIGIT:
|
||||
status |= XCLASS_SAVE_CHAR;
|
||||
break;
|
||||
|
||||
case PT_BOOL:
|
||||
status |= XCLASS_HAS_BOOL;
|
||||
break;
|
||||
|
||||
case PT_BIDICL:
|
||||
status |= XCLASS_HAS_BIDICL;
|
||||
break;
|
||||
|
||||
default:
|
||||
SLJIT_UNREACHABLE();
|
||||
break;
|
||||
}
|
||||
|
||||
if (items > 0)
|
||||
{
|
||||
if (cc[-1] == XCL_NOTPROP)
|
||||
items ^= UCPCAT_ALL;
|
||||
category_list |= items;
|
||||
status |= XCLASS_HAS_TYPE;
|
||||
compares--;
|
||||
}
|
||||
|
||||
cc += 2;
|
||||
}
|
||||
|
||||
if (category_list == UCPCAT_ALL)
|
||||
{
|
||||
/* All or no characters are accepted, same as dotall. */
|
||||
if (status & XCLASS_IS_ECLASS)
|
||||
{
|
||||
if (list != backtracks)
|
||||
OP2(SLJIT_OR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
|
||||
if (list == backtracks)
|
||||
add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
|
||||
return;
|
||||
}
|
||||
|
||||
if (category_list != 0)
|
||||
compares++;
|
||||
#endif
|
||||
|
||||
/* Items following the properties are passed to xclass_compute_ranges()
below and are counted as one comparison. */
if (*cc != XCL_END)
|
||||
{
|
||||
#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
|
||||
if (common->utf && compares == 0 && !(status & XCLASS_IS_ECLASS))
|
||||
{
|
||||
SLJIT_ASSERT(category_list == 0);
|
||||
max = 0;
|
||||
min = (flags & XCL_MAP) != 0 ? 0 : READ_CHAR_MAX;
|
||||
xclass_update_min_max(common, cc, &min, &max);
|
||||
}
|
||||
#endif
|
||||
compares++;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
status |= XCLASS_SAVE_CHAR;
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
SLJIT_ASSERT(compares > 0 || category_list != 0);
|
||||
#else /* !SUPPORT_UNICODE */
|
||||
SLJIT_ASSERT(compares > 0);
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* We are not necessarily in UTF mode, even in 8-bit mode. */
|
||||
cc = ccbegin;
|
||||
if (!(status & XCLASS_IS_ECLASS))
|
||||
{
|
||||
if ((flags & XCL_NOT) != 0)
|
||||
read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||
else
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
read_char(common, min, max, (status & XCLASS_NEEDS_UCD) ? backtracks : NULL, 0);
|
||||
#else /* !SUPPORT_UNICODE */
|
||||
read_char(common, min, max, NULL, 0);
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
}
|
||||
|
||||
if ((flags & XCL_MAP) != 0)
|
||||
{
|
||||
SLJIT_ASSERT(!(status & XCLASS_IS_ECLASS));
|
||||
xclass_check_bitset(common, (const sljit_u8 *)cc, &found, backtracks);
|
||||
cc += 32 / sizeof(PCRE2_UCHAR);
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (status & XCLASS_NEEDS_UCD)
|
||||
{
|
||||
if ((status & (XCLASS_SAVE_CHAR | XCLASS_IS_ECLASS)) == XCLASS_SAVE_CHAR)
|
||||
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
if (!common->utf)
|
||||
{
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
|
||||
SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, UNASSIGNED_UTF_CHAR, TMP1);
|
||||
}
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
|
||||
|
||||
/* Two stage table lookup (ucd_stage1, ucd_stage2). At the end TMP2 holds
the record index multiplied by 12 (8 + 4), which is used below as a byte
offset relative to ucd_records. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
|
||||
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
|
||||
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
|
||||
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
|
||||
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
|
||||
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
|
||||
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
|
||||
OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
|
||||
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
|
||||
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
|
||||
|
||||
ccbegin = cc;
|
||||
|
||||
/* Each of the following sections rescans the property items from
ccbegin and emits the checks which use one field of the UCD record. */
if (status & XCLASS_HAS_BIDICL)
|
||||
{
|
||||
OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
|
||||
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_SHIFT);
|
||||
|
||||
while (*cc == XCL_PROP || *cc == XCL_NOTPROP)
|
||||
{
|
||||
cc++;
|
||||
|
||||
if (*cc == PT_BIDICL)
|
||||
{
|
||||
compares--;
|
||||
invertcmp = (compares == 0 && list != backtracks);
|
||||
if (cc[-1] == XCL_NOTPROP)
|
||||
invertcmp ^= 0x1;
|
||||
jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
|
||||
add_jump(compiler, compares > 0 ? list : backtracks, jump);
|
||||
}
|
||||
cc += 2;
|
||||
}
|
||||
|
||||
cc = ccbegin;
|
||||
}
|
||||
|
||||
if (status & XCLASS_HAS_BOOL)
|
||||
{
|
||||
OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops));
|
||||
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BPROPS_MASK);
|
||||
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
|
||||
|
||||
while (*cc == XCL_PROP || *cc == XCL_NOTPROP)
|
||||
{
|
||||
cc++;
|
||||
if (*cc == PT_BOOL)
|
||||
{
|
||||
compares--;
|
||||
invertcmp = (compares == 0 && list != backtracks);
|
||||
if (cc[-1] == XCL_NOTPROP)
|
||||
invertcmp ^= 0x1;
|
||||
|
||||
OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_boolprop_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)(1u << (cc[1] & 0x1f)));
|
||||
add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
|
||||
}
|
||||
cc += 2;
|
||||
}
|
||||
|
||||
cc = ccbegin;
|
||||
}
|
||||
|
||||
if (status & XCLASS_HAS_SCRIPT)
|
||||
{
|
||||
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
|
||||
|
||||
while (*cc == XCL_PROP || *cc == XCL_NOTPROP)
|
||||
{
|
||||
cc++;
|
||||
|
||||
switch (*cc)
|
||||
{
|
||||
case PT_SCX:
|
||||
if (cc[-1] == XCL_NOTPROP)
|
||||
break;
|
||||
/* Fall through */
|
||||
|
||||
case PT_SC:
|
||||
compares--;
|
||||
invertcmp = (compares == 0 && list != backtracks);
|
||||
if (cc[-1] == XCL_NOTPROP)
|
||||
invertcmp ^= 0x1;
|
||||
|
||||
add_jump(compiler, compares > 0 ? list : backtracks, CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]));
|
||||
}
|
||||
cc += 2;
|
||||
}
|
||||
|
||||
cc = ccbegin;
|
||||
}
|
||||
|
||||
if (status & XCLASS_HAS_SCRIPT_EXTENSION)
|
||||
{
|
||||
OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
|
||||
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_SCRIPTX_MASK);
|
||||
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
|
||||
|
||||
if (status & XCLASS_SCRIPT_EXTENSION_NOTPROP)
|
||||
{
|
||||
/* TMP2 is overwritten by the script value below. When the record
offset is still needed for the type check, it is saved first. */
if (status & XCLASS_HAS_TYPE)
|
||||
{
|
||||
if ((status & (XCLASS_SAVE_CHAR | XCLASS_IS_ECLASS)) == XCLASS_SAVE_CHAR)
|
||||
{
|
||||
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, TMP2, 0);
|
||||
status |= XCLASS_SCRIPT_EXTENSION_RESTORE_LOCAL0;
|
||||
}
|
||||
else
|
||||
{
|
||||
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
|
||||
status |= XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR;
|
||||
}
|
||||
}
|
||||
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
|
||||
}
|
||||
|
||||
while (*cc == XCL_PROP || *cc == XCL_NOTPROP)
|
||||
{
|
||||
cc++;
|
||||
|
||||
if (*cc == PT_SCX)
|
||||
{
|
||||
compares--;
|
||||
invertcmp = (compares == 0 && list != backtracks);
|
||||
|
||||
jump = NULL;
|
||||
if (cc[-1] == XCL_NOTPROP)
|
||||
{
|
||||
jump = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, (int)cc[1]);
|
||||
if (invertcmp)
|
||||
{
|
||||
add_jump(compiler, backtracks, jump);
|
||||
jump = NULL;
|
||||
}
|
||||
invertcmp ^= 0x1;
|
||||
}
|
||||
|
||||
OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_script_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)(1u << (cc[1] & 0x1f)));
|
||||
add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
|
||||
|
||||
if (jump != NULL)
|
||||
JUMPHERE(jump);
|
||||
}
|
||||
cc += 2;
|
||||
}
|
||||
|
||||
if (status & XCLASS_SCRIPT_EXTENSION_RESTORE_LOCAL0)
|
||||
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
|
||||
else if (status & XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR)
|
||||
OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
|
||||
cc = ccbegin;
|
||||
}
|
||||
|
||||
/* Move the saved character value back into TMP1. */
if (status & XCLASS_SAVE_CHAR)
|
||||
OP1(SLJIT_MOV, TMP1, 0, (status & XCLASS_IS_ECLASS) ? ECLASS_CHAR_DATA : RETURN_ADDR, 0);
|
||||
|
||||
if (status & XCLASS_HAS_TYPE)
|
||||
{
|
||||
if (status & XCLASS_SAVE_CHAR)
|
||||
typereg = RETURN_ADDR;
|
||||
|
||||
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
|
||||
/* typereg = 1 << chartype, a one bit mask tested against UCPCAT bit sets. */
OP2(SLJIT_SHL, typereg, 0, SLJIT_IMM, 1, TMP2, 0);
|
||||
|
||||
if (category_list > 0)
|
||||
{
|
||||
compares--;
|
||||
invertcmp = (compares == 0 && list != backtracks);
|
||||
OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, category_list);
|
||||
add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Generating code. */
|
||||
charoffset = 0;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
while (*cc == XCL_PROP || *cc == XCL_NOTPROP)
|
||||
{
|
||||
compares--;
|
||||
invertcmp = (compares == 0 && list != backtracks);
|
||||
jump = NULL;
|
||||
|
||||
if (*cc == XCL_NOTPROP)
|
||||
invertcmp ^= 0x1;
|
||||
cc++;
|
||||
switch(*cc)
|
||||
{
|
||||
case PT_LAMP:
|
||||
case PT_GC:
|
||||
case PT_PC:
|
||||
case PT_SC:
|
||||
case PT_SCX:
|
||||
case PT_BOOL:
|
||||
case PT_BIDICL:
|
||||
case PT_WORD:
|
||||
case PT_ALNUM:
|
||||
compares++;
|
||||
/* Already handled. */
|
||||
break;
|
||||
|
||||
case PT_SPACE:
|
||||
case PT_PXSPACE:
|
||||
SET_CHAR_OFFSET(9);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
|
||||
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
|
||||
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
|
||||
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
|
||||
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
|
||||
|
||||
OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Zl, ucp_Zs));
|
||||
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO);
|
||||
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
|
||||
break;
|
||||
|
||||
case PT_UCNC:
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
|
||||
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
|
||||
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
|
||||
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
|
||||
|
||||
SET_CHAR_OFFSET(0xa0);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
|
||||
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||
SET_CHAR_OFFSET(0);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
|
||||
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
|
||||
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
|
||||
break;
|
||||
|
||||
case PT_PXGRAPH:
|
||||
OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT_RANGE(ucp_Zl, ucp_Zs));
|
||||
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
|
||||
|
||||
OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf));
|
||||
jump = JUMP(SLJIT_ZERO);
|
||||
|
||||
c = charoffset;
|
||||
/* In case of ucp_Cf, we overwrite the result. */
|
||||
SET_CHAR_OFFSET(0x2066);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
|
||||
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
|
||||
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
|
||||
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
|
||||
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
|
||||
|
||||
/* Restore charoffset. */
|
||||
SET_CHAR_OFFSET(c);
|
||||
|
||||
JUMPHERE(jump);
|
||||
jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
|
||||
break;
|
||||
|
||||
case PT_PXPRINT:
|
||||
OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT2(ucp_Zl, ucp_Zp));
|
||||
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
|
||||
|
||||
OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf));
|
||||
jump = JUMP(SLJIT_ZERO);
|
||||
|
||||
c = charoffset;
|
||||
/* In case of ucp_Cf, we overwrite the result. */
|
||||
SET_CHAR_OFFSET(0x2066);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
|
||||
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
|
||||
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
|
||||
|
||||
/* Restore charoffset. */
|
||||
SET_CHAR_OFFSET(c);
|
||||
|
||||
JUMPHERE(jump);
|
||||
jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
|
||||
break;
|
||||
|
||||
case PT_PXPUNCT:
|
||||
OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Sc, ucp_So));
|
||||
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
|
||||
|
||||
SET_CHAR_OFFSET(0);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x7f);
|
||||
OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||
|
||||
OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Pc, ucp_Ps));
|
||||
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO);
|
||||
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
|
||||
break;
|
||||
|
||||
case PT_PXXDIGIT:
|
||||
SET_CHAR_OFFSET(CHAR_A);
|
||||
OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, ~0x20);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP2, 0, SLJIT_IMM, CHAR_F - CHAR_A);
|
||||
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||
|
||||
SET_CHAR_OFFSET(CHAR_0);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_9 - CHAR_0);
|
||||
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||
|
||||
SET_CHAR_OFFSET(0xff10);
|
||||
jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff10);
|
||||
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff19 - 0xff10);
|
||||
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||
|
||||
SET_CHAR_OFFSET(0xff21);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff26 - 0xff21);
|
||||
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||
|
||||
SET_CHAR_OFFSET(0xff41);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff41);
|
||||
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||
|
||||
SET_CHAR_OFFSET(0xff10);
|
||||
|
||||
JUMPHERE(jump);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0);
|
||||
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
|
||||
break;
|
||||
|
||||
default:
|
||||
SLJIT_UNREACHABLE();
|
||||
break;
|
||||
}
|
||||
|
||||
cc += 2;
|
||||
|
||||
if (jump != NULL)
|
||||
add_jump(compiler, compares > 0 ? list : backtracks, jump);
|
||||
}
|
||||
|
||||
if (compares == 0)
|
||||
{
|
||||
if (found != NULL)
|
||||
set_jumps(found, LABEL());
|
||||
|
||||
if (status & XCLASS_IS_ECLASS)
|
||||
OP2(SLJIT_OR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
|
||||
return;
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* The only comparison left is the one counted for the remaining items. */
SLJIT_ASSERT(compares == 1);
|
||||
ranges.range_count = 0;
|
||||
ranges.ranges = ranges.local_ranges;
|
||||
ranges.stack = ranges.local_stack;
|
||||
|
||||
xclass_compute_ranges(common, cc, &ranges);
|
||||
|
||||
/* Memory error is set for the compiler. */
|
||||
if (ranges.stack == NULL)
|
||||
return;
|
||||
|
||||
#if (defined SLJIT_DEBUG && SLJIT_DEBUG) && \
|
||||
defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
|
||||
if (common->utf)
|
||||
{
|
||||
min = READ_CHAR_MAX;
|
||||
max = 0;
|
||||
xclass_update_min_max(common, cc, &min, &max);
|
||||
SLJIT_ASSERT(ranges.ranges[0] == min && ranges.ranges[ranges.range_count - 1] == max);
|
||||
}
|
||||
#endif /* SLJIT_DEBUG && SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
|
||||
|
||||
invertcmp = (list != backtracks);
|
||||
|
||||
/* A single (start, end) pair needs only one comparison. */
if (ranges.range_count == 2)
|
||||
{
|
||||
range_start = ranges.ranges[0];
|
||||
range_end = ranges.ranges[1];
|
||||
|
||||
if (range_start < range_end)
|
||||
{
|
||||
SET_CHAR_OFFSET(range_start);
|
||||
jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start));
|
||||
}
|
||||
else
|
||||
jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_start - charoffset));
|
||||
|
||||
add_jump(compiler, backtracks, jump);
|
||||
|
||||
SLJIT_ASSERT(ranges.stack == ranges.local_stack);
|
||||
if (found != NULL)
|
||||
set_jumps(found, LABEL());
|
||||
|
||||
if (status & XCLASS_IS_ECLASS)
|
||||
OP2(SLJIT_OR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
range_start = ranges.ranges[0];
|
||||
SET_CHAR_OFFSET(range_start);
|
||||
if (ranges.range_count >= 6)
|
||||
{
|
||||
/* Early fail. */
|
||||
range_end = ranges.ranges[ranges.range_count - 1];
|
||||
add_jump(compiler, (flags & XCL_NOT) == 0 ? backtracks : &found,
|
||||
CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start)));
|
||||
}
|
||||
|
||||
depth = 0;
|
||||
first_item = 0;
|
||||
last_item = ranges.range_count - 2;
|
||||
has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV) != 0;
|
||||
|
||||
/* The pairs between first_item and last_item are split at mid_item while
at least four pairs remain. The upper part is pushed onto ranges.stack and
processed after the lower part. The result of the checks is collected
in TMP2. */
while (TRUE)
|
||||
{
|
||||
/* At least two items are present. */
|
||||
SLJIT_ASSERT(first_item < last_item && charoffset == ranges.ranges[0]);
|
||||
last_range_set = FALSE;
|
||||
|
||||
if (first_item + 6 <= last_item)
|
||||
{
|
||||
mid_item = ((first_item + last_item) >> 1) & ~(sljit_u32)1;
|
||||
SLJIT_ASSERT(last_item >= mid_item + 4);
|
||||
|
||||
range_end = ranges.ranges[mid_item + 1];
|
||||
if (first_item + 6 > mid_item && ranges.ranges[mid_item] == range_end)
|
||||
{
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_GREATER | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - charoffset));
|
||||
ranges.stack[depth].jump = JUMP(SLJIT_GREATER);
|
||||
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
|
||||
last_range_set = TRUE;
|
||||
}
|
||||
else
|
||||
ranges.stack[depth].jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - charoffset));
|
||||
|
||||
ranges.stack[depth].first_item = (sljit_u32)(mid_item + 2);
|
||||
ranges.stack[depth].last_item = (sljit_u32)last_item;
|
||||
|
||||
depth++;
|
||||
SLJIT_ASSERT(ranges.stack == ranges.local_stack ?
|
||||
depth <= XCLASS_LOCAL_RANGES_LOG2_SIZE : (ranges.stack + depth) <= (xclass_stack_item*)ranges.ranges);
|
||||
|
||||
last_item = mid_item;
|
||||
if (!last_range_set)
|
||||
continue;
|
||||
|
||||
last_item -= 2;
|
||||
}
|
||||
|
||||
if (!last_range_set)
|
||||
{
|
||||
range_start = ranges.ranges[first_item];
|
||||
range_end = ranges.ranges[first_item + 1];
|
||||
|
||||
if (range_start < range_end)
|
||||
{
|
||||
SET_CHAR_OFFSET(range_start);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start));
|
||||
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||
}
|
||||
else
|
||||
{
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_start - charoffset));
|
||||
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
|
||||
}
|
||||
first_item += 2;
|
||||
}
|
||||
|
||||
SLJIT_ASSERT(first_item <= last_item);
|
||||
|
||||
do
|
||||
{
|
||||
range_start = ranges.ranges[first_item];
|
||||
range_end = ranges.ranges[first_item + 1];
|
||||
|
||||
if (range_start < range_end)
|
||||
{
|
||||
SET_CHAR_OFFSET(range_start);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start));
|
||||
|
||||
if (has_cmov)
|
||||
SELECT(SLJIT_LESS_EQUAL, TMP2, STR_END, 0, TMP2);
|
||||
else
|
||||
OP_FLAGS(SLJIT_OR | ((first_item == last_item) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_LESS_EQUAL);
|
||||
}
|
||||
else
|
||||
{
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_start - charoffset));
|
||||
|
||||
if (has_cmov)
|
||||
SELECT(SLJIT_EQUAL, TMP2, STR_END, 0, TMP2);
|
||||
else
|
||||
OP_FLAGS(SLJIT_OR | ((first_item == last_item) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
|
||||
}
|
||||
|
||||
first_item += 2;
|
||||
}
|
||||
while (first_item <= last_item);
|
||||
|
||||
if (depth == 0) break;
|
||||
|
||||
add_jump(compiler, &check_result, JUMP(SLJIT_JUMP));
|
||||
|
||||
/* The charoffset resets after the end of a branch is reached. */
|
||||
charoffset = ranges.ranges[0];
|
||||
depth--;
|
||||
first_item = ranges.stack[depth].first_item;
|
||||
last_item = ranges.stack[depth].last_item;
|
||||
JUMPHERE(ranges.stack[depth].jump);
|
||||
}
|
||||
|
||||
if (check_result != NULL)
|
||||
set_jumps(check_result, LABEL());
|
||||
|
||||
if (has_cmov)
|
||||
jump = CMP(SLJIT_NOT_EQUAL ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
|
||||
else
|
||||
{
|
||||
sljit_set_current_flags(compiler, SLJIT_SET_Z);
|
||||
jump = JUMP(SLJIT_NOT_EQUAL ^ invertcmp);
|
||||
}
|
||||
|
||||
add_jump(compiler, backtracks, jump);
|
||||
|
||||
if (found != NULL)
|
||||
set_jumps(found, LABEL());
|
||||
|
||||
if (status & XCLASS_IS_ECLASS)
|
||||
OP2(SLJIT_OR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
|
||||
|
||||
/* Release the stack if it is not the local one. */
if (ranges.stack != ranges.local_stack)
|
||||
SLJIT_FREE(ranges.stack, compiler->allocator_data);
|
||||
}
|
||||
|
||||
/* Emits the matching code of an extended character class (ECLASS).

The class data is a sequence of ECL_XCLASS operands and ECL_AND / ECL_OR /
ECL_XOR / ECL_NOT operators. ECLASS_STACK_DATA is used as a stack of one bit
results: before every operand except the first one the register is shifted
left by one, compile_xclass_matchingpath() sets the lowest bit when the
operand matches, and the binary operators combine the two lowest bits into
one while shifting the register right.

cc          points to the length field (LINK_SIZE code units) of the class
backtracks  collects the jumps taken when the class does not match

Returns cc + GET(cc, 0) - 1, the end of the class data. */
static PCRE2_SPTR compile_eclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
PCRE2_SPTR end = cc + GET(cc, 0) - 1;
PCRE2_SPTR begin;
jump_list *not_found;
jump_list *found = NULL;

cc += LINK_SIZE;

/* Should be optimized later. */
read_char(common, 0, READ_CHAR_MAX, backtracks, 0);

if (((*cc++) & ECL_MAP) != 0)
  {
  xclass_check_bitset(common, (const sljit_u8 *)cc, &found, backtracks);
  cc += 32 / sizeof(PCRE2_UCHAR);
  }

begin = cc;

/* The two ECLASS registers are saved in LOCAL0 / LOCAL1 and restored before
leaving. The character is kept in ECLASS_CHAR_DATA for later operands. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, ECLASS_CHAR_DATA, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, ECLASS_STACK_DATA, 0);
OP1(SLJIT_MOV, ECLASS_STACK_DATA, 0, SLJIT_IMM, 0);
OP1(SLJIT_MOV, ECLASS_CHAR_DATA, 0, TMP1, 0);

/* All eclass must start with an xclass. */
SLJIT_ASSERT(*cc == ECL_XCLASS);

while (cc < end)
  {
  switch (*cc)
    {
    case ECL_AND:
    ++cc;
    /* All bits of TMP2 are set except possibly the lowest, so the AND
    below changes only the new lowest bit. */
    OP2(SLJIT_OR, TMP2, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, ~(sljit_sw)1);
    OP2(SLJIT_LSHR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
    OP2(SLJIT_AND, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, TMP2, 0);
    break;

    case ECL_OR:
    ++cc;
    OP2(SLJIT_AND, TMP2, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
    OP2(SLJIT_LSHR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
    OP2(SLJIT_OR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, TMP2, 0);
    break;

    case ECL_XOR:
    ++cc;
    OP2(SLJIT_AND, TMP2, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
    OP2(SLJIT_LSHR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
    OP2(SLJIT_XOR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, TMP2, 0);
    break;

    case ECL_NOT:
    ++cc;
    OP2(SLJIT_XOR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
    break;

    default:
    SLJIT_ASSERT(*cc == ECL_XCLASS);
    if (cc != begin)
      {
      /* Reload the character and push a new (zero) result bit. */
      OP1(SLJIT_MOV, TMP1, 0, ECLASS_CHAR_DATA, 0);
      OP2(SLJIT_SHL, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
      }

    /* A failing operand only leaves its result bit cleared: its backtrack
    jumps are bound right after the operand. */
    not_found = NULL;
    compile_xclass_matchingpath(common, cc + 1 + LINK_SIZE, &not_found, XCLASS_IS_ECLASS);
    set_jumps(not_found, LABEL());

    cc += GET(cc, 1);
    break;
    }
  }

/* The flags are set before the registers are restored; the restoring
moves are expected to leave them intact for the jump below. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, ECLASS_STACK_DATA, 0, SLJIT_IMM, 0);
OP1(SLJIT_MOV, ECLASS_CHAR_DATA, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
OP1(SLJIT_MOV, ECLASS_STACK_DATA, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);
add_jump(compiler, backtracks, JUMP(SLJIT_EQUAL));
set_jumps(found, LABEL());
return end;
}
|
||||
|
||||
/* Generic character matching code. */
|
||||
|
||||
#undef SET_CHAR_OFFSET
|
||||
#undef READ_FROM_CHAR_LIST
|
||||
#undef XCLASS_LOCAL_RANGES_SIZE
|
||||
#undef XCLASS_LOCAL_RANGES_LOG2_SIZE
|
||||
|
||||
#endif /* SUPPORT_WIDE_CHARS */
|
||||
|
||||
/* Emits the comparison of one pattern character (all of its code units in
UTF mode) with the subject.

caseless    when set and the character has an other case, the differing bit
            (othercasebit) is ORed into the code unit at othercasechar on
            both sides before comparing
cc          points to the character in the pattern
context     state shared by consecutive calls: remaining length, the register
            holding the next chunk and the code units collected so far
backtracks  collects the jumps taken on mismatch

Returns the address of the code unit following the character. */
static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
|
||||
compare_context *context, jump_list **backtracks)
|
||||
{
|
||||
DEFINE_COMPILER;
|
||||
unsigned int othercasebit = 0;
|
||||
PCRE2_SPTR othercasechar = NULL;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
int utflength;
|
||||
#endif
|
||||
|
||||
if (caseless && char_has_othercase(common, cc))
|
||||
{
|
||||
othercasebit = char_get_othercase_bit(common, cc);
|
||||
SLJIT_ASSERT(othercasebit);
|
||||
/* Extracting bit difference info. */
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
othercasechar = cc + (othercasebit >> 8);
|
||||
othercasebit &= 0xff;
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
|
||||
/* Note that this code only handles characters in the BMP. If there
|
||||
ever are characters outside the BMP whose othercase differs in only one
|
||||
bit from itself (there currently are none), this code will need to be
|
||||
revised for PCRE2_CODE_UNIT_WIDTH == 32. */
|
||||
othercasechar = cc + (othercasebit >> 9);
|
||||
if ((othercasebit & 0x100) != 0)
|
||||
othercasebit = (othercasebit & 0xff) << 8;
|
||||
else
|
||||
othercasebit &= 0xff;
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
|
||||
}
|
||||
|
||||
/* No chunk has been loaded yet for this context: load the first one
into TMP1, the next load goes into TMP2. */
if (context->sourcereg == -1)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
|
||||
if (context->length >= 4)
|
||||
OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||
else if (context->length >= 2)
|
||||
OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||
else
|
||||
#endif
|
||||
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
|
||||
if (context->length >= 4)
|
||||
OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||
else
|
||||
#endif
|
||||
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 32
|
||||
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
|
||||
context->sourcereg = TMP2;
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
utflength = 1;
|
||||
if (common->utf && HAS_EXTRALEN(*cc))
|
||||
utflength += GET_EXTRALEN(*cc);
|
||||
|
||||
/* One iteration for each code unit of the character. */
do
|
||||
{
|
||||
#endif
|
||||
|
||||
context->length -= IN_UCHARS(1);
|
||||
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
|
||||
|
||||
/* Unaligned read is supported. */
|
||||
if (othercasebit != 0 && othercasechar == cc)
|
||||
{
|
||||
context->c.asuchars[context->ucharptr] = *cc | othercasebit;
|
||||
context->oc.asuchars[context->ucharptr] = othercasebit;
|
||||
}
|
||||
else
|
||||
{
|
||||
context->c.asuchars[context->ucharptr] = *cc;
|
||||
context->oc.asuchars[context->ucharptr] = 0;
|
||||
}
|
||||
context->ucharptr++;
|
||||
|
||||
/* Emit the comparison when the collected code units fill a chunk or no
more code units follow. */
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
|
||||
#else
|
||||
if (context->ucharptr >= 2 || context->length == 0)
|
||||
#endif
|
||||
{
|
||||
/* Load the next chunk into the other register, then compare the
chunk which was loaded earlier. */
if (context->length >= 4)
|
||||
OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||
else if (context->length >= 2)
|
||||
OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
else if (context->length >= 1)
|
||||
OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||
context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
|
||||
|
||||
switch(context->ucharptr)
|
||||
{
|
||||
case 4 / sizeof(PCRE2_UCHAR):
|
||||
if (context->oc.asint != 0)
|
||||
OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
|
||||
break;
|
||||
|
||||
case 2 / sizeof(PCRE2_UCHAR):
|
||||
if (context->oc.asushort != 0)
|
||||
OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
|
||||
break;
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
case 1:
|
||||
if (context->oc.asbyte != 0)
|
||||
OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
SLJIT_UNREACHABLE();
|
||||
break;
|
||||
}
|
||||
context->ucharptr = 0;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Unaligned read is unsupported or in 32 bit mode. */
|
||||
if (context->length >= 1)
|
||||
OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||
|
||||
context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
|
||||
|
||||
if (othercasebit != 0 && othercasechar == cc)
|
||||
{
|
||||
OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
|
||||
}
|
||||
else
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
|
||||
|
||||
#endif
|
||||
|
||||
cc++;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
utflength--;
|
||||
}
|
||||
while (utflength > 0);
|
||||
#endif
|
||||
|
||||
return cc;
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
|
||||
/* The code in this function copies the logic of the interpreter function that
|
||||
is defined in the pcre2_extuni.c source. If that code is updated, this
|
||||
function, and those below it, must be kept in step (note by PH, June 2024). */
|
||||
|
||||
/* Locates the end of the extended grapheme cluster that begins at cc, using
the plain GETCHARINC/GETCHAR decoding macros.

Arguments:
  args        JIT arguments; only the begin and end fields are read
  cc          start of the cluster; its first character is decoded before
                any comparison with args->end is made

Returns:      pointer just past the last character accepted into the cluster
*/

static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
{
PCRE2_SPTR subject_begin = args->begin;
PCRE2_SPTR subject_end = args->end;
PCRE2_SPTR left_char = cc;
PCRE2_SPTR cluster_end;
PCRE2_SPTR scan;
BOOL zwj_after_pictographic = FALSE;
int left_gb, right_gb, ri_count;
uint32_t c;

/* The first character is always part of the cluster. */

GETCHARINC(c, cc);
left_gb = UCD_GRAPHBREAK(c);
cluster_end = cc;

while (cc < subject_end)
  {
  GETCHARINC(c, cc);
  right_gb = UCD_GRAPHBREAK(c);

  /* Stop when the break table has no "join" bit for this pair. */

  if ((PRIV(ucp_gbtable)[left_gb] & (1 << right_gb)) == 0)
    break;

  /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
  preceded by Extended Pictographic. */

  if (!zwj_after_pictographic && left_gb == ucp_gbZWJ &&
      right_gb == ucp_gbExtended_Pictographic)
    break;

  /* Not breaking between Regional Indicators is allowed only if there
  are an even number of preceding RIs. */

  if (left_gb == ucp_gbRegional_Indicator && right_gb == ucp_gbRegional_Indicator)
    {
    ri_count = 0;

    /* scan starts at the left-hand character and walks backwards; the
    counter is only advanced for characters that are Regional Indicators. */

    for (scan = left_char; scan > subject_begin; ri_count++)
      {
      scan--;
      BACKCHAR(scan);
      GETCHAR(c, scan);

      if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
        break;
      }

    if ((ri_count & 1) != 0)
      break;  /* Grapheme break required */
    }

  /* Remember when ZWJ follows Extended Pictographic (with optional Extend in
  between; see next statement). */

  zwj_after_pictographic =
    (left_gb == ucp_gbExtended_Pictographic && right_gb == ucp_gbZWJ);

  /* If Extend follows Extended_Pictographic, left_gb is not updated; this
  allows any number of them before a following ZWJ. */

  if (left_gb != ucp_gbExtended_Pictographic || right_gb != ucp_gbExtend)
    left_gb = right_gb;

  left_char = cluster_end;
  cluster_end = cc;
  }

return cluster_end;
}
|
||||
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||
|
||||
/* The code in this function copies the logic of the interpreter function that
|
||||
is defined in the pcre2_extuni.c source. If that code is updated, this
|
||||
function, and the one below it, must be kept in step (note by PH, June 2024). */
|
||||
|
||||
/* Variant of the grapheme cluster scan that decodes with the *_INVALID
macros, which are given the subject bounds and an action to perform when a
character cannot be decoded.

Arguments:
  args        JIT arguments; only the begin and end fields are read
  cc          start of the cluster

Returns:      pointer just past the last character accepted into the cluster,
                or NULL when the very first character could not be decoded
*/

static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
{
PCRE2_SPTR subject_begin = args->begin;
PCRE2_SPTR subject_end = args->end;
PCRE2_SPTR left_char = cc;
PCRE2_SPTR cluster_end = NULL;
PCRE2_SPTR scan;
BOOL zwj_after_pictographic = FALSE;
BOOL odd_ri_run;
int left_gb, right_gb;
uint32_t c;

do
  {
  /* A decoding failure leaves the loop with cluster_end unchanged. */

  GETCHARINC_INVALID(c, cc, subject_end, break);
  right_gb = UCD_GRAPHBREAK(c);

  /* cluster_end is still NULL only while the first character is being
  processed; that character is accepted unconditionally. */

  if (cluster_end == NULL)
    {
    left_gb = right_gb;
    cluster_end = cc;
    continue;
    }

  if ((PRIV(ucp_gbtable)[left_gb] & (1 << right_gb)) == 0)
    break;

  /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
  preceded by Extended Pictographic. */

  if (!zwj_after_pictographic && left_gb == ucp_gbZWJ &&
      right_gb == ucp_gbExtended_Pictographic)
    break;

  /* Not breaking between Regional Indicators is allowed only if there
  are an even number of preceding RIs. */

  if (left_gb == ucp_gbRegional_Indicator && right_gb == ucp_gbRegional_Indicator)
    {
    odd_ri_run = FALSE;
    scan = left_char;

    /* scan is pointing to the left-hand character; only the parity of the
    number of preceding Regional Indicators matters. */

    while (scan > subject_begin)
      {
      GETCHARBACK_INVALID(c, scan, subject_begin, break);

      if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
        break;

      odd_ri_run = !odd_ri_run;
      }

    if (odd_ri_run)
      break;  /* Grapheme break required */
    }

  /* Remember when ZWJ follows Extended Pictographic (with optional Extend in
  between; see next statement). */

  zwj_after_pictographic =
    (left_gb == ucp_gbExtended_Pictographic && right_gb == ucp_gbZWJ);

  /* If Extend follows Extended_Pictographic, left_gb is not updated; this
  allows any number of them before a following ZWJ. */

  if (left_gb != ucp_gbExtended_Pictographic || right_gb != ucp_gbExtend)
    left_gb = right_gb;

  left_char = cluster_end;
  cluster_end = cc;
  }
while (cc < subject_end);

return cluster_end;
}
|
||||
|
||||
/* The code in this function copies the logic of the interpreter function that
|
||||
is defined in the pcre2_extuni.c source. If that code is updated, this
|
||||
function must be kept in step (note by PH, June 2024). */
|
||||
|
||||
/* Grapheme cluster scan for subjects where every code unit is one character,
so units are read directly instead of through the UTF decoding macros.

Arguments:
  args        JIT arguments; only the begin and end fields are read
  cc          start of the cluster; the first unit is read before any
                comparison with args->end is made

Returns:      pointer to the code unit that follows the cluster
*/

static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
{
PCRE2_SPTR subject_begin = args->begin;
PCRE2_SPTR subject_end = args->end;
PCRE2_SPTR scan;
BOOL zwj_after_pictographic = FALSE;
int left_gb, right_gb, ri_count;
uint32_t c;

/* The first unit is read directly (no GETCHARINC). */

c = *cc++;

#if PCRE2_CODE_UNIT_WIDTH == 32
/* A value outside the Unicode range forms a cluster on its own. */
if (c >= 0x110000)
  return cc;
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */

left_gb = UCD_GRAPHBREAK(c);

for (; cc < subject_end; cc++)
  {
  c = *cc;

#if PCRE2_CODE_UNIT_WIDTH == 32
  if (c >= 0x110000)
    break;
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */

  right_gb = UCD_GRAPHBREAK(c);

  if ((PRIV(ucp_gbtable)[left_gb] & (1 << right_gb)) == 0)
    break;

  /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
  preceded by Extended Pictographic. */

  if (!zwj_after_pictographic && left_gb == ucp_gbZWJ &&
      right_gb == ucp_gbExtended_Pictographic)
    break;

  /* Not breaking between Regional Indicators is allowed only if there
  are an even number of preceding RIs. */

  if (left_gb == ucp_gbRegional_Indicator && right_gb == ucp_gbRegional_Indicator)
    {
    ri_count = 0;

    /* scan starts at the left-hand character and walks backwards; the
    counter is only advanced for units that are Regional Indicators. */

    for (scan = cc - 1; scan > subject_begin; ri_count++)
      {
      scan--;
      c = *scan;

#if PCRE2_CODE_UNIT_WIDTH == 32
      if (c >= 0x110000)
        break;
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */

      if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
        break;
      }

    if ((ri_count & 1) != 0)
      break;  /* Grapheme break required */
    }

  /* Remember when ZWJ follows Extended Pictographic (with optional Extend in
  between; see next statement). */

  zwj_after_pictographic =
    (left_gb == ucp_gbExtended_Pictographic && right_gb == ucp_gbZWJ);

  /* If Extend follows Extended_Pictographic, left_gb is not updated; this
  allows any number of them before a following ZWJ. */

  if (left_gb != ucp_gbExtended_Pictographic || right_gb != ucp_gbExtend)
    left_gb = right_gb;
  }

return cc;
}
|
||||
|
||||
/* Emits code that reads one character and tests it against a caseless set
taken from PRIV(ucd_caseless_sets), a list of code points terminated by
NOTACHAR.

Arguments:
  common      the compiler state
  cc          points to the opcode; cc[1] must be PT_CLIST and cc[2] is the
                offset of the set in PRIV(ucd_caseless_sets)
  backtracks  receives the jump taken when the item does not match

When cc[0] is OP_PROP the backtrack is taken if no member of the set equals
the character; for any other opcode it is taken if some member does.
*/

static void compile_clist(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
const sljit_u32 *set;
const sljit_u32 *scan;
struct sljit_jump *mismatch;
sljit_u32 lowest = 0, highest = READ_CHAR_MAX;
BOOL use_select = sljit_has_cpu_feature(SLJIT_HAS_CMOV) != 0;

SLJIT_ASSERT(cc[1] == PT_CLIST);

set = PRIV(ucd_caseless_sets) + cc[2];

/* For OP_PROP the range passed to read_char() is narrowed to the smallest
and largest members of the set. */

if (cc[0] == OP_PROP)
  {
  lowest = highest = set[0];

  for (scan = set + 1; *scan != NOTACHAR; scan++)
    {
    if (*scan > highest) highest = *scan;
    if (*scan < lowest) lowest = *scan;
    }
  }

/* At least two characters are required.
Otherwise this case would be handled by the normal code path. */

SLJIT_ASSERT(set[0] != NOTACHAR && set[1] != NOTACHAR);

/* NOTACHAR is the unsigned maximum, so it is higher than any character. */

SLJIT_ASSERT(set[0] < set[1] && set[1] < set[2]);

read_char(common, lowest, highest, backtracks, READ_CHAR_UPDATE_STR_PTR);

/* TMP2 collects the result of the comparisons: it is set by the first test
and updated by each following one. A pair of members that differ in exactly
one bit is tested with a single OR + compare. */

if (is_powerof2(set[1] ^ set[0]))
  {
  OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(set[1] ^ set[0]));
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, set[1]);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
  scan = set + 2;
  }
else if (is_powerof2(set[2] ^ set[1]))
  {
  SLJIT_ASSERT(set[2] != NOTACHAR);

  OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(set[2] ^ set[1]));
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, set[2]);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);

  /* The first member is still tested on its own. */

  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)set[0]);

  if (use_select)
    SELECT(SLJIT_EQUAL, TMP2, STR_END, 0, TMP2);
  else
    OP_FLAGS(SLJIT_OR | ((set[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);

  scan = set + 3;
  }
else
  {
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)set[0]);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
  scan = set + 1;
  }

/* Remaining members, one comparison each. Without SELECT, the zero flag is
requested only on the last OR because the final jump below tests it. */

for (; *scan != NOTACHAR; scan++)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)scan[0]);

  if (use_select)
    SELECT(SLJIT_EQUAL, TMP2, STR_END, 0, TMP2);
  else
    OP_FLAGS(SLJIT_OR | ((scan[1] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
  }

if (use_select)
  mismatch = CMP(cc[0] == OP_PROP ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
else
  mismatch = JUMP(cc[0] == OP_PROP ? SLJIT_ZERO : SLJIT_NOT_ZERO);

add_jump(compiler, backtracks, mismatch);
}
|
||||
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Emits the matching-path code for one single-character item.

Arguments:
  common         the compiler state
  type           the opcode of the item; every opcode handled here has a
                   case in the switch below
  cc             points to the data that follows the opcode
  backtracks     jump list that receives the jumps taken when the item does
                   not match
  check_str_ptr  when TRUE, an end-of-subject check is emitted before the
                   character is consumed (detect_partial_match(), or for
                   OP_CHAR/OP_CHARI in PCRE2_JIT_COMPLETE mode a direct
                   comparison of STR_PTR with STR_END)

Returns:         cc advanced past the data belonging to the item
*/
static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
|
||||
{
|
||||
DEFINE_COMPILER;
|
||||
int length;
|
||||
unsigned int c, oc, bit;
|
||||
compare_context context;
|
||||
struct sljit_jump *jump[3];
|
||||
jump_list *end_list;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
PCRE2_UCHAR propdata[5];
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
switch(type)
|
||||
{
|
||||
case OP_NOT_DIGIT:
|
||||
case OP_DIGIT:
|
||||
/* Digits are usually 0-9, so it is worth optimizing them. */
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
|
||||
read_char7_type(common, backtracks, type == OP_NOT_DIGIT);
|
||||
else
|
||||
#endif
|
||||
read_char8_type(common, backtracks, type == OP_NOT_DIGIT);
|
||||
/* Flip the starting bit in the negative case. */
|
||||
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_digit);
|
||||
add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
|
||||
return cc;
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
case OP_WHITESPACE:
|
||||
/* Same scheme as the digit case, testing the ctype_space bit. */
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
|
||||
read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE);
|
||||
else
|
||||
#endif
|
||||
read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE);
|
||||
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_space);
|
||||
add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
|
||||
return cc;
|
||||
|
||||
case OP_NOT_WORDCHAR:
|
||||
case OP_WORDCHAR:
|
||||
/* Same scheme as the digit case, testing the ctype_word bit. */
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
|
||||
read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR);
|
||||
else
|
||||
#endif
|
||||
read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR);
|
||||
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_word);
|
||||
add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
|
||||
return cc;
|
||||
|
||||
case OP_ANY:
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||
/* A fixed newline above 255 is tested in two steps: the character just
read is compared with bits 8-15 of common->newline, and only when that
matches (and the subject has not ended) is the next code unit compared
with bits 0-7. */
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
|
||||
{
|
||||
jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
|
||||
end_list = NULL;
|
||||
if (common->mode != PCRE2_JIT_PARTIAL_HARD)
|
||||
add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||
else
|
||||
check_str_end(common, &end_list);
|
||||
|
||||
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
|
||||
set_jumps(end_list, LABEL());
|
||||
JUMPHERE(jump[0]);
|
||||
}
|
||||
else
|
||||
check_newlinechar(common, common->nltype, backtracks, TRUE);
|
||||
return cc;
|
||||
|
||||
case OP_ALLANY:
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
#ifdef SUPPORT_UNICODE
|
||||
/* With invalid_utf the character goes through read_char(), which is given
the backtrack list; otherwise skip_valid_char() is used. */
if (common->utf && common->invalid_utf)
|
||||
{
|
||||
read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||
return cc;
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
skip_valid_char(common);
|
||||
return cc;
|
||||
|
||||
case OP_ANYBYTE:
|
||||
/* Advances STR_PTR by exactly one code unit, without decoding. */
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||
return cc;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
case OP_NOTPROP:
|
||||
case OP_PROP:
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
/* PT_CLIST has its own generator, which expects a pointer to the opcode
itself (hence cc - 1). */
if (cc[0] == PT_CLIST)
|
||||
{
|
||||
compile_clist(common, cc - 1, backtracks);
|
||||
return cc + 2;
|
||||
}
|
||||
|
||||
/* Any other property is wrapped in a temporary one-item list ending with
XCL_END and handed to the extended class generator. */
propdata[0] = 0;
|
||||
propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
|
||||
propdata[2] = cc[0];
|
||||
propdata[3] = cc[1];
|
||||
propdata[4] = XCL_END;
|
||||
compile_xclass_matchingpath(common, propdata, backtracks, 0);
|
||||
return cc + 2;
|
||||
#endif
|
||||
|
||||
case OP_ANYNL:
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
|
||||
jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
|
||||
/* We don't need to handle soft partial matching case. */
|
||||
end_list = NULL;
|
||||
if (common->mode != PCRE2_JIT_PARTIAL_HARD)
|
||||
add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||
else
|
||||
check_str_end(common, &end_list);
|
||||
/* After CR: TMP1 becomes 1 when the next code unit is NL and 0 otherwise;
it is scaled to the code unit size and added to STR_PTR, so an NL that
directly follows the CR is consumed as well. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NL);
|
||||
OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
|
||||
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
|
||||
#endif
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
|
||||
jump[1] = JUMP(SLJIT_JUMP);
|
||||
JUMPHERE(jump[0]);
|
||||
/* Not CR: the character must satisfy check_newlinechar() for bsr_nltype. */
check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
|
||||
set_jumps(end_list, LABEL());
|
||||
JUMPHERE(jump[1]);
|
||||
return cc;
|
||||
|
||||
case OP_NOT_HSPACE:
|
||||
case OP_HSPACE:
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
|
||||
if (type == OP_NOT_HSPACE)
|
||||
read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||
else
|
||||
read_char(common, 0x9, 0x3000, NULL, 0);
|
||||
|
||||
/* The test itself is a fast call to the routine collected in
common->hspace; the zero flag is read after it returns. */
add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
|
||||
sljit_set_current_flags(compiler, SLJIT_SET_Z);
|
||||
add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
|
||||
return cc;
|
||||
|
||||
case OP_NOT_VSPACE:
|
||||
case OP_VSPACE:
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
|
||||
if (type == OP_NOT_VSPACE)
|
||||
read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||
else
|
||||
read_char(common, 0xa, 0x2029, NULL, 0);
|
||||
|
||||
/* Same scheme as the horizontal space case, using common->vspace. */
add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
|
||||
sljit_set_current_flags(compiler, SLJIT_SET_Z);
|
||||
add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
|
||||
return cc;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
case OP_EXTUNI:
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
|
||||
/* One of the do_extuni_* helpers is called with the arguments block in R0
and the current subject position in R1 (STR_PTR is R1, as asserted). */
SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
|
||||
OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
|
||||
common->utf ? (common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_utf)) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
|
||||
/* With invalid_utf a zero return value is treated as a failed match. */
if (common->invalid_utf)
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
|
||||
#else
|
||||
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
|
||||
common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
|
||||
if (common->invalid_utf)
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
|
||||
#endif
|
||||
|
||||
/* The helper's return value is the new subject position. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
|
||||
|
||||
if (common->mode == PCRE2_JIT_PARTIAL_HARD)
|
||||
{
|
||||
jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
|
||||
/* Since we successfully read a char above, partial matching must occur. */
|
||||
check_partial(common, TRUE);
|
||||
JUMPHERE(jump[0]);
|
||||
}
|
||||
return cc;
|
||||
#endif
|
||||
|
||||
case OP_CHAR:
|
||||
case OP_CHARI:
|
||||
length = 1;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
|
||||
#endif
|
||||
|
||||
if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
|
||||
detect_partial_match(common, backtracks);
|
||||
|
||||
/* Code units are compared directly by byte_sequence_compare() when the
match is caseful, the character has no other case, or
char_get_othercase_bit() returns a non-zero bit for it. */
if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
|
||||
{
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
|
||||
if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
|
||||
|
||||
context.length = IN_UCHARS(length);
|
||||
context.sourcereg = -1;
|
||||
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
|
||||
context.ucharptr = 0;
|
||||
#endif
|
||||
return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
|
||||
}
|
||||
|
||||
/* Remaining case: a caseless character whose two cases are not handled by
the path above. The character is decoded and compared with both c and oc. */
#ifdef SUPPORT_UNICODE
|
||||
if (common->utf)
|
||||
{
|
||||
GETCHAR(c, cc);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
c = *cc;
|
||||
|
||||
SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));
|
||||
|
||||
if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||
|
||||
oc = char_othercase(common, c);
|
||||
read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0);
|
||||
|
||||
SLJIT_ASSERT(!is_powerof2(c ^ oc));
|
||||
|
||||
if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
|
||||
{
|
||||
/* If TMP1 equals oc it is replaced by c, so one comparison with c
covers both cases. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, oc);
|
||||
SELECT(SLJIT_EQUAL, TMP1, SLJIT_IMM, c, TMP1);
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
|
||||
}
|
||||
else
|
||||
{
|
||||
jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
|
||||
JUMPHERE(jump[0]);
|
||||
}
|
||||
return cc + length;
|
||||
|
||||
case OP_NOT:
|
||||
case OP_NOTI:
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
|
||||
length = 1;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (common->utf)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
c = *cc;
|
||||
/* Pattern character below 128 and no invalid_utf handling: only the
first subject byte is compared, then the rest of the subject character
is stepped over. */
if (c < 128 && !common->invalid_utf)
|
||||
{
|
||||
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
|
||||
if (type == OP_NOT || !char_has_othercase(common, cc))
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
|
||||
else
|
||||
{
|
||||
/* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
|
||||
OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
|
||||
}
|
||||
/* Skip the variable-length character. For a first byte of 0xc0 or
above, an additional amount is loaded from PRIV(utf8_table4), indexed by
(first byte - 0xc0), and added to STR_PTR. */
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||
jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
|
||||
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
|
||||
JUMPHERE(jump[0]);
|
||||
return cc + 1;
|
||||
}
|
||||
else
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||
{
|
||||
GETCHARLEN(c, cc, length);
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
c = *cc;
|
||||
|
||||
/* General case: read the subject character and backtrack when it equals
c (or, for a caseless match, either c or its other case oc). */
if (type == OP_NOT || !char_has_othercase(common, cc))
|
||||
{
|
||||
read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
|
||||
}
|
||||
else
|
||||
{
|
||||
oc = char_othercase(common, c);
|
||||
read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||
bit = c ^ oc;
|
||||
/* When the two cases differ in a single bit, one OR + compare tests
both of them. */
if (is_powerof2(bit))
|
||||
{
|
||||
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
|
||||
}
|
||||
else
|
||||
{
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
|
||||
}
|
||||
}
|
||||
return cc + length;
|
||||
|
||||
case OP_CLASS:
|
||||
case OP_NCLASS:
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
|
||||
/* In 8-bit UTF mode "bit" is reused as the highest character value the
bitmap test has to handle: 127 when is_char7_bitset() accepts the map,
otherwise 255. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
|
||||
if (type == OP_NCLASS)
|
||||
read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||
else
|
||||
read_char(common, 0, bit, NULL, 0);
|
||||
#else
|
||||
if (type == OP_NCLASS)
|
||||
read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||
else
|
||||
read_char(common, 0, 255, NULL, 0);
|
||||
#endif
|
||||
|
||||
/* optimize_class() returns non-zero when it has generated the whole test
itself. The return value skips the 32-byte bitmap. */
if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
|
||||
return cc + 32 / sizeof(PCRE2_UCHAR);
|
||||
|
||||
/* Characters above the range covered by the bitmap test: OP_CLASS fails at
once, while OP_NCLASS jumps over the bitmap test (jump[0] is bound after
it) and so matches. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
jump[0] = NULL;
|
||||
if (common->utf)
|
||||
{
|
||||
jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
|
||||
if (type == OP_CLASS)
|
||||
{
|
||||
add_jump(compiler, backtracks, jump[0]);
|
||||
jump[0] = NULL;
|
||||
}
|
||||
}
|
||||
#elif PCRE2_CODE_UNIT_WIDTH != 8
|
||||
jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
|
||||
if (type == OP_CLASS)
|
||||
{
|
||||
add_jump(compiler, backtracks, jump[0]);
|
||||
jump[0] = NULL;
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||
|
||||
/* Bitmap test: TMP2 = 1 << (char & 7), TMP1 = the bitmap byte at index
(char >> 3); backtrack when the selected bit is clear. */
OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
|
||||
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
|
||||
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
|
||||
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
|
||||
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
|
||||
add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
|
||||
|
||||
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (jump[0] != NULL)
|
||||
JUMPHERE(jump[0]);
|
||||
#endif
|
||||
return cc + 32 / sizeof(PCRE2_UCHAR);
|
||||
|
||||
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
|
||||
case OP_XCLASS:
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
/* The class data starts after the LINK_SIZE length field; the value read
by GET(cc, 0) is used to step over the whole item. */
compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks, 0);
|
||||
return cc + GET(cc, 0) - 1;
|
||||
|
||||
case OP_ECLASS:
|
||||
if (check_str_ptr)
|
||||
detect_partial_match(common, backtracks);
|
||||
return compile_eclass_matchingpath(common, cc, backtracks);
|
||||
#endif
|
||||
}
|
||||
/* Every opcode handled above returns from inside the switch. */
SLJIT_UNREACHABLE();
|
||||
return cc;
|
||||
}
|
||||
|
||||
/* Emits the matching path for a run of OP_CHAR / OP_CHARI items starting at
cc. Consecutive items that can be compared as raw code units are merged so
that a single length check covers all of them.

Arguments:
  common      the compiler state
  cc          points to the opcode of the first item
  ccend       end of the compiled pattern region that may be scanned
  backtracks  receives the jumps taken on mismatch

Returns:      pointer just past the items that were compiled
*/

static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
{
/* This function consumes at least one input character. */
DEFINE_COMPILER;
PCRE2_SPTR scan = cc;
compare_context context;
int units;

/* First pass: add up the code units of the leading items that have a fixed
length. The scan stops at the first item that does not qualify, at ccend, or
once more than 128 bytes have been collected. */

context.length = 0;
for (;;)
  {
  if (scan >= ccend)
    break;

  /* OP_CHAR always qualifies; OP_CHARI qualifies unless the character has
  another case for which char_get_othercase_bit() returns 0. */

  units = 0;
  if (*scan == OP_CHAR ||
      (*scan == OP_CHARI &&
       !(char_has_othercase(common, scan + 1) && char_get_othercase_bit(common, scan + 1) == 0)))
    {
    units = 1;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(scan[1]))
      units += GET_EXTRALEN(scan[1]);
#endif
    }

  scan += 1 + units;
  context.length += IN_UCHARS(units);

  if (units <= 0 || context.length > 128)
    break;
  }

if (context.length <= 0)
  {
  /* A non-fixed length character will be checked if length == 0. */
  return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
  }

/* We have a fixed-length byte sequence: advance STR_PTR over all of it and
check the subject end once. */

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

context.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
context.ucharptr = 0;
#endif

/* Second pass: byte_sequence_compare() handles one item per call and is
repeated until context.length is no longer positive. */

do
  cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks);
while (context.length > 0);

return cc;
}
|
||||
|
||||
|
||||
14105
3rd/pcre2/src/pcre2_jit_compile.c
Normal file
14105
3rd/pcre2/src/pcre2_jit_compile.c
Normal file
File diff suppressed because it is too large
Load Diff
200
3rd/pcre2/src/pcre2_jit_match.c
Normal file
200
3rd/pcre2/src/pcre2_jit_match.c
Normal file
@@ -0,0 +1,200 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2023 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_FROM_PCRE2_JIT_COMPILE
|
||||
#error This file must be included from pcre2_jit_compile.c.
|
||||
#endif
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer)
|
||||
#include <sanitizer/msan_interface.h>
|
||||
#endif /* __has_feature(memory_sanitizer) */
|
||||
#endif /* defined(__has_feature) */
|
||||
|
||||
#ifdef SUPPORT_JIT
|
||||
|
||||
/* Runs compiled code with a stack area that lives in this function's own
frame.

Arguments:
  arguments        JIT arguments; the stack field is overwritten here
  executable_func  the compiled function to call

Returns:           the value returned by executable_func
*/

static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, jit_function executable_func)
{
sljit_u8 buffer[MACHINE_STACK_SIZE];
sljit_u8 *buffer_limit = buffer + MACHINE_STACK_SIZE;
struct sljit_stack frame_stack;

/* start and min_start are set to the beginning of the buffer, end and top
to its limit. */

frame_stack.top = buffer_limit;
frame_stack.end = buffer_limit;
frame_stack.start = buffer;
frame_stack.min_start = buffer;

arguments->stack = &frame_stack;
return executable_func(arguments);
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Do a JIT pattern match *
|
||||
*************************************************/
|
||||
|
||||
/* This function runs a JIT pattern match.
|
||||
|
||||
Arguments:
|
||||
code points to the compiled expression
|
||||
subject points to the subject string
|
||||
length length of subject string (may contain binary zeros)
|
||||
start_offset where to start in the subject string
|
||||
options option bits
|
||||
match_data points to a match_data block
|
||||
mcontext points to a match context
|
||||
|
||||
Returns: > 0 => success; value is the number of ovector pairs filled
|
||||
= 0 => success, but ovector is not big enough
|
||||
-1 => failed to match (PCRE2_ERROR_NOMATCH)
|
||||
< -1 => some kind of unexpected problem
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
|
||||
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
|
||||
pcre2_match_context *mcontext)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
|
||||
(void)code;
|
||||
(void)subject;
|
||||
(void)length;
|
||||
(void)start_offset;
|
||||
(void)options;
|
||||
(void)match_data;
|
||||
(void)mcontext;
|
||||
return PCRE2_ERROR_JIT_BADOPTION;
|
||||
|
||||
#else /* SUPPORT_JIT */
|
||||
|
||||
pcre2_real_code *re = (pcre2_real_code *)code;
|
||||
executable_functions *functions = (executable_functions *)re->executable_jit;
|
||||
pcre2_jit_stack *jit_stack;
|
||||
uint32_t oveccount = match_data->oveccount;
|
||||
uint32_t max_oveccount;
|
||||
union {
|
||||
void *executable_func;
|
||||
jit_function call_executable_func;
|
||||
} convert_executable_func;
|
||||
jit_arguments arguments;
|
||||
int rc;
|
||||
int index = 0;
|
||||
|
||||
if ((options & PCRE2_PARTIAL_HARD) != 0)
|
||||
index = 2;
|
||||
else if ((options & PCRE2_PARTIAL_SOFT) != 0)
|
||||
index = 1;
|
||||
|
||||
if (functions == NULL || functions->executable_funcs[index] == NULL)
|
||||
return PCRE2_ERROR_JIT_BADOPTION;
|
||||
|
||||
/* Sanity checks should be handled by pcre2_match. */
|
||||
arguments.str = subject + start_offset;
|
||||
arguments.begin = subject;
|
||||
arguments.end = subject + length;
|
||||
arguments.match_data = match_data;
|
||||
arguments.startchar_ptr = subject;
|
||||
arguments.mark_ptr = NULL;
|
||||
arguments.options = options;
|
||||
|
||||
if (mcontext != NULL)
|
||||
{
|
||||
arguments.callout = mcontext->callout;
|
||||
arguments.callout_data = mcontext->callout_data;
|
||||
arguments.offset_limit = mcontext->offset_limit;
|
||||
arguments.limit_match = (mcontext->match_limit < re->limit_match)?
|
||||
mcontext->match_limit : re->limit_match;
|
||||
if (mcontext->jit_callback != NULL)
|
||||
jit_stack = mcontext->jit_callback(mcontext->jit_callback_data);
|
||||
else
|
||||
jit_stack = (pcre2_jit_stack *)mcontext->jit_callback_data;
|
||||
}
|
||||
else
|
||||
{
|
||||
arguments.callout = NULL;
|
||||
arguments.callout_data = NULL;
|
||||
arguments.offset_limit = PCRE2_UNSET;
|
||||
arguments.limit_match = (MATCH_LIMIT < re->limit_match)?
|
||||
MATCH_LIMIT : re->limit_match;
|
||||
jit_stack = NULL;
|
||||
}
|
||||
|
||||
|
||||
max_oveccount = functions->top_bracket;
|
||||
if (oveccount > max_oveccount)
|
||||
oveccount = max_oveccount;
|
||||
arguments.oveccount = oveccount << 1;
|
||||
|
||||
|
||||
convert_executable_func.executable_func = functions->executable_funcs[index];
|
||||
if (jit_stack != NULL)
|
||||
{
|
||||
arguments.stack = (struct sljit_stack *)(jit_stack->stack);
|
||||
rc = convert_executable_func.call_executable_func(&arguments);
|
||||
}
|
||||
else
|
||||
rc = jit_machine_stack_exec(&arguments, convert_executable_func.call_executable_func);
|
||||
|
||||
if (rc > (int)oveccount)
|
||||
rc = 0;
|
||||
match_data->code = re;
|
||||
match_data->subject = (rc >= 0 || rc == PCRE2_ERROR_PARTIAL)? subject : NULL;
|
||||
match_data->subject_length = length;
|
||||
match_data->rc = rc;
|
||||
match_data->startchar = arguments.startchar_ptr - subject;
|
||||
match_data->leftchar = 0;
|
||||
match_data->rightchar = 0;
|
||||
match_data->mark = arguments.mark_ptr;
|
||||
match_data->matchedby = PCRE2_MATCHEDBY_JIT;
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer)
|
||||
if (rc > 0)
|
||||
__msan_unpoison(match_data->ovector, 2 * rc * sizeof(match_data->ovector[0]));
|
||||
#endif /* __has_feature(memory_sanitizer) */
|
||||
#endif /* defined(__has_feature) */
|
||||
|
||||
return match_data->rc;
|
||||
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
/* End of pcre2_jit_match.c */
|
||||
234
3rd/pcre2/src/pcre2_jit_misc.c
Normal file
234
3rd/pcre2/src/pcre2_jit_misc.c
Normal file
@@ -0,0 +1,234 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifndef INCLUDED_FROM_PCRE2_JIT_COMPILE
|
||||
#error This file must be included from pcre2_jit_compile.c.
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free JIT read-only data *
|
||||
*************************************************/
|
||||
|
||||
void
|
||||
PRIV(jit_free_rodata)(void *current, void *allocator_data)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
(void)current;
|
||||
(void)allocator_data;
|
||||
#else /* SUPPORT_JIT */
|
||||
void *next;
|
||||
|
||||
SLJIT_UNUSED_ARG(allocator_data);
|
||||
|
||||
while (current != NULL)
|
||||
{
|
||||
next = *(void**)current;
|
||||
SLJIT_FREE(current, allocator_data);
|
||||
current = next;
|
||||
}
|
||||
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
/*************************************************
|
||||
* Free JIT compiled code *
|
||||
*************************************************/
|
||||
|
||||
void
|
||||
PRIV(jit_free)(void *executable_jit, pcre2_memctl *memctl)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
(void)executable_jit;
|
||||
(void)memctl;
|
||||
#else /* SUPPORT_JIT */
|
||||
|
||||
executable_functions *functions = (executable_functions *)executable_jit;
|
||||
void *allocator_data = memctl;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
|
||||
{
|
||||
if (functions->executable_funcs[i] != NULL)
|
||||
sljit_free_code(functions->executable_funcs[i], NULL);
|
||||
PRIV(jit_free_rodata)(functions->read_only_data_heads[i], allocator_data);
|
||||
}
|
||||
|
||||
SLJIT_FREE(functions, allocator_data);
|
||||
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free unused JIT memory *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_jit_free_unused_memory(pcre2_general_context *gcontext)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
(void)gcontext; /* Suppress warning */
|
||||
#else /* SUPPORT_JIT */
|
||||
SLJIT_UNUSED_ARG(gcontext);
|
||||
#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
|
||||
sljit_free_unused_memory_exec();
|
||||
#endif /* SLJIT_EXECUTABLE_ALLOCATOR */
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Allocate a JIT stack *
|
||||
*************************************************/
|
||||
|
||||
/* Create a JIT stack.

Arguments:
  startsize  initial size in bytes; capped at maxsize
  maxsize    maximum size in bytes
  gcontext   general context used for the allocation

Both sizes are rounded up to STACK_GROWTH_RATE granularity (the mask
arithmetic assumes that constant is a power of two).

Returns:     the new stack, or NULL if a size is zero, maxsize is too close
             to SIZE_MAX to be rounded up, an allocation fails, or JIT
             support is not compiled in
*/

PCRE2_EXP_DEFN pcre2_jit_stack * PCRE2_CALL_CONVENTION
pcre2_jit_stack_create(size_t startsize, size_t maxsize,
  pcre2_general_context *gcontext)
{
#ifdef SUPPORT_JIT

const size_t granule_mask = (size_t)(~(STACK_GROWTH_RATE - 1));
pcre2_jit_stack *new_stack;
size_t initial;
size_t limit;

if (startsize == 0) return NULL;
if (maxsize == 0) return NULL;
if (maxsize > SIZE_MAX - STACK_GROWTH_RATE) return NULL;

initial = (startsize > maxsize)? maxsize : startsize;
initial = (initial + STACK_GROWTH_RATE - 1) & granule_mask;
limit = (maxsize + STACK_GROWTH_RATE - 1) & granule_mask;

new_stack = PRIV(memctl_malloc)(sizeof(pcre2_real_jit_stack), (pcre2_memctl *)gcontext);
if (new_stack == NULL)
  return NULL;

new_stack->stack = sljit_allocate_stack(initial, limit, &new_stack->memctl);
if (new_stack->stack != NULL)
  return new_stack;

/* The stack itself could not be allocated: give the wrapper back. */
new_stack->memctl.free(new_stack, new_stack->memctl.memory_data);
return NULL;

#else /* SUPPORT_JIT */

(void)gcontext;
(void)startsize;
(void)maxsize;
return NULL;

#endif /* SUPPORT_JIT */
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Assign a JIT stack to a pattern *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_jit_stack_assign(pcre2_match_context *mcontext, pcre2_jit_callback callback,
|
||||
void *callback_data)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
(void)mcontext;
|
||||
(void)callback;
|
||||
(void)callback_data;
|
||||
#else /* SUPPORT_JIT */
|
||||
|
||||
if (mcontext == NULL) return;
|
||||
mcontext->jit_callback = callback;
|
||||
mcontext->jit_callback_data = callback_data;
|
||||
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free a JIT stack *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_jit_stack_free(pcre2_jit_stack *jit_stack)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
(void)jit_stack;
|
||||
#else /* SUPPORT_JIT */
|
||||
if (jit_stack != NULL)
|
||||
{
|
||||
sljit_free_stack((struct sljit_stack *)(jit_stack->stack), &jit_stack->memctl);
|
||||
jit_stack->memctl.free(jit_stack, jit_stack->memctl.memory_data);
|
||||
}
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get target CPU type *
|
||||
*************************************************/
|
||||
|
||||
const char*
|
||||
PRIV(jit_get_target)(void)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
return "JIT is not supported";
|
||||
#else /* SUPPORT_JIT */
|
||||
return sljit_get_platform_name();
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get size of JIT code *
|
||||
*************************************************/
|
||||
|
||||
size_t
|
||||
PRIV(jit_get_size)(void *executable_jit)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
(void)executable_jit;
|
||||
return 0;
|
||||
#else /* SUPPORT_JIT */
|
||||
sljit_uw *executable_sizes = ((executable_functions *)executable_jit)->executable_sizes;
|
||||
SLJIT_COMPILE_ASSERT(JIT_NUMBER_OF_COMPILE_MODES == 3, number_of_compile_modes_changed);
|
||||
return executable_sizes[0] + executable_sizes[1] + executable_sizes[2];
|
||||
#endif
|
||||
}
|
||||
|
||||
/* End of pcre2_jit_misc.c */
|
||||
354
3rd/pcre2/src/pcre2_jit_neon_inc.h
Normal file
354
3rd/pcre2/src/pcre2_jit_neon_inc.h
Normal file
@@ -0,0 +1,354 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
This module by Zoltan Herczeg and Sebastian Pop
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2019 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
# if defined(FFCS)
|
||||
# if defined(FF_UTF)
|
||||
# define FF_FUN ffcs_utf
|
||||
# else
|
||||
# define FF_FUN ffcs
|
||||
# endif
|
||||
|
||||
# elif defined(FFCS_2)
|
||||
# if defined(FF_UTF)
|
||||
# define FF_FUN ffcs_2_utf
|
||||
# else
|
||||
# define FF_FUN ffcs_2
|
||||
# endif
|
||||
|
||||
# elif defined(FFCS_MASK)
|
||||
# if defined(FF_UTF)
|
||||
# define FF_FUN ffcs_mask_utf
|
||||
# else
|
||||
# define FF_FUN ffcs_mask
|
||||
# endif
|
||||
|
||||
# elif defined(FFCPS_0)
|
||||
# if defined (FF_UTF)
|
||||
# define FF_FUN ffcps_0_utf
|
||||
# else
|
||||
# define FF_FUN ffcps_0
|
||||
# endif
|
||||
|
||||
# elif defined (FFCPS_1)
|
||||
# if defined (FF_UTF)
|
||||
# define FF_FUN ffcps_1_utf
|
||||
# else
|
||||
# define FF_FUN ffcps_1
|
||||
# endif
|
||||
|
||||
# elif defined (FFCPS_DEFAULT)
|
||||
# if defined (FF_UTF)
|
||||
# define FF_FUN ffcps_default_utf
|
||||
# else
|
||||
# define FF_FUN ffcps_default
|
||||
# endif
|
||||
# endif
|
||||
|
||||
/* NEON fast-forward scanner. The body is compiled under the name selected by
the FF_FUN macro, in the variant chosen by FFCS, FFCS_2, FFCS_MASK or FFCPS
(optionally combined with FF_UTF).

The subject is read in 16-byte vectors starting at *str_ptr, looking for the
first position that matches the character(s) packed into "chars". On success
*str_ptr is updated and returned (in the FFCPS variants after stepping back by
offs1); NULL is returned when no match is found before str_end.

NOTE(review): the no_sanitize_address attribute is presumably needed because
the first load is rounded down to a 16-byte boundary and whole vectors are
loaded, so bytes outside the subject can be touched -- confirm. */
#if (defined(__GNUC__) && defined(__SANITIZE_ADDRESS__) && __SANITIZE_ADDRESS__ ) \
|
||||
|| (defined(__clang__) \
|
||||
&& ((__clang_major__ == 3 && __clang_minor__ >= 3) || (__clang_major__ > 3)))
|
||||
__attribute__((no_sanitize_address))
|
||||
#endif
|
||||
static sljit_u8* SLJIT_FUNC FF_FUN(sljit_u8 *str_end, sljit_u8 **str_ptr, sljit_uw offs1, sljit_uw offs2, sljit_uw chars)
|
||||
#undef FF_FUN
|
||||
{
|
||||
quad_word qw;
|
||||
int_char ic;
|
||||
|
||||
SLJIT_UNUSED_ARG(offs1);
|
||||
SLJIT_UNUSED_ARG(offs2);
|
||||
|
||||
/* "chars" carries the search characters; they are read back below through
ic.c.c1 .. ic.c.c4. */
ic.x = chars;
|
||||
|
||||
#if defined(FFCS)
|
||||
sljit_u8 c1 = ic.c.c1;
|
||||
vect_t vc1 = VDUPQ(c1);
|
||||
|
||||
#elif defined(FFCS_2)
|
||||
sljit_u8 c1 = ic.c.c1;
|
||||
vect_t vc1 = VDUPQ(c1);
|
||||
sljit_u8 c2 = ic.c.c2;
|
||||
vect_t vc2 = VDUPQ(c2);
|
||||
|
||||
#elif defined(FFCS_MASK)
|
||||
sljit_u8 c1 = ic.c.c1;
|
||||
vect_t vc1 = VDUPQ(c1);
|
||||
sljit_u8 mask = ic.c.c2;
|
||||
vect_t vmask = VDUPQ(mask);
|
||||
#endif
|
||||
|
||||
#if defined(FFCPS)
|
||||
compare_type compare1_type = compare_match1;
|
||||
compare_type compare2_type = compare_match1;
|
||||
vect_t cmp1a, cmp1b, cmp2a, cmp2b;
|
||||
const sljit_u32 diff = IN_UCHARS(offs1 - offs2);
|
||||
PCRE2_UCHAR char1a = ic.c.c1;
|
||||
PCRE2_UCHAR char2a = ic.c.c3;
|
||||
|
||||
# ifdef FFCPS_CHAR1A2A
|
||||
cmp1a = VDUPQ(char1a);
|
||||
cmp2a = VDUPQ(char2a);
|
||||
cmp1b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
|
||||
cmp2b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
|
||||
# else
|
||||
PCRE2_UCHAR char1b = ic.c.c2;
|
||||
PCRE2_UCHAR char2b = ic.c.c4;
|
||||
/* For each of the two positions choose the cheapest comparison: a plain
equality test when both alternatives are the same character, an OR-then-compare
when they differ in a single bit, and two separate compares otherwise. */
if (char1a == char1b)
|
||||
{
|
||||
cmp1a = VDUPQ(char1a);
|
||||
cmp1b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
|
||||
}
|
||||
else
|
||||
{
|
||||
sljit_u32 bit1 = char1a ^ char1b;
|
||||
if (is_powerof2(bit1))
|
||||
{
|
||||
compare1_type = compare_match1i;
|
||||
cmp1a = VDUPQ(char1a | bit1);
|
||||
cmp1b = VDUPQ(bit1);
|
||||
}
|
||||
else
|
||||
{
|
||||
compare1_type = compare_match2;
|
||||
cmp1a = VDUPQ(char1a);
|
||||
cmp1b = VDUPQ(char1b);
|
||||
}
|
||||
}
|
||||
|
||||
if (char2a == char2b)
|
||||
{
|
||||
cmp2a = VDUPQ(char2a);
|
||||
cmp2b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
|
||||
}
|
||||
else
|
||||
{
|
||||
sljit_u32 bit2 = char2a ^ char2b;
|
||||
if (is_powerof2(bit2))
|
||||
{
|
||||
compare2_type = compare_match1i;
|
||||
cmp2a = VDUPQ(char2a | bit2);
|
||||
cmp2b = VDUPQ(bit2);
|
||||
}
|
||||
else
|
||||
{
|
||||
compare2_type = compare_match2;
|
||||
cmp2a = VDUPQ(char2a);
|
||||
cmp2b = VDUPQ(char2b);
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
*str_ptr += IN_UCHARS(offs1);
|
||||
#endif
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
vect_t char_mask = VDUPQ(0xff);
|
||||
#endif
|
||||
|
||||
#if defined(FF_UTF)
|
||||
restart:;
|
||||
#endif
|
||||
|
||||
#if defined(FFCPS)
|
||||
if (*str_ptr >= str_end)
|
||||
return NULL;
|
||||
sljit_u8 *p1 = *str_ptr - diff;
|
||||
#endif
|
||||
/* Split the start pointer into a 16-byte aligned address and the offset of
the real start inside that 16-byte block. */
sljit_s32 align_offset = ((uint64_t)*str_ptr & 0xf);
|
||||
*str_ptr = (sljit_u8 *) ((uint64_t)*str_ptr & ~0xf);
|
||||
vect_t data = VLD1Q(*str_ptr);
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
data = VANDQ(data, char_mask);
|
||||
#endif
|
||||
|
||||
#if defined(FFCS)
|
||||
vect_t eq = VCEQQ(data, vc1);
|
||||
|
||||
#elif defined(FFCS_2)
|
||||
vect_t eq1 = VCEQQ(data, vc1);
|
||||
vect_t eq2 = VCEQQ(data, vc2);
|
||||
vect_t eq = VORRQ(eq1, eq2);
|
||||
|
||||
#elif defined(FFCS_MASK)
|
||||
vect_t eq = VORRQ(data, vmask);
|
||||
eq = VCEQQ(eq, vc1);
|
||||
|
||||
#elif defined(FFCPS)
|
||||
# if defined(FFCPS_DIFF1)
|
||||
vect_t prev_data = data;
|
||||
# endif
|
||||
|
||||
vect_t data2;
|
||||
if (p1 < *str_ptr)
|
||||
{
|
||||
data2 = VLD1Q(*str_ptr - diff);
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
data2 = VANDQ(data2, char_mask);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
data2 = shift_left_n_lanes(data, offs1 - offs2);
|
||||
|
||||
if (compare1_type == compare_match1)
|
||||
data = VCEQQ(data, cmp1a);
|
||||
else
|
||||
data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b);
|
||||
|
||||
if (compare2_type == compare_match1)
|
||||
data2 = VCEQQ(data2, cmp2a);
|
||||
else
|
||||
data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b);
|
||||
|
||||
vect_t eq = VANDQ(data, data2);
|
||||
#endif
|
||||
|
||||
/* The comparison result is stored to memory and examined as two 64-bit
halves; the lowest set bit, divided by 8, gives the byte index of the first
matching position within a half. */
VST1Q(qw.mem, eq);
|
||||
/* Ignore matches before the first STR_PTR. */
|
||||
if (align_offset < 8)
|
||||
{
|
||||
qw.dw[0] >>= align_offset * 8;
|
||||
if (qw.dw[0])
|
||||
{
|
||||
*str_ptr += align_offset + __builtin_ctzll(qw.dw[0]) / 8;
|
||||
goto match;
|
||||
}
|
||||
if (qw.dw[1])
|
||||
{
|
||||
*str_ptr += 8 + __builtin_ctzll(qw.dw[1]) / 8;
|
||||
goto match;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
qw.dw[1] >>= (align_offset - 8) * 8;
|
||||
if (qw.dw[1])
|
||||
{
|
||||
*str_ptr += align_offset + __builtin_ctzll(qw.dw[1]) / 8;
|
||||
goto match;
|
||||
}
|
||||
}
|
||||
*str_ptr += 16;
|
||||
|
||||
/* Main loop: *str_ptr is 16-byte aligned from here on. */
while (*str_ptr < str_end)
|
||||
{
|
||||
vect_t orig_data = VLD1Q(*str_ptr);
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
orig_data = VANDQ(orig_data, char_mask);
|
||||
#endif
|
||||
data = orig_data;
|
||||
|
||||
#if defined(FFCS)
|
||||
eq = VCEQQ(data, vc1);
|
||||
|
||||
#elif defined(FFCS_2)
|
||||
eq1 = VCEQQ(data, vc1);
|
||||
eq2 = VCEQQ(data, vc2);
|
||||
eq = VORRQ(eq1, eq2);
|
||||
|
||||
#elif defined(FFCS_MASK)
|
||||
eq = VORRQ(data, vmask);
|
||||
eq = VCEQQ(eq, vc1);
|
||||
#endif
|
||||
|
||||
#if defined(FFCPS)
|
||||
# if defined (FFCPS_DIFF1)
|
||||
data2 = VEXTQ(prev_data, data, VECTOR_FACTOR - 1);
|
||||
# else
|
||||
data2 = VLD1Q(*str_ptr - diff);
|
||||
# if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
data2 = VANDQ(data2, char_mask);
|
||||
# endif
|
||||
# endif
|
||||
|
||||
# ifdef FFCPS_CHAR1A2A
|
||||
data = VCEQQ(data, cmp1a);
|
||||
data2 = VCEQQ(data2, cmp2a);
|
||||
# else
|
||||
if (compare1_type == compare_match1)
|
||||
data = VCEQQ(data, cmp1a);
|
||||
else
|
||||
data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b);
|
||||
if (compare2_type == compare_match1)
|
||||
data2 = VCEQQ(data2, cmp2a);
|
||||
else
|
||||
data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b);
|
||||
# endif
|
||||
|
||||
eq = VANDQ(data, data2);
|
||||
#endif
|
||||
|
||||
VST1Q(qw.mem, eq);
|
||||
if (qw.dw[0])
|
||||
*str_ptr += __builtin_ctzll(qw.dw[0]) / 8;
|
||||
else if (qw.dw[1])
|
||||
*str_ptr += 8 + __builtin_ctzll(qw.dw[1]) / 8;
|
||||
else {
|
||||
*str_ptr += 16;
|
||||
#if defined (FFCPS_DIFF1)
|
||||
prev_data = orig_data;
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Also entered by goto from the handling of the first (unaligned) block
above, i.e. from outside the loop. */
match:;
|
||||
if (*str_ptr >= str_end)
|
||||
/* Failed match. */
|
||||
return NULL;
|
||||
|
||||
#if defined(FF_UTF)
|
||||
if (utf_continue((PCRE2_SPTR)*str_ptr - offs1))
|
||||
{
|
||||
/* Not a match. */
|
||||
*str_ptr += IN_UCHARS(1);
|
||||
goto restart;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Match. */
|
||||
#if defined (FFCPS)
|
||||
*str_ptr -= IN_UCHARS(offs1);
|
||||
#endif
|
||||
return *str_ptr;
|
||||
}
|
||||
|
||||
/* Failed match. */
|
||||
return NULL;
|
||||
}
|
||||
2356
3rd/pcre2/src/pcre2_jit_simd_inc.h
Normal file
2356
3rd/pcre2/src/pcre2_jit_simd_inc.h
Normal file
@@ -0,0 +1,2356 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
This module by Zoltan Herczeg
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2019 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#if !(defined SUPPORT_VALGRIND)
|
||||
|
||||
#if ((defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
|
||||
|| (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \
|
||||
|| (defined SLJIT_CONFIG_LOONGARCH_64 && SLJIT_CONFIG_LOONGARCH_64))
|
||||
|
||||
/* How a vector of subject characters is compared against one or two wanted
characters in the SIMD fast-forward code. */
typedef enum {
|
||||
/* A single equality compare against one character. */
vector_compare_match1,
|
||||
/* The two characters differ in exactly one bit: that bit is ORed into the
data, followed by a single equality compare. */
vector_compare_match1i,
|
||||
/* Two unrelated characters: two equality compares whose results are ORed. */
vector_compare_match2,
|
||||
} vector_compare_type;
|
||||
|
||||
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
|
||||
static SLJIT_INLINE sljit_s32 max_fast_forward_char_pair_offset(void)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
/* The AVX2 code path is currently disabled. */
|
||||
/* return sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? 31 : 15; */
|
||||
return 15;
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
/* The AVX2 code path is currently disabled. */
|
||||
/* return sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? 15 : 7; */
|
||||
return 7;
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 32
|
||||
/* The AVX2 code path is currently disabled. */
|
||||
/* return sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? 7 : 3; */
|
||||
return 3;
|
||||
#else
|
||||
#error "Unsupported unit width"
|
||||
#endif
|
||||
}
|
||||
#else /* !SLJIT_CONFIG_X86 */
|
||||
static SLJIT_INLINE sljit_s32 max_fast_forward_char_pair_offset(void)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
return 15;
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
return 7;
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 32
|
||||
return 3;
|
||||
#else
|
||||
#error "Unsupported unit width"
|
||||
#endif
|
||||
}
|
||||
#endif /* SLJIT_CONFIG_X86 */
|
||||
|
||||
/* Emit code that masks the code unit held in "reg" and jumps when it is not a
UTF trailing unit: in 8-bit mode a unit whose top two bits are not 10
(0x80..0xbf), in 16-bit mode a unit outside 0xdc00..0xdfff (low surrogate).
The register is overwritten with the masked value. Returns the emitted jump so
the caller can set its target.

NOTE(review): OP2 and CMP are code-emitting macros defined elsewhere; they
appear to use "compiler" implicitly -- confirm. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
|
||||
return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0x80);
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
|
||||
return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00);
|
||||
#else
|
||||
#error "Unknown code width"
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* SLJIT_CONFIG_X86 || SLJIT_CONFIG_S390X || SLJIT_CONFIG_LOONGARCH_64 */
|
||||
|
||||
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
|
||||
|
||||
/* Replicate one code unit across all lanes of a 32-bit value: four copies for
8-bit units, two for 16-bit units, the value itself for 32-bit units.

As a side effect of compiling this function, SIMD_COMPARE_TYPE_INDEX is
defined (0, 1 or 2 for 8, 16 or 32 bit code units) for use by later code. */

static sljit_s32 character_to_int32(PCRE2_UCHAR chr)
{
sljit_u32 lanes = chr;

#if PCRE2_CODE_UNIT_WIDTH == 8
#define SIMD_COMPARE_TYPE_INDEX 0
lanes |= lanes << 8;
lanes |= lanes << 16;
#elif PCRE2_CODE_UNIT_WIDTH == 16
#define SIMD_COMPARE_TYPE_INDEX 1
lanes |= lanes << 16;
#elif PCRE2_CODE_UNIT_WIDTH == 32
#define SIMD_COMPARE_TYPE_INDEX 2
#else
#error "Unsupported unit width"
#endif

return (sljit_s32)lanes;
}
|
||||
|
||||
/* Emit at most one raw x86 SIMD instruction belonging to a character
comparison sequence. "step" must be 0..3 (asserted below); which instruction,
if any, is produced depends on compare_type and reg_type:

  match1 / match1i:  step 0 emits POR dst, cmp2 (match1i only); step 2 emits
                     PCMPEQ dst, cmp1; steps 1 and 3 emit nothing.
  match2, 128 bit:   step 0 MOVDQA tmp, dst; step 1 PCMPEQ dst, cmp1;
                     step 2 PCMPEQ tmp, cmp2; step 3 POR dst, tmp.
  match2, otherwise: step 0 is handled as step 2 with dst encoded in the VEX
                     prefix, so no separate copy is made; a real step 2 emits
                     nothing; steps 1 and 3 as above, VEX encoded.

The *_ind arguments are register indexes placed directly into the ModRM byte
(0xc0 | reg << 3 | rm, i.e. register-to-register form). */
static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, vector_compare_type compare_type,
|
||||
sljit_s32 reg_type, int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
|
||||
{
|
||||
sljit_u8 instruction[4];
|
||||
|
||||
/* The first two bytes are the prefix: 66 0F for the 128-bit form, otherwise
a two-byte VEX prefix. */
if (reg_type == SLJIT_SIMD_REG_128)
|
||||
{
|
||||
instruction[0] = 0x66;
|
||||
instruction[1] = 0x0f;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Two byte VEX prefix. */
|
||||
instruction[0] = 0xc5;
|
||||
instruction[1] = 0xfd;
|
||||
}
|
||||
|
||||
SLJIT_ASSERT(step >= 0 && step <= 3);
|
||||
|
||||
if (compare_type != vector_compare_match2)
|
||||
{
|
||||
if (step == 0)
|
||||
{
|
||||
if (compare_type == vector_compare_match1i)
|
||||
{
|
||||
/* POR xmm1, xmm2/m128 */
|
||||
/* Bits 3..6 of the second VEX byte hold the extra source register in
inverted form; flipping them with dst_ind names dst as that source. */
if (reg_type == SLJIT_SIMD_REG_256)
|
||||
instruction[1] ^= (dst_ind << 3);
|
||||
|
||||
/* Prefix is filled. */
|
||||
instruction[2] = 0xeb;
|
||||
instruction[3] = 0xc0 | (dst_ind << 3) | cmp2_ind;
|
||||
sljit_emit_op_custom(compiler, instruction, 4);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (step != 2)
|
||||
return;
|
||||
|
||||
/* PCMPEQB/W/D xmm1, xmm2/m128 */
|
||||
if (reg_type == SLJIT_SIMD_REG_256)
|
||||
instruction[1] ^= (dst_ind << 3);
|
||||
|
||||
/* Prefix is filled. */
|
||||
instruction[2] = 0x74 + SIMD_COMPARE_TYPE_INDEX;
|
||||
instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind;
|
||||
sljit_emit_op_custom(compiler, instruction, 4);
|
||||
return;
|
||||
}
|
||||
|
||||
/* From here on compare_type is vector_compare_match2. */
if (reg_type == SLJIT_SIMD_REG_256)
|
||||
{
|
||||
if (step == 2)
|
||||
return;
|
||||
|
||||
/* Replace the register copy of step 0 by the second compare, with dst given
as the extra VEX source operand. */
if (step == 0)
|
||||
{
|
||||
step = 2;
|
||||
instruction[1] ^= (dst_ind << 3);
|
||||
}
|
||||
}
|
||||
|
||||
switch (step)
|
||||
{
|
||||
case 0:
|
||||
SLJIT_ASSERT(reg_type == SLJIT_SIMD_REG_128);
|
||||
|
||||
/* MOVDQA xmm1, xmm2/m128 */
|
||||
/* Prefix is filled. */
|
||||
instruction[2] = 0x6f;
|
||||
instruction[3] = 0xc0 | (tmp_ind << 3) | dst_ind;
|
||||
sljit_emit_op_custom(compiler, instruction, 4);
|
||||
return;
|
||||
|
||||
case 1:
|
||||
/* PCMPEQB/W/D xmm1, xmm2/m128 */
|
||||
if (reg_type == SLJIT_SIMD_REG_256)
|
||||
instruction[1] ^= (dst_ind << 3);
|
||||
|
||||
/* Prefix is filled. */
|
||||
instruction[2] = 0x74 + SIMD_COMPARE_TYPE_INDEX;
|
||||
instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind;
|
||||
sljit_emit_op_custom(compiler, instruction, 4);
|
||||
return;
|
||||
|
||||
case 2:
|
||||
/* PCMPEQB/W/D xmm1, xmm2/m128 */
|
||||
/* Prefix is filled. */
|
||||
instruction[2] = 0x74 + SIMD_COMPARE_TYPE_INDEX;
|
||||
instruction[3] = 0xc0 | (tmp_ind << 3) | cmp2_ind;
|
||||
sljit_emit_op_custom(compiler, instruction, 4);
|
||||
return;
|
||||
|
||||
case 3:
|
||||
/* POR xmm1, xmm2/m128 */
|
||||
if (reg_type == SLJIT_SIMD_REG_256)
|
||||
instruction[1] ^= (dst_ind << 3);
|
||||
|
||||
/* Prefix is filled. */
|
||||
instruction[2] = 0xeb;
|
||||
instruction[3] = 0xc0 | (dst_ind << 3) | tmp_ind;
|
||||
sljit_emit_op_custom(compiler, instruction, 4);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
#define JIT_HAS_FAST_FORWARD_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SIMD))
|
||||
|
||||
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
|
||||
{
|
||||
DEFINE_COMPILER;
|
||||
sljit_u8 instruction[8];
|
||||
/* The AVX2 code path is currently disabled. */
|
||||
/* sljit_s32 reg_type = sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? SLJIT_SIMD_REG_256 : SLJIT_SIMD_REG_128; */
|
||||
sljit_s32 reg_type = SLJIT_SIMD_REG_128;
|
||||
sljit_s32 value;
|
||||
struct sljit_label *start;
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
struct sljit_label *restart;
|
||||
#endif
|
||||
struct sljit_jump *quit;
|
||||
struct sljit_jump *partial_quit[2];
|
||||
vector_compare_type compare_type = vector_compare_match1;
|
||||
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
|
||||
sljit_s32 data_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR0);
|
||||
sljit_s32 cmp1_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR1);
|
||||
sljit_s32 cmp2_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR2);
|
||||
sljit_s32 tmp_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR3);
|
||||
sljit_u32 bit = 0;
|
||||
int i;
|
||||
|
||||
SLJIT_UNUSED_ARG(offset);
|
||||
|
||||
if (char1 != char2)
|
||||
{
|
||||
bit = char1 ^ char2;
|
||||
compare_type = vector_compare_match1i;
|
||||
|
||||
if (!is_powerof2(bit))
|
||||
{
|
||||
bit = 0;
|
||||
compare_type = vector_compare_match2;
|
||||
}
|
||||
}
|
||||
|
||||
partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
|
||||
if (common->mode == PCRE2_JIT_COMPLETE)
|
||||
add_jump(compiler, &common->failed_match, partial_quit[0]);
|
||||
|
||||
/* First part (unaligned start) */
|
||||
value = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_LANE_ZERO;
|
||||
sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
|
||||
|
||||
if (char1 != char2)
|
||||
sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR2, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
|
||||
|
||||
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
|
||||
|
||||
sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR1, SLJIT_VR1, 0);
|
||||
|
||||
if (char1 != char2)
|
||||
sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR2, SLJIT_VR2, 0);
|
||||
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
restart = LABEL();
|
||||
#endif
|
||||
|
||||
value = (reg_type == SLJIT_SIMD_REG_256) ? 0x1f : 0xf;
|
||||
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~value);
|
||||
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, value);
|
||||
|
||||
value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
|
||||
sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
|
||||
|
||||
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0);
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
|
||||
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||
|
||||
quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
|
||||
|
||||
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
|
||||
|
||||
/* Second part (aligned) */
|
||||
start = LABEL();
|
||||
|
||||
value = (reg_type == SLJIT_SIMD_REG_256) ? 32 : 16;
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, value);
|
||||
|
||||
partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
|
||||
if (common->mode == PCRE2_JIT_COMPLETE)
|
||||
add_jump(compiler, &common->failed_match, partial_quit[1]);
|
||||
|
||||
value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
|
||||
sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);
|
||||
for (i = 0; i < 4; i++)
|
||||
fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
|
||||
|
||||
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0);
|
||||
CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
|
||||
|
||||
JUMPHERE(quit);
|
||||
|
||||
SLJIT_ASSERT(tmp1_reg_ind < 8);
|
||||
/* BSF r32, r/m32 */
|
||||
instruction[0] = 0x0f;
|
||||
instruction[1] = 0xbc;
|
||||
instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
|
||||
sljit_emit_op_custom(compiler, instruction, 3);
|
||||
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
|
||||
|
||||
if (common->mode != PCRE2_JIT_COMPLETE)
|
||||
{
|
||||
JUMPHERE(partial_quit[0]);
|
||||
JUMPHERE(partial_quit[1]);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
|
||||
SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
|
||||
}
|
||||
else
|
||||
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (common->utf && offset > 0)
|
||||
{
|
||||
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
|
||||
|
||||
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
|
||||
|
||||
quit = jump_if_utf_char_start(compiler, TMP1);
|
||||
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
|
||||
JUMPTO(SLJIT_JUMP, restart);
|
||||
|
||||
JUMPHERE(quit);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SIMD))
|
||||
|
||||
static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
|
||||
{
|
||||
DEFINE_COMPILER;
|
||||
sljit_u8 instruction[8];
|
||||
/* The AVX2 code path is currently disabled. */
|
||||
/* sljit_s32 reg_type = sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? SLJIT_SIMD_REG_256 : SLJIT_SIMD_REG_128; */
|
||||
sljit_s32 reg_type = SLJIT_SIMD_REG_128;
|
||||
sljit_s32 value;
|
||||
struct sljit_label *start;
|
||||
struct sljit_jump *quit;
|
||||
jump_list *not_found = NULL;
|
||||
vector_compare_type compare_type = vector_compare_match1;
|
||||
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
|
||||
sljit_s32 data_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR0);
|
||||
sljit_s32 cmp1_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR1);
|
||||
sljit_s32 cmp2_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR2);
|
||||
sljit_s32 tmp_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR3);
|
||||
sljit_u32 bit = 0;
|
||||
int i;
|
||||
|
||||
if (char1 != char2)
|
||||
{
|
||||
bit = char1 ^ char2;
|
||||
compare_type = vector_compare_match1i;
|
||||
|
||||
if (!is_powerof2(bit))
|
||||
{
|
||||
bit = 0;
|
||||
compare_type = vector_compare_match2;
|
||||
}
|
||||
}
|
||||
|
||||
add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
|
||||
OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
|
||||
OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
|
||||
|
||||
/* First part (unaligned start) */
|
||||
|
||||
value = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_LANE_ZERO;
|
||||
sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
|
||||
|
||||
if (char1 != char2)
|
||||
sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR2, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
|
||||
|
||||
OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
|
||||
|
||||
sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR1, SLJIT_VR1, 0);
|
||||
|
||||
if (char1 != char2)
|
||||
sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR2, SLJIT_VR2, 0);
|
||||
|
||||
value = (reg_type == SLJIT_SIMD_REG_256) ? 0x1f : 0xf;
|
||||
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~value);
|
||||
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, value);
|
||||
|
||||
value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
|
||||
sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
|
||||
|
||||
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0);
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
|
||||
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||
|
||||
quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
|
||||
|
||||
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
|
||||
|
||||
/* Second part (aligned) */
|
||||
start = LABEL();
|
||||
|
||||
value = (reg_type == SLJIT_SIMD_REG_256) ? 32 : 16;
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, value);
|
||||
|
||||
add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||
|
||||
value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
|
||||
sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
|
||||
|
||||
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0);
|
||||
CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
|
||||
|
||||
JUMPHERE(quit);
|
||||
|
||||
SLJIT_ASSERT(tmp1_reg_ind < 8);
|
||||
/* BSF r32, r/m32 */
|
||||
instruction[0] = 0x0f;
|
||||
instruction[1] = 0xbc;
|
||||
instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
|
||||
sljit_emit_op_custom(compiler, instruction, 3);
|
||||
|
||||
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, STR_PTR, 0);
|
||||
add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
|
||||
|
||||
OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
|
||||
return not_found;
|
||||
}
|
||||
|
||||
#ifndef _WIN64
|
||||
|
||||
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SIMD))
|
||||
|
||||
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
|
||||
PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
|
||||
{
|
||||
DEFINE_COMPILER;
|
||||
sljit_u8 instruction[8];
|
||||
/* The AVX2 code path is currently disabled. */
|
||||
/* sljit_s32 reg_type = sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? SLJIT_SIMD_REG_256 : SLJIT_SIMD_REG_128; */
|
||||
sljit_s32 reg_type = SLJIT_SIMD_REG_128;
|
||||
sljit_s32 value;
|
||||
vector_compare_type compare1_type = vector_compare_match1;
|
||||
vector_compare_type compare2_type = vector_compare_match1;
|
||||
sljit_u32 bit1 = 0;
|
||||
sljit_u32 bit2 = 0;
|
||||
sljit_u32 diff = IN_UCHARS(offs1 - offs2);
|
||||
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
|
||||
sljit_s32 data1_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR0);
|
||||
sljit_s32 data2_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR1);
|
||||
sljit_s32 cmp1a_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR2);
|
||||
sljit_s32 cmp2a_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR3);
|
||||
sljit_s32 cmp1b_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR4);
|
||||
sljit_s32 cmp2b_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR5);
|
||||
sljit_s32 tmp1_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR6);
|
||||
sljit_s32 tmp2_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_TMP_DEST_VREG);
|
||||
struct sljit_label *start;
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
struct sljit_label *restart;
|
||||
#endif
|
||||
struct sljit_jump *jump[2];
|
||||
int i;
|
||||
|
||||
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2 && offs2 >= 0);
|
||||
SLJIT_ASSERT(diff <= (unsigned)IN_UCHARS(max_fast_forward_char_pair_offset()));
|
||||
|
||||
/* Initialize. */
|
||||
if (common->match_end_ptr != 0)
|
||||
{
|
||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
|
||||
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
|
||||
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
|
||||
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0);
|
||||
SELECT(SLJIT_LESS, STR_END, TMP1, 0, STR_END);
|
||||
}
|
||||
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
|
||||
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||
|
||||
if (char1a == char1b)
|
||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
|
||||
else
|
||||
{
|
||||
bit1 = char1a ^ char1b;
|
||||
if (is_powerof2(bit1))
|
||||
{
|
||||
compare1_type = vector_compare_match1i;
|
||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1));
|
||||
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1));
|
||||
}
|
||||
else
|
||||
{
|
||||
compare1_type = vector_compare_match2;
|
||||
bit1 = 0;
|
||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
|
||||
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char1b));
|
||||
}
|
||||
}
|
||||
|
||||
value = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_LANE_ZERO;
|
||||
sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR2, 0, TMP1, 0);
|
||||
|
||||
if (char1a != char1b)
|
||||
sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR4, 0, TMP2, 0);
|
||||
|
||||
if (char2a == char2b)
|
||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
|
||||
else
|
||||
{
|
||||
bit2 = char2a ^ char2b;
|
||||
if (is_powerof2(bit2))
|
||||
{
|
||||
compare2_type = vector_compare_match1i;
|
||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2));
|
||||
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2));
|
||||
}
|
||||
else
|
||||
{
|
||||
compare2_type = vector_compare_match2;
|
||||
bit2 = 0;
|
||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
|
||||
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char2b));
|
||||
}
|
||||
}
|
||||
|
||||
sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR3, 0, TMP1, 0);
|
||||
|
||||
if (char2a != char2b)
|
||||
sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR5, 0, TMP2, 0);
|
||||
|
||||
sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR2, SLJIT_VR2, 0);
|
||||
if (char1a != char1b)
|
||||
sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR4, SLJIT_VR4, 0);
|
||||
|
||||
sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR3, SLJIT_VR3, 0);
|
||||
if (char2a != char2b)
|
||||
sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR5, SLJIT_VR5, 0);
|
||||
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
restart = LABEL();
|
||||
#endif
|
||||
|
||||
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, diff);
|
||||
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
|
||||
value = (reg_type == SLJIT_SIMD_REG_256) ? ~0x1f : ~0xf;
|
||||
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, value);
|
||||
|
||||
value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
|
||||
sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);
|
||||
|
||||
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_PTR, 0);
|
||||
|
||||
sljit_emit_simd_mov(compiler, reg_type, SLJIT_VR1, SLJIT_MEM1(STR_PTR), -(sljit_sw)diff);
|
||||
jump[1] = JUMP(SLJIT_JUMP);
|
||||
|
||||
JUMPHERE(jump[0]);
|
||||
|
||||
if (reg_type == SLJIT_SIMD_REG_256)
|
||||
{
|
||||
if (diff != 16)
|
||||
{
|
||||
/* PSLLDQ ymm1, ymm2, imm8 */
|
||||
instruction[0] = 0xc5;
|
||||
instruction[1] = (sljit_u8)(0xf9 ^ (data2_ind << 3));
|
||||
instruction[2] = 0x73;
|
||||
instruction[3] = 0xc0 | (7 << 3) | data1_ind;
|
||||
instruction[4] = diff & 0xf;
|
||||
sljit_emit_op_custom(compiler, instruction, 5);
|
||||
}
|
||||
|
||||
instruction[0] = 0xc4;
|
||||
instruction[1] = 0xe3;
|
||||
if (diff < 16)
|
||||
{
|
||||
/* VINSERTI128 xmm1, xmm2, xmm3/m128 */
|
||||
/* instruction[0] = 0xc4; */
|
||||
/* instruction[1] = 0xe3; */
|
||||
instruction[2] = (sljit_u8)(0x7d ^ (data2_ind << 3));
|
||||
instruction[3] = 0x38;
|
||||
SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR) <= 7);
|
||||
instruction[4] = 0x40 | (data2_ind << 3) | sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
|
||||
instruction[5] = (sljit_u8)(16 - diff);
|
||||
instruction[6] = 1;
|
||||
sljit_emit_op_custom(compiler, instruction, 7);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* VPERM2I128 xmm1, xmm2, xmm3/m128 */
|
||||
/* instruction[0] = 0xc4; */
|
||||
/* instruction[1] = 0xe3; */
|
||||
value = (diff == 16) ? data1_ind : data2_ind;
|
||||
instruction[2] = (sljit_u8)(0x7d ^ (value << 3));
|
||||
instruction[3] = 0x46;
|
||||
instruction[4] = 0xc0 | (data2_ind << 3) | value;
|
||||
instruction[5] = 0x08;
|
||||
sljit_emit_op_custom(compiler, instruction, 6);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* MOVDQA xmm1, xmm2/m128 */
|
||||
instruction[0] = 0x66;
|
||||
instruction[1] = 0x0f;
|
||||
instruction[2] = 0x6f;
|
||||
instruction[3] = 0xc0 | (data2_ind << 3) | data1_ind;
|
||||
sljit_emit_op_custom(compiler, instruction, 4);
|
||||
|
||||
/* PSLLDQ xmm1, imm8 */
|
||||
/* instruction[0] = 0x66; */
|
||||
/* instruction[1] = 0x0f; */
|
||||
instruction[2] = 0x73;
|
||||
instruction[3] = 0xc0 | (7 << 3) | data2_ind;
|
||||
instruction[4] = diff;
|
||||
sljit_emit_op_custom(compiler, instruction, 5);
|
||||
}
|
||||
|
||||
JUMPHERE(jump[1]);
|
||||
|
||||
value = (reg_type == SLJIT_SIMD_REG_256) ? 0x1f : 0xf;
|
||||
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, value);
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
fast_forward_char_pair_sse2_compare(compiler, compare2_type, reg_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
|
||||
fast_forward_char_pair_sse2_compare(compiler, compare1_type, reg_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
|
||||
}
|
||||
|
||||
sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | reg_type, SLJIT_VR0, SLJIT_VR0, SLJIT_VR1, 0);
|
||||
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0);
|
||||
|
||||
/* Ignore matches before the first STR_PTR. */
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
|
||||
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||
|
||||
jump[0] = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
|
||||
|
||||
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
|
||||
|
||||
/* Main loop. */
|
||||
start = LABEL();
|
||||
|
||||
value = (reg_type == SLJIT_SIMD_REG_256) ? 32 : 16;
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, value);
|
||||
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||
|
||||
value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
|
||||
sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);
|
||||
sljit_emit_simd_mov(compiler, reg_type, SLJIT_VR1, SLJIT_MEM1(STR_PTR), -(sljit_sw)diff);
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
fast_forward_char_pair_sse2_compare(compiler, compare1_type, reg_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp2_ind);
|
||||
fast_forward_char_pair_sse2_compare(compiler, compare2_type, reg_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind);
|
||||
}
|
||||
|
||||
sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | reg_type, SLJIT_VR0, SLJIT_VR0, SLJIT_VR1, 0);
|
||||
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0);
|
||||
|
||||
CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
|
||||
|
||||
JUMPHERE(jump[0]);
|
||||
|
||||
SLJIT_ASSERT(tmp1_reg_ind < 8);
|
||||
/* BSF r32, r/m32 */
|
||||
instruction[0] = 0x0f;
|
||||
instruction[1] = 0xbc;
|
||||
instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
|
||||
sljit_emit_op_custom(compiler, instruction, 3);
|
||||
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
|
||||
|
||||
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (common->utf)
|
||||
{
|
||||
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));
|
||||
|
||||
jump[0] = jump_if_utf_char_start(compiler, TMP1);
|
||||
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart);
|
||||
|
||||
add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP));
|
||||
|
||||
JUMPHERE(jump[0]);
|
||||
}
|
||||
#endif
|
||||
|
||||
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
|
||||
|
||||
if (common->match_end_ptr != 0)
|
||||
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
|
||||
}
|
||||
|
||||
#endif /* !_WIN64 */
|
||||
|
||||
#undef SIMD_COMPARE_TYPE_INDEX
|
||||
|
||||
#endif /* SLJIT_CONFIG_X86 */
|
||||
|
||||
#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 && (defined __ARM_NEON || defined __ARM_NEON__))
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
/* Packs up to four byte-sized character values into one unsigned int so
   that they can be handed over as a single immediate argument. */
typedef union {
  unsigned int x;
  struct {
    unsigned char c1, c2, c3, c4;
  } c;
} int_char;
|
||||
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
/* Returns non-zero when the code unit at s is not the first unit of a
   character: the (x & 0xc0) == 0x80 pattern for 8-bit units, or the
   (x & 0xfc00) == 0xdc00 pattern for 16-bit units. */
static SLJIT_INLINE int utf_continue(PCRE2_SPTR s)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
  return (s[0] & 0xc0) == 0x80;
#elif PCRE2_CODE_UNIT_WIDTH == 16
  return (s[0] & 0xfc00) == 0xdc00;
#else
#error "Unknown code width"
#endif
}
|
||||
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||
|
||||
/* Width-dependent aliases for the NEON vector type and intrinsics.
   VECTOR_FACTOR is the number of code units that fit in one 128-bit vector;
   quad_word overlays such a vector's elements with two 64-bit words. */
#if PCRE2_CODE_UNIT_WIDTH == 8

# define VECTOR_FACTOR 16
# define vect_t uint8x16_t
# define VLD1Q(X) vld1q_u8((sljit_u8 *)(X))
# define VCEQQ vceqq_u8
# define VORRQ vorrq_u8
# define VST1Q vst1q_u8
# define VDUPQ vdupq_n_u8
# define VEXTQ vextq_u8
# define VANDQ vandq_u8
typedef union {
  uint8_t mem[VECTOR_FACTOR];
  uint64_t dw[2];
} quad_word;

#elif PCRE2_CODE_UNIT_WIDTH == 16

# define VECTOR_FACTOR 8
# define vect_t uint16x8_t
# define VLD1Q(X) vld1q_u16((sljit_u16 *)(X))
# define VCEQQ vceqq_u16
# define VORRQ vorrq_u16
# define VST1Q vst1q_u16
# define VDUPQ vdupq_n_u16
# define VEXTQ vextq_u16
# define VANDQ vandq_u16
typedef union {
  uint16_t mem[VECTOR_FACTOR];
  uint64_t dw[2];
} quad_word;

#else

# define VECTOR_FACTOR 4
# define vect_t uint32x4_t
# define VLD1Q(X) vld1q_u32((sljit_u32 *)(X))
# define VCEQQ vceqq_u32
# define VORRQ vorrq_u32
# define VST1Q vst1q_u32
# define VDUPQ vdupq_n_u32
# define VEXTQ vextq_u32
# define VANDQ vandq_u32
typedef union {
  uint32_t mem[VECTOR_FACTOR];
  uint64_t dw[2];
} quad_word;

#endif
|
||||
|
||||
/* pcre2_jit_neon_inc.h is included repeatedly as a template. Each pass is
   selected by one of FFCS, FFCS_2 or FFCS_MASK, and a second pass with
   FF_UTF defined is added when UTF support with 8- or 16-bit code units is
   compiled in. Presumably these passes provide the ffcs*, ffcs_2* and
   ffcs_mask* helpers called further down - confirm against the include. */

/* Single character search. */
#define FFCS
#include "pcre2_jit_neon_inc.h"
#if defined(SUPPORT_UNICODE) && PCRE2_CODE_UNIT_WIDTH != 32
# define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCS

/* Search for either of two characters. */
#define FFCS_2
#include "pcre2_jit_neon_inc.h"
#if defined(SUPPORT_UNICODE) && PCRE2_CODE_UNIT_WIDTH != 32
# define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCS_2

/* Search using a character plus a bit mask. */
#define FFCS_MASK
#include "pcre2_jit_neon_inc.h"
#if defined(SUPPORT_UNICODE) && PCRE2_CODE_UNIT_WIDTH != 32
# define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCS_MASK
|
||||
|
||||
#define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1
|
||||
|
||||
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
|
||||
{
|
||||
DEFINE_COMPILER;
|
||||
int_char ic;
|
||||
struct sljit_jump *partial_quit, *quit;
|
||||
/* Save temporary registers. */
|
||||
SLJIT_ASSERT(common->locals_size >= 2 * (int)sizeof(sljit_sw));
|
||||
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, STR_PTR, 0);
|
||||
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, TMP3, 0);
|
||||
|
||||
/* Prepare function arguments */
|
||||
OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0);
|
||||
GET_LOCAL_BASE(SLJIT_R1, 0, LOCAL0);
|
||||
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, offset);
|
||||
|
||||
if (char1 == char2)
|
||||
{
|
||||
ic.c.c1 = char1;
|
||||
ic.c.c2 = char2;
|
||||
OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x);
|
||||
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (common->utf && offset > 0)
|
||||
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
|
||||
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_utf));
|
||||
else
|
||||
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
|
||||
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs));
|
||||
#else
|
||||
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
|
||||
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs));
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
PCRE2_UCHAR mask = char1 ^ char2;
|
||||
if (is_powerof2(mask))
|
||||
{
|
||||
ic.c.c1 = char1 | mask;
|
||||
ic.c.c2 = mask;
|
||||
OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x);
|
||||
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (common->utf && offset > 0)
|
||||
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
|
||||
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask_utf));
|
||||
else
|
||||
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
|
||||
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask));
|
||||
#else
|
||||
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
|
||||
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask));
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
ic.c.c1 = char1;
|
||||
ic.c.c2 = char2;
|
||||
OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x);
|
||||
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (common->utf && offset > 0)
|
||||
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
|
||||
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2_utf));
|
||||
else
|
||||
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
|
||||
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2));
|
||||
#else
|
||||
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
|
||||
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2));
|
||||
#endif
|
||||
}
|
||||
}
|
||||
/* Restore registers. */
|
||||
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
|
||||
OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);
|
||||
|
||||
/* Check return value. */
|
||||
partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
|
||||
if (common->mode == PCRE2_JIT_COMPLETE)
|
||||
add_jump(compiler, &common->failed_match, partial_quit);
|
||||
|
||||
/* Fast forward STR_PTR to the result of memchr. */
|
||||
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
|
||||
if (common->mode != PCRE2_JIT_COMPLETE)
|
||||
{
|
||||
quit = CMP(SLJIT_NOT_ZERO, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
|
||||
JUMPHERE(partial_quit);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
|
||||
SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
|
||||
JUMPHERE(quit);
|
||||
}
|
||||
}
|
||||
|
||||
/* Comparison strategies understood by fast_forward_char_pair_compare. */
typedef enum {
  compare_match1,   /* equality with a single value */
  compare_match1i,  /* OR with cmp2 first, then equality with cmp1 */
  compare_match2,   /* equality with either of two values */
} compare_type;
|
||||
|
||||
/* Compares every lane of dst according to ctype and returns the lane mask.

   compare_match2:  lanes equal to cmp1 or equal to cmp2.
   compare_match1i: lanes whose value OR-ed with cmp2 equals cmp1.
   otherwise:       lanes equal to cmp1 (cmp2 is ignored). */
static inline vect_t fast_forward_char_pair_compare(compare_type ctype, vect_t dst, vect_t cmp1, vect_t cmp2)
{
  switch (ctype)
    {
    case compare_match2:
      return VORRQ(VCEQQ(dst, cmp1), VCEQQ(dst, cmp2));

    case compare_match1i:
      return VCEQQ(VORRQ(dst, cmp2), cmp1);

    default:
      return VCEQQ(dst, cmp1);
    }
}
|
||||
|
||||
/* Returns the largest supported distance, in code units, between the two
   characters of a pair search: 15, 7 or 3 for 8-, 16- or 32-bit code
   units. */
static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
  const sljit_u32 max_offset = 15;
#elif PCRE2_CODE_UNIT_WIDTH == 16
  const sljit_u32 max_offset = 7;
#elif PCRE2_CODE_UNIT_WIDTH == 32
  const sljit_u32 max_offset = 3;
#else
#error "Unsupported unit width"
#endif
  return max_offset;
}
|
||||
|
||||
/* ARM doesn't have a shift left across lanes. */
|
||||
static SLJIT_INLINE vect_t shift_left_n_lanes(vect_t a, sljit_u8 n)
|
||||
{
|
||||
vect_t zero = VDUPQ(0);
|
||||
SLJIT_ASSERT(0 < n && n < VECTOR_FACTOR);
|
||||
/* VEXTQ takes an immediate as last argument. */
|
||||
#define C(X) case X: return VEXTQ(zero, a, VECTOR_FACTOR - X);
|
||||
switch (n)
|
||||
{
|
||||
C(1); C(2); C(3);
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
C(4); C(5); C(6); C(7);
|
||||
# if PCRE2_CODE_UNIT_WIDTH != 16
|
||||
C(8); C(9); C(10); C(11); C(12); C(13); C(14); C(15);
|
||||
# endif
|
||||
#endif
|
||||
default:
|
||||
/* Based on the ASSERT(0 < n && n < VECTOR_FACTOR) above, this won't
|
||||
happen. The return is still here for compilers to not warn. */
|
||||
return a;
|
||||
}
|
||||
}
|
||||
|
||||
/* pcre2_jit_neon_inc.h is included repeatedly as a template for the
   character-pair search. FFCPS selects the pair-search family, and FFCPS_0,
   FFCPS_1 and FFCPS_DEFAULT select the individual variants; FFCPS_DIFF1 and
   FFCPS_CHAR1A2A are extra switches for the first passes. A second pass with
   FF_UTF is added when UTF support with 8- or 16-bit code units is compiled
   in. Presumably these passes provide the ffcps_0*, ffcps_1* and
   ffcps_default* helpers called below - confirm against the include. */
#define FFCPS
#define FFCPS_DIFF1
#define FFCPS_CHAR1A2A

#define FFCPS_0
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
# define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCPS_0

#undef FFCPS_CHAR1A2A

#define FFCPS_1
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
# define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCPS_1

#undef FFCPS_DIFF1

#define FFCPS_DEFAULT
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
# define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
/* Undefine the last selector as well, like every other one above. */
#undef FFCPS_DEFAULT
#undef FFCPS
|
||||
|
||||
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1
|
||||
|
||||
/* Emits JIT code that calls one of the NEON ffcps_* search helpers for a
character pair and moves STR_PTR to the helper's result.

Arguments:
  common          compiler state (must be in PCRE2_JIT_COMPLETE mode)
  offs1, offs2    offsets of the two characters; offs1 > offs2 is asserted
  char1a, char1b  the two character values associated with offs1
  char2a, char2b  the two character values associated with offs2
                  (presumably the two case variants of each character)

Helper call registers as set up here: R0 = search limit, R1 = address of
LOCAL0 (where STR_PTR was just saved), R2 = offs1, R3 = offs2, R4 = the four
characters packed into one 32-bit value. A jump taken when the helper returns
zero is added to common->failed_match. */
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
  PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
{
DEFINE_COMPILER;
sljit_u32 diff = IN_UCHARS(offs1 - offs2);
struct sljit_jump *no_match;
sljit_sw callee;
int_char packed;

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset()));
SLJIT_ASSERT(compiler->scratches == 5);

/* Pack the four characters for the fifth argument. */
packed.c.c1 = char1a;
packed.c.c2 = char1b;
packed.c.c3 = char2a;
packed.c.c4 = char2b;

/* Choose the helper: a dedicated variant exists for adjacent characters
(diff == 1), split further by whether both characters are single-valued. */
if (diff != 1)
  callee = SLJIT_FUNC_ADDR(ffcps_default);
else if (char1a == char1b && char2a == char2b)
  callee = SLJIT_FUNC_ADDR(ffcps_0);
else
  callee = SLJIT_FUNC_ADDR(ffcps_1);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
  {
  if (diff != 1)
    callee = SLJIT_FUNC_ADDR(ffcps_default_utf);
  else if (char1a == char1b && char2a == char2b)
    callee = SLJIT_FUNC_ADDR(ffcps_0_utf);
  else
    callee = SLJIT_FUNC_ADDR(ffcps_1_utf);
  }
#endif

/* Save temporary register STR_PTR. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, STR_PTR, 0);

/* First argument: the search limit. */
if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP2(SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));

  /* Use STR_END instead when it is below the adjusted limit. */
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, SLJIT_R0, 0);
  SELECT(SLJIT_LESS, SLJIT_R0, STR_END, 0, SLJIT_R0);
  }
else
  OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0);

/* Remaining arguments. */
GET_LOCAL_BASE(SLJIT_R1, 0, LOCAL0);
OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_IMM, offs1);
OP1(SLJIT_MOV_S32, SLJIT_R3, 0, SLJIT_IMM, offs2);
OP1(SLJIT_MOV_U32, SLJIT_R4, 0, SLJIT_IMM, packed.x);

sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), SLJIT_IMM, callee);

/* Restore STR_PTR register. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);

/* Check return value. */
no_match = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
add_jump(compiler, &common->failed_match, no_match);

/* Fast forward STR_PTR to the position returned by the helper. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

JUMPHERE(no_match);
}
|
||||
|
||||
#endif /* SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 */
|
||||
|
||||
#if (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
|
||||
|
||||
/* VECTOR_ELEMENT_SIZE is the element-size code (0, 1 or 2 for 8, 16 or 32 bit
code units) that the s390x emitters below shift into bits 12 and up of the
last instruction halfword. Any other code unit width is a build error. */
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define VECTOR_ELEMENT_SIZE 0
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
#define VECTOR_ELEMENT_SIZE 1
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 32
|
||||
#define VECTOR_ELEMENT_SIZE 2
|
||||
#else
|
||||
#error "Unsupported unit width"
|
||||
#endif
|
||||
|
||||
/* Emits a 6-byte vector load instruction.

Arguments:
  compiler    sljit compiler that receives the instruction
  vlbb        selects the low opcode byte: 0x07 when TRUE, 0x06 when FALSE
              (presumably the "load to block boundary" form vs. a plain
              vector load - confirm against the instruction reference)
  dst_vreg    destination vector register index
  base_reg    base general register index
  index_reg   index general register index (callers pass 0 for "none") */
static void load_from_mem_vector(struct sljit_compiler *compiler, BOOL vlbb, sljit_s32 dst_vreg,
  sljit_s32 base_reg, sljit_s32 index_reg)
{
sljit_u16 words[3];
int opcode_low = vlbb ? 0x07 : 0x06;

words[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | index_reg);
words[1] = (sljit_u16)(base_reg << 12);
words[2] = (sljit_u16)((0x8 << 8) | opcode_low);

sljit_emit_op_custom(compiler, words, 6);
}
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32

/* Emits one step of filling every element of dst_vreg with chr. Callers run
it with step 0 and then step 1.

  chr < 0x7fff:  step 0 emits a single VREPI with chr as the immediate,
                 step 1 emits nothing.
  otherwise:     step 0 loads chr into tmp_general_reg and emits VLVG,
                 step 1 emits VREP from dst_vreg into dst_vreg.

tmp_general_reg is only written in the second case. */
static void replicate_imm_vector(struct sljit_compiler *compiler, int step, sljit_s32 dst_vreg,
  PCRE2_UCHAR chr, sljit_s32 tmp_general_reg)
{
sljit_u16 words[3];
BOOL fits_immediate = chr < 0x7fff;

SLJIT_ASSERT(step >= 0 && step <= 1);

/* The immediate form needs no second step. */
if (fits_immediate && step == 1)
  return;

if (fits_immediate)
  {
  /* VREPI */
  words[0] = (sljit_u16)(0xe700 | (dst_vreg << 4));
  words[1] = (sljit_u16)chr;
  words[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
  }
else if (step == 0)
  {
  OP1(SLJIT_MOV, tmp_general_reg, 0, SLJIT_IMM, chr);

  /* VLVG */
  words[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | sljit_get_register_index(SLJIT_GP_REGISTER, tmp_general_reg));
  words[1] = 0;
  words[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x22);
  }
else
  {
  /* VREP */
  words[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | dst_vreg);
  words[1] = 0;
  words[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xc << 8) | 0x4d);
  }

sljit_emit_op_custom(compiler, words, 6);
}

#endif
|
||||
|
||||
/* Emits one step (0, 1 or 2) of the vector compare sequence for dst_ind.

  step 0: vector_compare_match1i -> VO   dst = dst | cmp2
          vector_compare_match2  -> VCEQ tmp = (dst == cmp2)
          otherwise              -> nothing
  step 1: always                 -> VCEQ dst = (dst == cmp1)
  step 2: vector_compare_match2  -> VO   dst = dst | tmp
          otherwise              -> nothing

Callers invoke the steps in increasing order. */
static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, vector_compare_type compare_type,
  int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
{
sljit_u16 words[3];
BOOL two_chars = (compare_type == vector_compare_match2);

SLJIT_ASSERT(step >= 0 && step <= 2);

switch (step)
  {
  case 0:
  if (two_chars)
    {
    /* VCEQ */
    words[0] = (sljit_u16)(0xe700 | (tmp_ind << 4) | dst_ind);
    words[1] = (sljit_u16)(cmp2_ind << 12);
    words[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0xf8);
    }
  else if (compare_type == vector_compare_match1i)
    {
    /* VO */
    words[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind);
    words[1] = (sljit_u16)(cmp2_ind << 12);
    words[2] = (sljit_u16)((0xe << 8) | 0x6a);
    }
  else
    return;
  break;

  case 1:
  /* VCEQ */
  words[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind);
  words[1] = (sljit_u16)(cmp1_ind << 12);
  words[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0xf8);
  break;

  case 2:
  if (!two_chars)
    return;

  /* VO */
  words[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind);
  words[1] = (sljit_u16)(tmp_ind << 12);
  words[2] = (sljit_u16)((0xe << 8) | 0x6a);
  break;

  default:
  return;
  }

sljit_emit_op_custom(compiler, words, 6);
}
|
||||
|
||||
#define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1
|
||||
|
||||
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
|
||||
{
|
||||
DEFINE_COMPILER;
|
||||
sljit_u16 instruction[3];
|
||||
struct sljit_label *start;
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
struct sljit_label *restart;
|
||||
#endif
|
||||
struct sljit_jump *quit;
|
||||
struct sljit_jump *partial_quit[2];
|
||||
vector_compare_type compare_type = vector_compare_match1;
|
||||
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
|
||||
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
|
||||
sljit_s32 data_ind = 0;
|
||||
sljit_s32 tmp_ind = 1;
|
||||
sljit_s32 cmp1_ind = 2;
|
||||
sljit_s32 cmp2_ind = 3;
|
||||
sljit_s32 zero_ind = 4;
|
||||
sljit_u32 bit = 0;
|
||||
int i;
|
||||
|
||||
SLJIT_UNUSED_ARG(offset);
|
||||
|
||||
if (char1 != char2)
|
||||
{
|
||||
bit = char1 ^ char2;
|
||||
compare_type = vector_compare_match1i;
|
||||
|
||||
if (!is_powerof2(bit))
|
||||
{
|
||||
bit = 0;
|
||||
compare_type = vector_compare_match2;
|
||||
}
|
||||
}
|
||||
|
||||
partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
|
||||
if (common->mode == PCRE2_JIT_COMPLETE)
|
||||
add_jump(compiler, &common->failed_match, partial_quit[0]);
|
||||
|
||||
/* First part (unaligned start) */
|
||||
|
||||
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 16);
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
|
||||
/* VREPI */
|
||||
instruction[0] = (sljit_u16)(0xe700 | (cmp1_ind << 4));
|
||||
instruction[1] = (sljit_u16)(char1 | bit);
|
||||
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
|
||||
sljit_emit_op_custom(compiler, instruction, 6);
|
||||
|
||||
if (char1 != char2)
|
||||
{
|
||||
/* VREPI */
|
||||
instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4));
|
||||
instruction[1] = (sljit_u16)(bit != 0 ? bit : char2);
|
||||
/* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */
|
||||
sljit_emit_op_custom(compiler, instruction, 6);
|
||||
}
|
||||
|
||||
#else /* PCRE2_CODE_UNIT_WIDTH == 32 */
|
||||
|
||||
for (int i = 0; i < 2; i++)
|
||||
{
|
||||
replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP1);
|
||||
|
||||
if (char1 != char2)
|
||||
replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP1);
|
||||
}
|
||||
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||
|
||||
if (compare_type == vector_compare_match2)
|
||||
{
|
||||
/* VREPI */
|
||||
instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4));
|
||||
instruction[1] = 0;
|
||||
instruction[2] = (sljit_u16)((0x8 << 8) | 0x45);
|
||||
sljit_emit_op_custom(compiler, instruction, 6);
|
||||
}
|
||||
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
restart = LABEL();
|
||||
#endif
|
||||
|
||||
load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0);
|
||||
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15);
|
||||
|
||||
if (compare_type != vector_compare_match2)
|
||||
{
|
||||
if (compare_type == vector_compare_match1i)
|
||||
fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
|
||||
|
||||
/* VFEE */
|
||||
instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
|
||||
instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
|
||||
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
|
||||
sljit_emit_op_custom(compiler, instruction, 6);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < 3; i++)
|
||||
fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
|
||||
|
||||
/* VFENE */
|
||||
instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
|
||||
instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
|
||||
instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
|
||||
sljit_emit_op_custom(compiler, instruction, 6);
|
||||
}
|
||||
|
||||
/* VLGVB */
|
||||
instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind);
|
||||
instruction[1] = 7;
|
||||
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
|
||||
sljit_emit_op_custom(compiler, instruction, 6);
|
||||
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
|
||||
quit = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
|
||||
|
||||
OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16);
|
||||
|
||||
/* Second part (aligned) */
|
||||
start = LABEL();
|
||||
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
|
||||
|
||||
partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
|
||||
if (common->mode == PCRE2_JIT_COMPLETE)
|
||||
add_jump(compiler, &common->failed_match, partial_quit[1]);
|
||||
|
||||
load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0);
|
||||
|
||||
if (compare_type != vector_compare_match2)
|
||||
{
|
||||
if (compare_type == vector_compare_match1i)
|
||||
fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
|
||||
|
||||
/* VFEE */
|
||||
instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
|
||||
instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
|
||||
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
|
||||
sljit_emit_op_custom(compiler, instruction, 6);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < 3; i++)
|
||||
fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
|
||||
|
||||
/* VFENE */
|
||||
instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
|
||||
instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
|
||||
instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
|
||||
sljit_emit_op_custom(compiler, instruction, 6);
|
||||
}
|
||||
|
||||
sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW);
|
||||
JUMPTO(SLJIT_OVERFLOW, start);
|
||||
|
||||
/* VLGVB */
|
||||
instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind);
|
||||
instruction[1] = 7;
|
||||
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
|
||||
sljit_emit_op_custom(compiler, instruction, 6);
|
||||
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
|
||||
|
||||
JUMPHERE(quit);
|
||||
|
||||
if (common->mode != PCRE2_JIT_COMPLETE)
|
||||
{
|
||||
JUMPHERE(partial_quit[0]);
|
||||
JUMPHERE(partial_quit[1]);
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
|
||||
SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
|
||||
}
|
||||
else
|
||||
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (common->utf && offset > 0)
|
||||
{
|
||||
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
|
||||
|
||||
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
|
||||
|
||||
quit = jump_if_utf_char_start(compiler, TMP1);
|
||||
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||
|
||||
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 16);
|
||||
JUMPTO(SLJIT_JUMP, restart);
|
||||
|
||||
JUMPHERE(quit);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD 1
|
||||
|
||||
/* Emits JIT code that scans forward from TMP1 for a code unit equal to char1
or char2, 16 bytes at a time.

Arguments:
  common   compiler state
  char1    first accepted character
  char2    second accepted character (equal to char1 for a single character)

Returns the list of jumps taken when TMP1 reaches or passes STR_END, i.e. the
character was not found; the caller must bind them. On the fall-through path
TMP1 holds the advanced position. The generated code also writes TMP2, TMP3
and vector registers 0-4. */
static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
{
DEFINE_COMPILER;
sljit_u16 instruction[3];
struct sljit_label *start;
struct sljit_jump *quit;
jump_list *not_found = NULL;
vector_compare_type compare_type = vector_compare_match1;
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
sljit_s32 tmp3_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP3);
sljit_s32 data_ind = 0;
sljit_s32 tmp_ind = 1;
sljit_s32 cmp1_ind = 2;
sljit_s32 cmp2_ind = 3;
sljit_s32 zero_ind = 4;
sljit_u32 bit = 0;
int i;

/* Two characters differing in a single bit are matched by OR-ing that bit
into the data and comparing once; otherwise both are compared separately. */
if (char1 != char2)
  {
  bit = char1 ^ char2;
  compare_type = vector_compare_match1i;

  if (!is_powerof2(bit))
    {
    bit = 0;
    compare_type = vector_compare_match2;
    }
  }

add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));

/* First part (unaligned start) */

OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, 16);

#if PCRE2_CODE_UNIT_WIDTH != 32

/* VREPI */
instruction[0] = (sljit_u16)(0xe700 | (cmp1_ind << 4));
instruction[1] = (sljit_u16)(char1 | bit);
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
sljit_emit_op_custom(compiler, instruction, 6);

if (char1 != char2)
  {
  /* VREPI; instruction[2] is reused from the VREPI above. */
  instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4));
  instruction[1] = (sljit_u16)(bit != 0 ? bit : char2);
  sljit_emit_op_custom(compiler, instruction, 6);
  }

#else /* PCRE2_CODE_UNIT_WIDTH == 32 */

/* Reuse the function-scope counter instead of shadowing it. */
for (i = 0; i < 2; i++)
  {
  replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP3);

  if (char1 != char2)
    replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP3);
  }

#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */

if (compare_type == vector_compare_match2)
  {
  /* VREPI */
  instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4));
  instruction[1] = 0;
  instruction[2] = (sljit_u16)((0x8 << 8) | 0x45);
  sljit_emit_op_custom(compiler, instruction, 6);
  }

load_from_mem_vector(compiler, TRUE, data_ind, tmp1_reg_ind, 0);
/* TMP2 becomes the next 16-byte boundary above TMP1. */
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15);

if (compare_type != vector_compare_match2)
  {
  if (compare_type == vector_compare_match1i)
    fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

  /* VFEE */
  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
  instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
  instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
  sljit_emit_op_custom(compiler, instruction, 6);
  }
else
  {
  for (i = 0; i < 3; i++)
    fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

  /* VFENE */
  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
  instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
  instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
  sljit_emit_op_custom(compiler, instruction, 6);
  }

/* VLGVB */
instruction[0] = (sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind);
instruction[1] = 7;
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
sljit_emit_op_custom(compiler, instruction, 6);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
quit = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0);

OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 16);

/* Second part (aligned) */
start = LABEL();

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 16);

add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));

load_from_mem_vector(compiler, TRUE, data_ind, tmp1_reg_ind, 0);

if (compare_type != vector_compare_match2)
  {
  if (compare_type == vector_compare_match1i)
    fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

  /* VFEE */
  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
  instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
  instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
  sljit_emit_op_custom(compiler, instruction, 6);
  }
else
  {
  for (i = 0; i < 3; i++)
    fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

  /* VFENE */
  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
  instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
  instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
  sljit_emit_op_custom(compiler, instruction, 6);
  }

/* Loop on the condition code left by the search instruction. */
sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW);
JUMPTO(SLJIT_OVERFLOW, start);

/* VLGVB */
instruction[0] = (sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind);
instruction[1] = 7;
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
sljit_emit_op_custom(compiler, instruction, 6);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);

JUMPHERE(quit);
add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));

return not_found;
}
|
||||
|
||||
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1
|
||||
|
||||
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
|
||||
PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
|
||||
{
|
||||
DEFINE_COMPILER;
|
||||
sljit_u16 instruction[3];
|
||||
struct sljit_label *start;
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
struct sljit_label *restart;
|
||||
#endif
|
||||
struct sljit_jump *quit;
|
||||
struct sljit_jump *jump[2];
|
||||
vector_compare_type compare1_type = vector_compare_match1;
|
||||
vector_compare_type compare2_type = vector_compare_match1;
|
||||
sljit_u32 bit1 = 0;
|
||||
sljit_u32 bit2 = 0;
|
||||
sljit_s32 diff = IN_UCHARS(offs2 - offs1);
|
||||
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
|
||||
sljit_s32 tmp2_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP2);
|
||||
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
|
||||
sljit_s32 data1_ind = 0;
|
||||
sljit_s32 data2_ind = 1;
|
||||
sljit_s32 tmp1_ind = 2;
|
||||
sljit_s32 tmp2_ind = 3;
|
||||
sljit_s32 cmp1a_ind = 4;
|
||||
sljit_s32 cmp1b_ind = 5;
|
||||
sljit_s32 cmp2a_ind = 6;
|
||||
sljit_s32 cmp2b_ind = 7;
|
||||
sljit_s32 zero_ind = 8;
|
||||
int i;
|
||||
|
||||
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
|
||||
SLJIT_ASSERT(-diff <= (sljit_s32)IN_UCHARS(max_fast_forward_char_pair_offset()));
|
||||
SLJIT_ASSERT(tmp1_reg_ind != 0 && tmp2_reg_ind != 0);
|
||||
|
||||
if (char1a != char1b)
|
||||
{
|
||||
bit1 = char1a ^ char1b;
|
||||
compare1_type = vector_compare_match1i;
|
||||
|
||||
if (!is_powerof2(bit1))
|
||||
{
|
||||
bit1 = 0;
|
||||
compare1_type = vector_compare_match2;
|
||||
}
|
||||
}
|
||||
|
||||
if (char2a != char2b)
|
||||
{
|
||||
bit2 = char2a ^ char2b;
|
||||
compare2_type = vector_compare_match1i;
|
||||
|
||||
if (!is_powerof2(bit2))
|
||||
{
|
||||
bit2 = 0;
|
||||
compare2_type = vector_compare_match2;
|
||||
}
|
||||
}
|
||||
|
||||
/* Initialize. */
|
||||
if (common->match_end_ptr != 0)
|
||||
{
|
||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
|
||||
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
|
||||
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
|
||||
|
||||
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0);
|
||||
SELECT(SLJIT_LESS, STR_END, TMP1, 0, STR_END);
|
||||
}
|
||||
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
|
||||
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||
OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15);
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
|
||||
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff);
|
||||
|
||||
/* VREPI */
|
||||
instruction[0] = (sljit_u16)(0xe700 | (cmp1a_ind << 4));
|
||||
instruction[1] = (sljit_u16)(char1a | bit1);
|
||||
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
|
||||
sljit_emit_op_custom(compiler, instruction, 6);
|
||||
|
||||
if (char1a != char1b)
|
||||
{
|
||||
/* VREPI */
|
||||
instruction[0] = (sljit_u16)(0xe700 | (cmp1b_ind << 4));
|
||||
instruction[1] = (sljit_u16)(bit1 != 0 ? bit1 : char1b);
|
||||
/* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */
|
||||
sljit_emit_op_custom(compiler, instruction, 6);
|
||||
}
|
||||
|
||||
/* VREPI */
|
||||
instruction[0] = (sljit_u16)(0xe700 | (cmp2a_ind << 4));
|
||||
instruction[1] = (sljit_u16)(char2a | bit2);
|
||||
/* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */
|
||||
sljit_emit_op_custom(compiler, instruction, 6);
|
||||
|
||||
if (char2a != char2b)
|
||||
{
|
||||
/* VREPI */
|
||||
instruction[0] = (sljit_u16)(0xe700 | (cmp2b_ind << 4));
|
||||
instruction[1] = (sljit_u16)(bit2 != 0 ? bit2 : char2b);
|
||||
/* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */
|
||||
sljit_emit_op_custom(compiler, instruction, 6);
|
||||
}
|
||||
|
||||
#else /* PCRE2_CODE_UNIT_WIDTH == 32 */
|
||||
|
||||
for (int i = 0; i < 2; i++)
|
||||
{
|
||||
replicate_imm_vector(compiler, i, cmp1a_ind, char1a | bit1, TMP1);
|
||||
|
||||
if (char1a != char1b)
|
||||
replicate_imm_vector(compiler, i, cmp1b_ind, bit1 != 0 ? bit1 : char1b, TMP1);
|
||||
|
||||
replicate_imm_vector(compiler, i, cmp2a_ind, char2a | bit2, TMP1);
|
||||
|
||||
if (char2a != char2b)
|
||||
replicate_imm_vector(compiler, i, cmp2b_ind, bit2 != 0 ? bit2 : char2b, TMP1);
|
||||
}
|
||||
|
||||
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff);
|
||||
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||
|
||||
/* VREPI */
|
||||
instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4));
|
||||
instruction[1] = 0;
|
||||
instruction[2] = (sljit_u16)((0x8 << 8) | 0x45);
|
||||
sljit_emit_op_custom(compiler, instruction, 6);
|
||||
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
restart = LABEL();
|
||||
#endif
|
||||
|
||||
jump[0] = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0);
|
||||
load_from_mem_vector(compiler, TRUE, data2_ind, tmp1_reg_ind, 0);
|
||||
jump[1] = JUMP(SLJIT_JUMP);
|
||||
JUMPHERE(jump[0]);
|
||||
load_from_mem_vector(compiler, FALSE, data2_ind, tmp1_reg_ind, 0);
|
||||
JUMPHERE(jump[1]);
|
||||
|
||||
load_from_mem_vector(compiler, TRUE, data1_ind, str_ptr_reg_ind, 0);
|
||||
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 16);
|
||||
|
||||
for (i = 0; i < 3; i++)
|
||||
{
|
||||
fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
|
||||
fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
|
||||
}
|
||||
|
||||
/* VN */
|
||||
instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
|
||||
instruction[1] = (sljit_u16)(data2_ind << 12);
|
||||
instruction[2] = (sljit_u16)((0xe << 8) | 0x68);
|
||||
sljit_emit_op_custom(compiler, instruction, 6);
|
||||
|
||||
/* VFENE */
|
||||
instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
|
||||
instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
|
||||
instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
|
||||
sljit_emit_op_custom(compiler, instruction, 6);
|
||||
|
||||
/* VLGVB */
|
||||
instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data1_ind);
|
||||
instruction[1] = 7;
|
||||
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
|
||||
sljit_emit_op_custom(compiler, instruction, 6);
|
||||
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
|
||||
quit = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
|
||||
|
||||
OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16);
|
||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, diff);
|
||||
|
||||
/* Main loop. */
|
||||
start = LABEL();
|
||||
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
|
||||
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||
|
||||
load_from_mem_vector(compiler, FALSE, data1_ind, str_ptr_reg_ind, 0);
|
||||
load_from_mem_vector(compiler, FALSE, data2_ind, str_ptr_reg_ind, tmp1_reg_ind);
|
||||
|
||||
for (i = 0; i < 3; i++)
|
||||
{
|
||||
fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
|
||||
fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
|
||||
}
|
||||
|
||||
/* VN */
|
||||
instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
|
||||
instruction[1] = (sljit_u16)(data2_ind << 12);
|
||||
instruction[2] = (sljit_u16)((0xe << 8) | 0x68);
|
||||
sljit_emit_op_custom(compiler, instruction, 6);
|
||||
|
||||
/* VFENE */
|
||||
instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
|
||||
instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
|
||||
instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
|
||||
sljit_emit_op_custom(compiler, instruction, 6);
|
||||
|
||||
sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW);
|
||||
JUMPTO(SLJIT_OVERFLOW, start);
|
||||
|
||||
/* VLGVB */
|
||||
instruction[0] = (sljit_u16)(0xe700 | (tmp2_reg_ind << 4) | data1_ind);
|
||||
instruction[1] = 7;
|
||||
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
|
||||
sljit_emit_op_custom(compiler, instruction, 6);
|
||||
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
|
||||
|
||||
JUMPHERE(quit);
|
||||
|
||||
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (common->utf)
|
||||
{
|
||||
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
|
||||
|
||||
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));
|
||||
|
||||
quit = jump_if_utf_char_start(compiler, TMP1);
|
||||
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||
|
||||
/* TMP1 contains diff. */
|
||||
OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15);
|
||||
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff);
|
||||
JUMPTO(SLJIT_JUMP, restart);
|
||||
|
||||
JUMPHERE(quit);
|
||||
}
|
||||
#endif
|
||||
|
||||
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
|
||||
|
||||
if (common->match_end_ptr != 0)
|
||||
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
|
||||
}
|
||||
|
||||
#endif /* SLJIT_CONFIG_S390X */
|
||||
|
||||
#if (defined SLJIT_CONFIG_LOONGARCH_64 && SLJIT_CONFIG_LOONGARCH_64)
|
||||
|
||||
/* HAS_LSX_SUPPORT: run-time check for the LSX vector extension. It is
evaluated on every use, and is the constant 0 on non-Linux systems. */
#ifdef __linux__
|
||||
/* Using getauxval(AT_HWCAP) under Linux for detecting whether LSX is available */
|
||||
#include <sys/auxv.h>
|
||||
/* Bit tested in the AT_HWCAP value to detect LSX. */
#define LOONGARCH_HWCAP_LSX (1 << 4)
|
||||
#define HAS_LSX_SUPPORT ((getauxval(AT_HWCAP) & LOONGARCH_HWCAP_LSX) != 0)
|
||||
#else
|
||||
#define HAS_LSX_SUPPORT 0
|
||||
#endif
|
||||
|
||||
/* NOTE(review): this declares sljit_u32 as an alias of sljit_ins, so
sljit_ins must already be declared at this point - presumably by the
LoongArch sljit backend. Confirm the direction is intended. */
typedef sljit_ins sljit_u32;
|
||||
|
||||
/* Masks for the immediate fields, already shifted to their position (bit 10
and up) in the instruction word. */
#define SI12_IMM_MASK 0x003ffc00
#define UI5_IMM_MASK 0x00007c00
#define UI2_IMM_MASK 0x00000c00

/* Register field encoders: shift a register index to bit 0, 5 or 10 of the
instruction word. The argument is parenthesized so that an expression such
as VJ(a | b) is converted and shifted as a whole. */
#define VD(vd) ((sljit_ins)(vd) << 0)
#define VJ(vj) ((sljit_ins)(vj) << 5)
#define VK(vk) ((sljit_ins)(vk) << 10)
#define RD_V(rd) ((sljit_ins)(rd) << 0)
#define RJ_V(rj) ((sljit_ins)(rj) << 5)

/* Immediate field encoders: shift to bit 10 and truncate to the field width. */
#define IMM_SI12(imm) (((sljit_ins)(imm) << 10) & SI12_IMM_MASK)
#define IMM_UI5(imm) (((sljit_ins)(imm) << 10) & UI5_IMM_MASK)
#define IMM_UI2(imm) (((sljit_ins)(imm) << 10) & UI2_IMM_MASK)
|
||||
|
||||
/* LSX opcodes: base instruction words that the emitters below OR together
with the register and immediate field macros. */
#define VLD 0x2c000000
|
||||
#define VOR_V 0x71268000
|
||||
#define VAND_V 0x71260000
|
||||
#define VBSLL_V 0x728e0000
|
||||
#define VMSKLTZ_B 0x729c4000
|
||||
#define VPICKVE2GR_WU 0x72f3e000
|
||||
|
||||
/* VREPLGR2VR and VSEQ exist per element width; pick the variant matching
the code unit width (the emitters refer to them as .B/.H/.W). */
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define VREPLGR2VR 0x729f0000
|
||||
#define VSEQ 0x70000000
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
#define VREPLGR2VR 0x729f0400
|
||||
#define VSEQ 0x70008000
|
||||
#else
|
||||
#define VREPLGR2VR 0x729f0800
|
||||
#define VSEQ 0x70010000
|
||||
#endif
|
||||
|
||||
/* Emits the LSX compare sequence that turns the data in dst_ind into a
per-element match mask.

  vector_compare_match1:   dst = (dst == cmp1)
  vector_compare_match1i:  dst = ((dst | cmp2) == cmp1)
  vector_compare_match2:   dst = (dst == cmp1) | (dst == cmp2), with tmp_ind
                           holding the second comparison

Arguments are vector register indices, except compiler and compare_type. */
static void fast_forward_char_pair_lsx_compare(struct sljit_compiler *compiler, vector_compare_type compare_type,
  sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
{
if (compare_type == vector_compare_match2)
  {
  /* VBSLL.V vd, vj, ui5 (shift count 0; presumably just copies dst to tmp) */
  push_inst(compiler, VBSLL_V | VD(tmp_ind) | VJ(dst_ind) | IMM_UI5(0));

  /* VSEQ.B/H/W vd, vj, vk */
  push_inst(compiler, VSEQ | VD(dst_ind) | VJ(dst_ind) | VK(cmp1_ind));

  /* VSEQ.B/H/W vd, vj, vk */
  push_inst(compiler, VSEQ | VD(tmp_ind) | VJ(tmp_ind) | VK(cmp2_ind));

  /* VOR vd, vj, vk */
  push_inst(compiler, VOR_V | VD(dst_ind) | VJ(tmp_ind) | VK(dst_ind));
  return;
  }

if (compare_type == vector_compare_match1i)
  {
  /* VOR.V vd, vj, vk */
  push_inst(compiler, VOR_V | VD(dst_ind) | VJ(cmp2_ind) | VK(dst_ind));
  }

/* VSEQ.B/H/W vd, vj, vk */
push_inst(compiler, VSEQ | VD(dst_ind) | VJ(dst_ind) | VK(cmp1_ind));
}
|
||||
|
||||
#define JIT_HAS_FAST_FORWARD_CHAR_SIMD HAS_LSX_SUPPORT
|
||||
|
||||
/* Generates JIT code that moves STR_PTR forward to the next code unit equal
   to char1 or char2, scanning 16 bytes at a time with LSX vector loads.

   common  compiler state; common->mode selects between jumping to
           common->failed_match (PCRE2_JIT_COMPLETE) and clamping STR_PTR to
           STR_END (other modes) when the end of the subject is reached
   char1   first accepted character
   char2   second accepted character; equal to char1 when only a single
           character is searched for
   offset  only read in the UTF (8/16 bit) build: when positive, the code
           unit at STR_PTR - offset must start a character, otherwise the
           search is restarted one code unit further on

   The generated code overwrites TMP1 and TMP2 and vector registers 0-3. */
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *aligned_loop;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
#endif
struct sljit_jump *found;
struct sljit_jump *end_reached[2];
vector_compare_type compare_type = vector_compare_match1;
sljit_s32 gpr_tmp1 = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
sljit_s32 gpr_str_ptr = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
/* Vector register numbers used by the generated code. */
sljit_s32 vec_data = 0;
sljit_s32 vec_tmp = 1;
sljit_s32 vec_cmp1 = 2;
sljit_s32 vec_cmp2 = 3;
sljit_u32 char_diff;
sljit_u32 bit = 0;

SLJIT_UNUSED_ARG(offset);

/* Choose the comparison strategy: a single character, two characters that
   differ in exactly one bit, or two unrelated characters. */
if (char1 != char2)
  {
  char_diff = char1 ^ char2;

  if (is_powerof2(char_diff))
    {
    bit = char_diff;
    compare_type = vector_compare_match1i;
    }
  else
    compare_type = vector_compare_match2;
  }

end_reached[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, end_reached[0]);

/* First part (unaligned start) */

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1 | bit);

/* VREPLGR2VR.B/H/W vd, rj */
push_inst(compiler, VREPLGR2VR | VD(vec_cmp1) | RJ_V(gpr_tmp1));

if (char1 != char2)
  {
  /* The second vector holds either the differing bit or the other character. */
  if (compare_type == vector_compare_match1i)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, bit);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char2);
    }

  /* VREPLGR2VR.B/H/W vd, rj */
  push_inst(compiler, VREPLGR2VR | VD(vec_cmp2) | RJ_V(gpr_tmp1));
  }

OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
restart = LABEL();
#endif

/* TMP2 = misalignment of STR_PTR within its 16 byte block; STR_PTR is
   rounded down to the block start for the load. */
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* VLD vd, rj, si12 */
push_inst(compiler, VLD | VD(vec_data) | RJ_V(gpr_str_ptr) | IMM_SI12(0));
fast_forward_char_pair_lsx_compare(compiler, compare_type, vec_data, vec_cmp1, vec_cmp2, vec_tmp);

/* VMSKLTZ.B vd, vj */
push_inst(compiler, VMSKLTZ_B | VD(vec_tmp) | VJ(vec_data));

/* VPICKVE2GR.WU rd, vj, ui2 */
push_inst(compiler, VPICKVE2GR_WU | RD_V(gpr_tmp1) | VJ(vec_tmp) | IMM_UI2(0));

/* Restore STR_PTR and shift out the mask bits that belong to bytes in
   front of it. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);

found = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* Second part (aligned) */
aligned_loop = LABEL();

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);

end_reached[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, end_reached[1]);

/* VLD vd, rj, si12 */
push_inst(compiler, VLD | VD(vec_data) | RJ_V(gpr_str_ptr) | IMM_SI12(0));
fast_forward_char_pair_lsx_compare(compiler, compare_type, vec_data, vec_cmp1, vec_cmp2, vec_tmp);

/* VMSKLTZ.B vd, vj */
push_inst(compiler, VMSKLTZ_B | VD(vec_tmp) | VJ(vec_data));

/* VPICKVE2GR.WU rd, vj, ui2 */
push_inst(compiler, VPICKVE2GR_WU | RD_V(gpr_tmp1) | VJ(vec_tmp) | IMM_UI2(0));

CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, aligned_loop);

JUMPHERE(found);

/* CTZ.W rd, rj */
push_inst(compiler, CTZ_W | RD_V(gpr_tmp1) | RJ_V(gpr_tmp1));

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
else
  {
  /* Both end-of-subject exits land here; STR_PTR is clamped to STR_END. */
  JUMPHERE(end_reached[0]);
  JUMPHERE(end_reached[1]);
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
  SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);

  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));

  found = jump_if_utf_char_start(compiler, TMP1);

  /* Not at a character start: step one code unit and search again. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, restart);

  JUMPHERE(found);
  }
#endif
}
|
||||
|
||||
#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD HAS_LSX_SUPPORT
|
||||
|
||||
/* Generates JIT code that searches for char1 or char2 between the address
   held in TMP1 on entry and STR_END, scanning 16 bytes at a time with LSX
   vector loads.

   common  compiler state
   char1   first accepted character
   char2   second accepted character; equal to char1 when only a single
           character is searched for

   Returns the list of jumps that are taken when no such character exists
   before STR_END; the caller has to bind them.

   In the generated code STR_PTR is saved to TMP3, used as the scan pointer,
   and restored from TMP3 on the fall-through (found) path, where TMP1 holds
   the address of the match. The not_found jumps are taken before that
   restore is executed. TMP2 and vector registers 0-3 are overwritten. */
static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
jump_list *not_found = NULL;
vector_compare_type compare_type = vector_compare_match1;
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
sljit_s32 data_ind = 0;
sljit_s32 tmp_ind = 1;
sljit_s32 cmp1_ind = 2;
sljit_s32 cmp2_ind = 3;
sljit_u32 bit = 0;

/* Choose the comparison strategy: a single character, two characters that
   differ in exactly one bit, or two unrelated characters. */
if (char1 != char2)
  {
  bit = char1 ^ char2;
  compare_type = vector_compare_match1i;

  if (!is_powerof2(bit))
    {
    bit = 0;
    compare_type = vector_compare_match2;
    }
  }

add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);

/* First part (unaligned start) */

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1 | bit);

/* VREPLGR2VR.B/H/W vd, rj */
push_inst(compiler, VREPLGR2VR | VD(cmp1_ind) | RJ_V(tmp1_reg_ind));

if (char1 != char2)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, bit != 0 ? bit : char2);

  /* VREPLGR2VR.B/H/W vd, rj */
  push_inst(compiler, VREPLGR2VR | VD(cmp2_ind) | RJ_V(tmp1_reg_ind));
  }

/* STR_PTR becomes the scan pointer, rounded down to a 16 byte boundary;
   TMP2 keeps the number of bytes it was moved back. */
OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* VLD vd, rj, si12 */
push_inst(compiler, VLD | VD(data_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
fast_forward_char_pair_lsx_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

/* VMSKLTZ.B vd, vj */
push_inst(compiler, VMSKLTZ_B | VD(tmp_ind) | VJ(data_ind));

/* VPICKVE2GR.WU rd, vj, ui2 */
push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp_ind) | IMM_UI2(0));

/* Undo the rounding and shift out the mask bits that belong to bytes in
   front of the start address. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);

quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* Second part (aligned) */
start = LABEL();

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);

add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

/* VLD vd, rj, si12 */
push_inst(compiler, VLD | VD(data_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
fast_forward_char_pair_lsx_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

/* VMSKLTZ.B vd, vj */
push_inst(compiler, VMSKLTZ_B | VD(tmp_ind) | VJ(data_ind));

/* VPICKVE2GR.WU rd, vj, ui2 */
push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp_ind) | IMM_UI2(0));

CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);

JUMPHERE(quit);

/* CTZ.W rd, rj */
push_inst(compiler, CTZ_W | RD_V(tmp1_reg_ind) | RJ_V(tmp1_reg_ind));

/* TMP1 = address of the first matching position. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, STR_PTR, 0);
add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));

OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
return not_found;
}
|
||||
|
||||
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD HAS_LSX_SUPPORT
|
||||
|
||||
/* Generates JIT code that moves STR_PTR forward to the next position where
   the code unit at offs1 is char1a or char1b AND the code unit at offs2 is
   char2a or char2b, scanning 16 bytes at a time with LSX vector loads.

   common          compiler state; only PCRE2_JIT_COMPLETE mode is supported
                   (asserted). When common->match_end_ptr is non-zero,
                   STR_END is temporarily lowered and restored from TMP3 at
                   the end.
   offs1           offset of the first character; must be greater than offs2
   char1a, char1b  accepted values at offs1 (equal for a single character)
   offs2           offset of the second character
   char2a, char2b  accepted values at offs2 (equal for a single character)

   When nothing is found before STR_END the generated code jumps to
   common->failed_match. TMP1, TMP2 and vector registers 0-7 are
   overwritten. */
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
  PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
{
DEFINE_COMPILER;
vector_compare_type compare1_type = vector_compare_match1;
vector_compare_type compare2_type = vector_compare_match1;
sljit_u32 bit1 = 0;
sljit_u32 bit2 = 0;
/* Distance in bytes between the two compared positions. */
sljit_u32 diff = IN_UCHARS(offs1 - offs2);
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
sljit_s32 tmp2_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP2);
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
/* Vector register numbers used by the generated code. */
sljit_s32 data1_ind = 0;
sljit_s32 data2_ind = 1;
sljit_s32 tmp1_ind = 2;
sljit_s32 tmp2_ind = 3;
sljit_s32 cmp1a_ind = 4;
sljit_s32 cmp1b_ind = 5;
sljit_s32 cmp2a_ind = 6;
sljit_s32 cmp2b_ind = 7;
struct sljit_label *start;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
#endif
struct sljit_jump *jump[2];

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
SLJIT_ASSERT(diff <= (unsigned)IN_UCHARS(max_fast_forward_char_pair_offset()));

/* Initialize. */
if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0);
  SELECT(SLJIT_LESS, STR_END, TMP1, 0, STR_END);
  }

/* From here on STR_PTR points at the offs1 position of the candidate. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

/* Replicate the comparison value(s) for the first character. */
if (char1a == char1b)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1a);
else
  {
  bit1 = char1a ^ char1b;
  if (is_powerof2(bit1))
    {
    compare1_type = vector_compare_match1i;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1a | bit1);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, bit1);
    }
  else
    {
    compare1_type = vector_compare_match2;
    bit1 = 0;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1a);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, char1b);
    }
  }

/* VREPLGR2VR.B/H/W vd, rj */
push_inst(compiler, VREPLGR2VR | VD(cmp1a_ind) | RJ_V(tmp1_reg_ind));

if (char1a != char1b)
  {
  /* VREPLGR2VR.B/H/W vd, rj */
  push_inst(compiler, VREPLGR2VR | VD(cmp1b_ind) | RJ_V(tmp2_reg_ind));
  }

/* Replicate the comparison value(s) for the second character. */
if (char2a == char2b)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char2a);
else
  {
  bit2 = char2a ^ char2b;
  if (is_powerof2(bit2))
    {
    compare2_type = vector_compare_match1i;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char2a | bit2);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, bit2);
    }
  else
    {
    compare2_type = vector_compare_match2;
    bit2 = 0;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char2a);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, char2b);
    }
  }

/* VREPLGR2VR.B/H/W vd, rj */
push_inst(compiler, VREPLGR2VR | VD(cmp2a_ind) | RJ_V(tmp1_reg_ind));

if (char2a != char2b)
  {
  /* VREPLGR2VR.B/H/W vd, rj */
  push_inst(compiler, VREPLGR2VR | VD(cmp2b_ind) | RJ_V(tmp2_reg_ind));
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
restart = LABEL();
#endif

/* TMP1 = address of the second character; TMP2 = misalignment of STR_PTR
   within its 16 byte block; STR_PTR is rounded down to the block start. */
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, diff);
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* VLD vd, rj, si12 */
push_inst(compiler, VLD | VD(data1_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));

/* If the second character's address is not below the aligned block start,
   data2 is derived from data1 by a byte shift instead of a second load at
   STR_PTR - diff. */
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_PTR, 0);

/* VLD vd, rj, si12 */
push_inst(compiler, VLD | VD(data2_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(-(sljit_s8)diff));
jump[1] = JUMP(SLJIT_JUMP);

JUMPHERE(jump[0]);

/* VBSLL.V vd, vj, ui5 */
push_inst(compiler, VBSLL_V | VD(data2_ind) | VJ(data1_ind) | IMM_UI5(diff));

JUMPHERE(jump[1]);

fast_forward_char_pair_lsx_compare(compiler, compare2_type, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
fast_forward_char_pair_lsx_compare(compiler, compare1_type, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);

/* A position is a candidate only when BOTH characters match, so the two
   results are combined with AND, exactly as in the main loop below. */

/* VAND.V vd, vj, vk */
push_inst(compiler, VAND_V | VD(data1_ind) | VJ(data1_ind) | VK(data2_ind));

/* VMSKLTZ.B vd, vj */
push_inst(compiler, VMSKLTZ_B | VD(tmp1_ind) | VJ(data1_ind));

/* VPICKVE2GR.WU rd, vj, ui2 */
push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp1_ind) | IMM_UI2(0));

/* Ignore matches before the first STR_PTR. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);

jump[0] = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* Main loop. */
start = LABEL();

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

/* VLD vd, rj, si12 */
push_inst(compiler, VLD | VD(data1_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
push_inst(compiler, VLD | VD(data2_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(-(sljit_s8)diff));

fast_forward_char_pair_lsx_compare(compiler, compare1_type, data1_ind, cmp1a_ind, cmp1b_ind, tmp2_ind);
fast_forward_char_pair_lsx_compare(compiler, compare2_type, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind);

/* VAND.V vd, vj, vk */
push_inst(compiler, VAND_V | VD(data1_ind) | VJ(data1_ind) | VK(data2_ind));

/* VMSKLTZ.B vd, vj */
push_inst(compiler, VMSKLTZ_B | VD(tmp1_ind) | VJ(data1_ind));

/* VPICKVE2GR.WU rd, vj, ui2 */
push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp1_ind) | IMM_UI2(0));

CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);

JUMPHERE(jump[0]);

/* CTZ.W rd, rj */
push_inst(compiler, CTZ_W | RD_V(tmp1_reg_ind) | RJ_V(tmp1_reg_ind));

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));

  jump[0] = jump_if_utf_char_start(compiler, TMP1);

  /* The candidate does not begin at a character start: step one code unit
     and search again, or fail when the subject is exhausted. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart);

  add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP));

  JUMPHERE(jump[0]);
  }
#endif

/* Move STR_PTR back from the offs1 position to the start of the candidate. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
|
||||
|
||||
#endif /* SLJIT_CONFIG_LOONGARCH_64 */
|
||||
|
||||
#endif /* !SUPPORT_VALGRIND */
|
||||
2541
3rd/pcre2/src/pcre2_jit_test.c
Normal file
2541
3rd/pcre2/src/pcre2_jit_test.c
Normal file
@@ -0,0 +1,2541 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#define PCRE2_CODE_UNIT_WIDTH 0
|
||||
#include "pcre2.h"
|
||||
|
||||
/*
|
||||
Letter characters:
|
||||
\xe6\x92\xad = 0x64ad = 25773 (kanji)
|
||||
Non-letter characters:
|
||||
\xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
|
||||
\xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
|
||||
\xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
|
||||
\xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
|
||||
Newlines:
|
||||
\xc2\x85 = 0x85 = 133 (NExt Line = NEL)
|
||||
\xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
|
||||
Othercase pairs:
|
||||
\xc3\xa9 = 0xe9 = 233 (e')
|
||||
\xc3\x89 = 0xc9 = 201 (E')
|
||||
\xc3\xa1 = 0xe1 = 225 (a')
|
||||
\xc3\x81 = 0xc1 = 193 (A')
|
||||
\x53 = 0x53 = S
|
||||
\x73 = 0x73 = s
|
||||
\xc5\xbf = 0x17f = 383 (long S)
|
||||
\xc8\xba = 0x23a = 570
|
||||
\xe2\xb1\xa5 = 0x2c65 = 11365
|
||||
\xe1\xbd\xb8 = 0x1f78 = 8056
|
||||
\xe1\xbf\xb8 = 0x1ff8 = 8184
|
||||
\xf0\x90\x90\x80 = 0x10400 = 66560
|
||||
\xf0\x90\x90\xa8 = 0x10428 = 66600
|
||||
\xc7\x84 = 0x1c4 = 452
|
||||
\xc7\x85 = 0x1c5 = 453
|
||||
\xc7\x86 = 0x1c6 = 454
|
||||
Caseless sets:
|
||||
ucp_Armenian - \x{531}-\x{556} -> \x{561}-\x{586}
|
||||
ucp_Coptic - \x{2c80}-\x{2ce3} -> caseless: XOR 0x1
|
||||
ucp_Latin - \x{ff21}-\x{ff3a} -> \x{ff41}-\x{ff5a}
|
||||
|
||||
Mark property:
|
||||
\xcc\x8d = 0x30d = 781
|
||||
Special:
|
||||
\xc2\x80 = 0x80 = 128 (lowest 2 byte character)
|
||||
\xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
|
||||
\xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
|
||||
\xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
|
||||
\xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
|
||||
\xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
|
||||
*/
|
||||
|
||||
static int regression_tests(void);
|
||||
static int invalid_utf8_regression_tests(void);
|
||||
static int invalid_utf16_regression_tests(void);
|
||||
static int invalid_utf32_regression_tests(void);
|
||||
|
||||
/* Entry point of the JIT regression test program.

   Queries PCRE2_CONFIG_JIT through the first available code unit width
   (8, then 16, then 32 bit). Returns 1 after printing a message when JIT is
   not reported as available; otherwise returns the bitwise OR of the results
   of the four regression suites. */
int main(void)
{
	int jit = 0;
	int result;

#if defined SUPPORT_PCRE2_8
	pcre2_config_8(PCRE2_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE2_16
	pcre2_config_16(PCRE2_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE2_32
	pcre2_config_32(PCRE2_CONFIG_JIT, &jit);
#endif
	if (!jit) {
		printf("JIT must be enabled to run pcre2_jit_test\n");
		return 1;
	}

	/* Run the suites as separate statements: the operands of a single '|'
	   expression may be evaluated in any order, which would leave the order
	   in which the suites run up to the compiler. */
	result = regression_tests();
	result |= invalid_utf8_regression_tests();
	result |= invalid_utf16_regression_tests();
	result |= invalid_utf32_regression_tests();
	return result;
}
|
||||
|
||||
/* --------------------------------------------------------------------------------------- */
|
||||
|
||||
#if !(defined SUPPORT_PCRE2_8) && !(defined SUPPORT_PCRE2_16) && !(defined SUPPORT_PCRE2_32)
|
||||
#error SUPPORT_PCRE2_8 or SUPPORT_PCRE2_16 or SUPPORT_PCRE2_32 must be defined
|
||||
#endif
|
||||
|
||||
/* Shorthand combinations of PCRE2 compile options, used in the first column
   of the regression test table. */
#define MU (PCRE2_MULTILINE | PCRE2_UTF)
|
||||
#define MUP (PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP)
|
||||
#define CMU (PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF)
|
||||
#define CMUP (PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP)
|
||||
#define M (PCRE2_MULTILINE)
|
||||
#define MP (PCRE2_MULTILINE | PCRE2_UCP)
|
||||
#define U (PCRE2_UTF)
|
||||
#define CM (PCRE2_CASELESS | PCRE2_MULTILINE)
|
||||
|
||||
/* BSR(x) moves a value into bits 16 and up; A is the default newline
   setting used by most table entries. */
#define BSR(x) ((x) << 16)
|
||||
#define A PCRE2_NEWLINE_ANYCRLF
|
||||
|
||||
/* Split a combined value into its low 16 bits and the part above them
   (presumably applied to the `newline` field of a test case - TODO confirm
   against the code that consumes the table). */
#define GET_NEWLINE(x) ((x) & 0xffff)
|
||||
#define GET_BSR(x) ((x) >> 16)
|
||||
|
||||
/* Flag bits above OFFSET_MASK; in the test table they are ORed into the
   start_offset column (e.g. "0 | F_NOMATCH").
   NOTE(review): F_NO16 and F_NO32 have the same value (0x020000) - confirm
   that sharing one bit is intended. */
#define OFFSET_MASK 0x00ffff
|
||||
#define F_NO8 0x010000
|
||||
#define F_NO16 0x020000
|
||||
#define F_NO32 0x020000
|
||||
#define F_NOMATCH 0x040000
|
||||
#define F_DIFF 0x080000
|
||||
#define F_FORCECONV 0x100000
|
||||
#define F_PROPERTY 0x200000
|
||||
|
||||
/* One row of the regression test table. */
struct regression_test_case {
|
||||
/* Compile option bits, e.g. MU, CMU or PCRE2_CASELESS. */
uint32_t compile_options;
|
||||
/* Newline setting, e.g. A or PCRE2_NEWLINE_CRLF in the table entries. */
int newline;
|
||||
/* Match option bits; 0 in the table entries visible here. */
int match_options;
|
||||
/* Start offset, with F_* flag bits such as F_NOMATCH ORed in above
   OFFSET_MASK. */
int start_offset;
|
||||
/* Pattern string. */
const char *pattern;
|
||||
/* Subject string the pattern is run against. */
const char *input;
|
||||
};
|
||||
|
||||
static struct regression_test_case regression_test_cases[] = {
|
||||
/* Constant strings. */
|
||||
{ MU, A, 0, 0, "AbC", "AbAbC" },
|
||||
{ MU, A, 0, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
|
||||
{ CMU, A, 0, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
|
||||
{ M, A, 0, 0, "[^a]", "aAbB" },
|
||||
{ CM, A, 0, 0, "[^m]", "mMnN" },
|
||||
{ M, A, 0, 0, "a[^b][^#]", "abacd" },
|
||||
{ CM, A, 0, 0, "A[^B][^E]", "abacd" },
|
||||
{ CMU, A, 0, 0, "[^x][^#]", "XxBll" },
|
||||
{ MU, A, 0, 0, "[^a]", "aaa\xc3\xa1#Ab" },
|
||||
{ CMU, A, 0, 0, "[^A]", "aA\xe6\x92\xad" },
|
||||
{ MU, A, 0, 0, "\\W(\\W)?\\w", "\r\n+bc" },
|
||||
{ MU, A, 0, 0, "\\W(\\W)?\\w", "\n\r+bc" },
|
||||
{ MU, A, 0, 0, "\\W(\\W)?\\w", "\r\r+bc" },
|
||||
{ MU, A, 0, 0, "\\W(\\W)?\\w", "\n\n+bc" },
|
||||
{ MU, A, 0, 0, "[axd]", "sAXd" },
|
||||
{ CMU, A, 0, 0, "[axd]", "sAXd" },
|
||||
{ CMU, A, 0, 0 | F_NOMATCH, "[^axd]", "DxA" },
|
||||
{ MU, A, 0, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
|
||||
{ MU, A, 0, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
|
||||
{ CMU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
|
||||
{ MU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
|
||||
{ MU, A, 0, 0, "[^a]", "\xc2\x80[]" },
|
||||
{ CMU, A, 0, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
|
||||
{ CM, A, 0, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
|
||||
{ PCRE2_CASELESS, 0, 0, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
|
||||
{ PCRE2_CASELESS, 0, 0, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
|
||||
{ PCRE2_CASELESS, 0, 0, 0, "a1", "Aa1" },
|
||||
#ifndef NEVER_BACKSLASH_C
|
||||
{ M, A, 0, 0, "\\Ca", "cda" },
|
||||
{ CM, A, 0, 0, "\\Ca", "CDA" },
|
||||
{ M, A, 0, 0 | F_NOMATCH, "\\Cx", "cda" },
|
||||
{ CM, A, 0, 0 | F_NOMATCH, "\\Cx", "CDA" },
|
||||
#endif /* !NEVER_BACKSLASH_C */
|
||||
{ CMUP, A, 0, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
|
||||
{ CMUP, A, 0, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
|
||||
{ CMUP, A, 0, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
|
||||
{ CMUP, A, 0, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
|
||||
{ M, A, 0, 0, "[3-57-9]", "5" },
|
||||
{ PCRE2_AUTO_CALLOUT, A, 0, 0, "12345678901234567890123456789012345678901234567890123456789012345678901234567890",
|
||||
"12345678901234567890123456789012345678901234567890123456789012345678901234567890" },
|
||||
{ 0, A, 0, 0, "..a.......b", "bbbbbbbbbbbbbbbbbbbbbabbbbbbbb" },
|
||||
{ 0, A, 0, 0, "..a.....b", "bbbbbbbbbbbbbbbbbbbbbabbbbbbbb" },
|
||||
|
||||
/* Assertions. */
|
||||
{ MU, A, 0, 0, "\\b[^A]", "A_B#" },
|
||||
{ M, A, 0, 0 | F_NOMATCH, "\\b\\W", "\n*" },
|
||||
{ MU, A, 0, 0, "\\B[^,]\\b[^s]\\b", "#X" },
|
||||
{ MP, A, 0, 0, "\\B", "_\xa1" },
|
||||
{ MP, A, 0, 0 | F_PROPERTY, "\\b_\\b[,A]\\B", "_," },
|
||||
{ MUP, A, 0, 0, "\\b", "\xe6\x92\xad!" },
|
||||
{ MUP, A, 0, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
|
||||
{ MUP, A, 0, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
|
||||
{ MUP, A, 0, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
|
||||
{ CMUP, A, 0, 0, "\\By", "\xf0\x90\x90\xa8y" },
|
||||
{ M, A, 0, 0 | F_NOMATCH, "\\R^", "\n" },
|
||||
{ M, A, 0, 1 | F_NOMATCH, "^", "\n" },
|
||||
{ 0, 0, 0, 0, "^ab", "ab" },
|
||||
{ 0, 0, 0, 0 | F_NOMATCH, "^ab", "aab" },
|
||||
{ M, PCRE2_NEWLINE_CRLF, 0, 0, "^a", "\r\raa\n\naa\r\naa" },
|
||||
{ MU, A, 0, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
|
||||
{ M, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--b--\x85--" },
|
||||
{ MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xe2\x80\xa8--" },
|
||||
{ MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xc2\x85--" },
|
||||
{ 0, 0, 0, 0, "ab$", "ab" },
|
||||
{ 0, 0, 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
|
||||
{ PCRE2_DOLLAR_ENDONLY, 0, 0, 0 | F_NOMATCH, "ab$", "abab\r\n" },
|
||||
{ M, PCRE2_NEWLINE_CRLF, 0, 0, "a$", "\r\raa\n\naa\r\naa" },
|
||||
{ M, PCRE2_NEWLINE_ANY, 0, 0, "a$", "aaa" },
|
||||
{ MU, PCRE2_NEWLINE_ANYCRLF, 0, 0, "#$", "#\xc2\x85###\r#" },
|
||||
{ MU, PCRE2_NEWLINE_ANY, 0, 0, "#$", "#\xe2\x80\xa9" },
|
||||
{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0 | F_NOMATCH, "^a", "aa\naa" },
|
||||
{ M, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0, "^a", "aa\naa" },
|
||||
{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\naa" },
|
||||
{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\r\n" },
|
||||
{ U | PCRE2_DOLLAR_ENDONLY, PCRE2_NEWLINE_ANY, 0, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
|
||||
{ M, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0, "a$", "aa\naa" },
|
||||
{ 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa" },
|
||||
{ U, PCRE2_NEWLINE_CR, 0, 0, "a\\Z", "aaa\r" },
|
||||
{ 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa\n" },
|
||||
{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r" },
|
||||
{ U, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\n" },
|
||||
{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r\n" },
|
||||
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
|
||||
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
|
||||
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
|
||||
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
|
||||
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
|
||||
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
|
||||
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
|
||||
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
|
||||
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
|
||||
{ U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xc2\x85" },
|
||||
{ U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
|
||||
{ M, A, 0, 0, "\\Aa", "aaa" },
|
||||
{ M, A, 0, 1 | F_NOMATCH, "\\Aa", "aaa" },
|
||||
{ M, A, 0, 1, "\\Ga", "aaa" },
|
||||
{ M, A, 0, 1 | F_NOMATCH, "\\Ga", "aba" },
|
||||
{ M, A, 0, 0, "a\\z", "aaa" },
|
||||
{ M, A, 0, 0 | F_NOMATCH, "a\\z", "aab" },
|
||||
|
||||
/* Brackets and alternatives. */
|
||||
{ MU, A, 0, 0, "(ab|bb|cd)", "bacde" },
|
||||
{ MU, A, 0, 0, "(?:ab|a)(bc|c)", "ababc" },
|
||||
{ MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
|
||||
{ CMU, A, 0, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
|
||||
{ MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
|
||||
{ MU, A, 0, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
|
||||
{ MU, A, 0, 0, "\xc7\x82|\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
|
||||
{ MU, A, 0, 0, "=\xc7\x82|#\xc6\x82", "\xf1\x83\x82\x82=\xc7\x82\xc7\x83" },
|
||||
{ MU, A, 0, 0, "\xc7\x82\xc7\x83|\xc6\x82\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
|
||||
{ MU, A, 0, 0, "\xc6\x82\xc6\x82|\xc7\x83\xc7\x83|\xc8\x84\xc8\x84", "\xf1\x83\x82\x82\xc8\x84\xc8\x84" },
|
||||
{ U, A, 0, 0, "\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80", "\xdf\xbf\xc2\x80\xe4\x84\x80" },
|
||||
{ U, A, 0, 0, "(?:\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80)#", "\xdf\xbf\xc2\x80#\xe4\x84\x80#" },
|
||||
{ CM, A, 0, 0, "ab|cd", "CD" },
|
||||
{ CM, A, 0, 0, "a1277|a1377|bX487", "bx487" },
|
||||
{ CM, A, 0, 0, "a1277|a1377|bx487", "bX487" },
|
||||
{ 0, A, 0, 0, "(a|)b*+a", "a" },
|
||||
{ 0, A, 0, 0 | F_NOMATCH, "(.|.|.|.|.)(|.|.|.|.)(.||.|.|.)(.|.||.|.)(.|.|.||.)(.|.|.|.|)(A|.|.|.|.)(.|A|.|.|.)(.|.|A|.|.)(.|.|.|A|.)(.|.|.|.|A)(B|.|.|.|.)(.|B|.|.|.)(.|.|B|.|.)(.|.|.|B|.)(.|.|.|.|B)xa", "1234567890123456ax" },
|
||||
|
||||
/* Greedy and non-greedy ? operators. */
|
||||
{ MU, A, 0, 0, "(?:a)?a", "laab" },
|
||||
{ CMU, A, 0, 0, "(A)?A", "llaab" },
|
||||
{ MU, A, 0, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
|
||||
{ MU, A, 0, 0, "(a)?a", "manm" },
|
||||
{ CMU, A, 0, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
|
||||
{ MU, A, 0, 0, "(a|b)?\?d((?:e)?)", "abcde" },
|
||||
{ MU, A, 0, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
|
||||
{ M, A, 0, 0, "(?:a?|a)b", "ba" },
|
||||
|
||||
/* Greedy and non-greedy + operators */
|
||||
{ MU, A, 0, 0, "(aa)+aa", "aaaaaaa" },
|
||||
{ MU, A, 0, 0, "(aa)+?aa", "aaaaaaa" },
|
||||
{ MU, A, 0, 0, "(?:aba|ab|a)+l", "ababamababal" },
|
||||
{ MU, A, 0, 0, "(?:aba|ab|a)+?l", "ababamababal" },
|
||||
{ MU, A, 0, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
|
||||
{ MU, A, 0, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
|
||||
{ MU, A, 0, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
|
||||
{ MU, A, 0, 0, "(aa|bb){8,1000}", "abaabbaabbaabbaab_aabbaabbaabbaabbaabbaabb_" },
|
||||
|
||||
/* Greedy and non-greedy * operators */
|
||||
{ CMU, A, 0, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
|
||||
{ MU, A, 0, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
|
||||
{ MU, A, 0, 0, "(aa|ab)*ab", "aaabaaab" },
|
||||
{ CMU, A, 0, 0, "(aa|Ab)*?aB", "aaabaaab" },
|
||||
{ MU, A, 0, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
|
||||
{ MU, A, 0, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
|
||||
{ M, A, 0, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
|
||||
{ M, A, 0, 0, "((?:a|)*){0}a", "a" },
|
||||
|
||||
/* Combining ? + * operators */
|
||||
{ MU, A, 0, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
|
||||
{ MU, A, 0, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
|
||||
{ MU, A, 0, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
|
||||
{ MU, A, 0, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
|
||||
{ MU, A, 0, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
|
||||
|
||||
/* Single character iterators. */
|
||||
{ MU, A, 0, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
|
||||
{ MU, A, 0, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
|
||||
{ MU, A, 0, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
|
||||
{ MU, A, 0, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
|
||||
{ MU, A, 0, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
|
||||
{ MU, A, 0, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
|
||||
{ MU, A, 0, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
|
||||
{ MU, A, 0, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
|
||||
{ MU, A, 0, 0, "(ba{2})+c", "baabaaabacbaabaac" },
|
||||
{ MU, A, 0, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
|
||||
{ MU, A, 0, 0, "(a?+[^b])+", "babaacacb" },
|
||||
{ MU, A, 0, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
|
||||
{ CMU, A, 0, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
|
||||
{ CMU, A, 0, 0, "[c-f]+k", "DemmFke" },
|
||||
{ MU, A, 0, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
|
||||
{ MU, A, 0, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
|
||||
{ CMU, A, 0, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
|
||||
{ CMU, A, 0, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
|
||||
{ CMU, A, 0, 0, "[ace]{3,}", "AcbDAcEEcEd" },
|
||||
{ CMU, A, 0, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
|
||||
{ MU, A, 0, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
|
||||
{ CMU, A, 0, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
|
||||
{ MU, A, 0, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
|
||||
{ MU, A, 0, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
|
||||
{ MU, A, 0, 0, "\\b\\w+\\B", "x,a_cd" },
|
||||
{ MUP, A, 0, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
|
||||
{ CMU, A, 0, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
|
||||
{ CMUP, A, 0, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
|
||||
{ CMU, A, 0, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
|
||||
{ CMU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
|
||||
{ MU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
|
||||
{ MU, A, 0, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
|
||||
{ MU, A, 0, 0, "\\d+123", "987654321,01234" },
|
||||
{ MU, A, 0, 0, "abcd*|\\w+xy", "aaaaa,abxyz" },
|
||||
{ MU, A, 0, 0, "(?:abc|((?:amc|\\b\\w*xy)))", "aaaaa,abxyz" },
|
||||
{ MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.abcd#."},
|
||||
{ MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.mbcd#."},
|
||||
{ MU, A, 0, 0, ".[ab]*.", "xx" },
|
||||
{ MU, A, 0, 0, ".[ab]*a", "xxa" },
|
||||
{ MU, A, 0, 0, ".[ab]?.", "xx" },
|
||||
{ MU, A, 0, 0, "_[ab]+_*a", "_aa" },
|
||||
{ MU, A, 0, 0, "#(A+)#\\d+", "#A#A#0" },
|
||||
{ MU, A, 0, 0, "(?P<size>\\d+)m|M", "4M" },
|
||||
{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\n?.+#", "\n,\n,#" },
|
||||
{ 0, A, 0, 0, "<(\\w+)[\\s\\w]+id>", "<br><div id>" },
|
||||
{ MU, A, 0, 0, "([a-z]{0,3}c;)+", "ccccc;c;cc;ccc;cccccccccccccccc;" },
|
||||
|
||||
/* Bracket repeats with limit. */
|
||||
{ MU, A, 0, 0, "(?:(ab){2}){5}M", "abababababababababababM" },
|
||||
{ MU, A, 0, 0, "(?:ab|abab){1,5}M", "abababababababababababM" },
|
||||
{ MU, A, 0, 0, "(?>ab|abab){1,5}M", "abababababababababababM" },
|
||||
{ MU, A, 0, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" },
|
||||
{ MU, A, 0, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" },
|
||||
{ MU, A, 0, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" },
|
||||
{ MU, A, 0, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" },
|
||||
{ MU, A, 0, 0, "(ab){4,6}?M", "abababababababM" },
|
||||
|
||||
/* Basic character sets. */
|
||||
{ MU, A, 0, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
|
||||
{ MU, A, 0, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
|
||||
{ MU, A, 0, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
|
||||
{ MU, A, 0, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
|
||||
{ MU, A, 0, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
|
||||
{ MU, A, 0, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
|
||||
{ MU, A, 0, 0, "x[bcef]+", "xaxdxecbfg" },
|
||||
{ MU, A, 0, 0, "x[bcdghij]+", "xaxexfxdgbjk" },
|
||||
{ MU, A, 0, 0, "x[^befg]+", "xbxexacdhg" },
|
||||
{ MU, A, 0, 0, "x[^bcdl]+", "xlxbxaekmd" },
|
||||
{ MU, A, 0, 0, "x[^bcdghi]+", "xbxdxgxaefji" },
|
||||
{ MU, A, 0, 0, "x[B-Fb-f]+", "xaxAxgxbfBFG" },
|
||||
{ CMU, A, 0, 0, "\\x{e9}+", "#\xf0\x90\x90\xa8\xc3\xa8\xc3\xa9\xc3\x89\xc3\x88" },
|
||||
{ CMU, A, 0, 0, "[^\\x{e9}]+", "\xc3\xa9#\xf0\x90\x90\xa8\xc3\xa8\xc3\x88\xc3\x89" },
|
||||
{ MU, A, 0, 0, "[\\x02\\x7e]+", "\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x02\x7e\x7f" },
|
||||
{ MU, A, 0, 0, "[^\\x02\\x7e]+", "\x02\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x7f\x7e" },
|
||||
{ MU, A, 0, 0, "[\\x{81}-\\x{7fe}]+", "#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xc2\x81\xdf\xbe\xdf\xbf" },
|
||||
{ MU, A, 0, 0, "[^\\x{81}-\\x{7fe}]+", "\xc2\x81#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xdf\xbf\xdf\xbe" },
|
||||
{ MU, A, 0, 0, "[\\x{801}-\\x{fffe}]+", "#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xe0\xa0\x81\xef\xbf\xbe\xef\xbf\xbf" },
|
||||
{ MU, A, 0, 0, "[^\\x{801}-\\x{fffe}]+", "\xe0\xa0\x81#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xef\xbf\xbf\xef\xbf\xbe" },
|
||||
{ MU, A, 0, 0, "[\\x{10001}-\\x{10fffe}]+", "#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf0\x90\x80\x81\xf4\x8f\xbf\xbe\xf4\x8f\xbf\xbf" },
|
||||
{ MU, A, 0, 0, "[^\\x{10001}-\\x{10fffe}]+", "\xf0\x90\x80\x81#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbe" },
|
||||
{ CMU, A, 0, 0 | F_NOMATCH | F_PROPERTY, "^[\\x{100}-\\x{17f}]", " " },
|
||||
{ M, A, 0, 0 | F_NOMATCH, "[^\\S\\W]{6}", "abcdefghijk" },
|
||||
|
||||
/* Unicode properties. */
|
||||
{ MUP, A, 0, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
|
||||
{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
|
||||
{ MUP, A, 0, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
|
||||
{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
|
||||
{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
|
||||
{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
|
||||
{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
|
||||
{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
|
||||
{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
|
||||
{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
|
||||
{ MUP, A, 0, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
|
||||
{ MUP, A, 0, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
|
||||
{ CMUP, A, 0, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
|
||||
{ MUP, A, 0, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
|
||||
{ MUP, A, 0, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
|
||||
{ MU, A, 0, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
|
||||
{ CMUP, A, 0, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
|
||||
{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
|
||||
{ MUP, A, 0, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
|
||||
{ PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "[a-b\\s]{2,5}[^a]", "AB baaa" },
|
||||
{ MUP, 0, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Hangul}\\p{Z}]", " " },
|
||||
{ MUP, 0, 0, 0, "[\\p{Lu}\\P{Latin}]+", "c\xEA\xA4\xAE,A,b" },
|
||||
{ MUP, 0, 0, 0, "[\\x{a92e}\\p{Lu}\\P{Latin}]+", "c\xEA\xA4\xAE,A,b" },
|
||||
{ CMUP, 0, 0, 0, "[^S]\\B", "\xe2\x80\x8a" },
|
||||
{ MUP, 0, 0, 0 | F_NOMATCH, "[^[:print:]\\x{f6f6}]", "\xef\x9b\xb6" },
|
||||
{ MUP, 0, 0, 0, "[[:xdigit:]\\x{6500}]#", "\xe6\x94\x80#" },
|
||||
{ MUP, 0, 0, 0 | F_PROPERTY, "[\\pC\\PC]#", "A#" },
|
||||
{ MUP, 0, 0, 0 | F_PROPERTY, "[\\x80-\\xff\\x{800}\\x{802}\\x{804}\\p{Cc}]", "\xdf\xbf\xe0\xa0\x80" },
|
||||
|
||||
/* Possible empty brackets. */
|
||||
{ MU, A, 0, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
|
||||
{ MU, A, 0, 0, "(|ab||bc|a)+d", "abcxabcabd" },
|
||||
{ MU, A, 0, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
|
||||
{ MU, A, 0, 0, "(|ab||bc|a)*d", "abcxabcabd" },
|
||||
{ MU, A, 0, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
|
||||
{ MU, A, 0, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
|
||||
{ MU, A, 0, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
|
||||
{ MU, A, 0, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
|
||||
{ MU, A, 0, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
|
||||
{ MU, A, 0, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
|
||||
|
||||
/* Start offset. */
|
||||
{ MU, A, 0, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
|
||||
{ MU, A, 0, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
|
||||
{ MU, A, 0, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
|
||||
{ MU, A, 0, 1, "(\\w\\W\\w)+", "ab#d" },
|
||||
|
||||
/* Newline. */
|
||||
{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
|
||||
{ M, PCRE2_NEWLINE_CR, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
|
||||
{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{1,3}[^#]", "\r\n##...." },
|
||||
{ MU, A, PCRE2_NO_UTF_CHECK, 1, "^.a", "\n\x80\nxa" },
|
||||
{ MU, A, 0, 1, "^", "\r\n" },
|
||||
{ M, PCRE2_NEWLINE_CRLF, 0, 1 | F_NOMATCH, "^", "\r\n" },
|
||||
{ M, PCRE2_NEWLINE_CRLF, 0, 1, "^", "\r\na" },
|
||||
|
||||
/* Any character except newline or any newline. */
|
||||
{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
|
||||
{ U, PCRE2_NEWLINE_CRLF, 0, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
|
||||
{ 0, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
|
||||
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
|
||||
{ U, PCRE2_NEWLINE_ANY, 0, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
|
||||
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
|
||||
{ 0, PCRE2_NEWLINE_ANY, 0, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
|
||||
{ U, PCRE2_NEWLINE_ANY, 0, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
|
||||
{ 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\r" },
|
||||
{ 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\x85#\r\n#" },
|
||||
{ U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\xe2\x80\xa8#c" },
|
||||
{ U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\r\nc" },
|
||||
{ U, PCRE2_NEWLINE_CRLF | BSR(PCRE2_BSR_UNICODE), 0, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "\\R+", "ab" },
|
||||
{ MU, A, 0, 0, "\\R+", "ab\r\n\r" },
|
||||
{ MU, A, 0, 0, "\\R*", "ab\r\n\r" },
|
||||
{ MU, A, 0, 0, "\\R*", "\r\n\r" },
|
||||
{ M, A, 0, 0, "\\R+\x85", "\r\n\n\r#\r\x85\n" },
|
||||
{ MU, A, 0, 0, "\\R{2,4}", "\r\nab\r\r" },
|
||||
{ MU, A, 0, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
|
||||
{ MU, A, 0, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
|
||||
{ MU, A, 0, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
|
||||
{ MU, A, 0, 0, "\\R{2,4}\n", "\r\n\nab\r\r\nab\r\r\n\n" },
|
||||
{ MU, A, 0, 0, "\\R{2,4}\n", "\r\n\nab\n\n\n\r\r\n" },
|
||||
{ MU, A, 0, 0, "\\R{3,}\n", "\r\n\r\n\nab\n\n\n\r\r\n\n" },
|
||||
{ MU, A, 0, 0, "\\R{0,3}\n", "\r\n\r\n\r\n\n" },
|
||||
{ MU, A, 0, 0, "\\R{0,3}\n", "\r\n\r\n\r\n\r" },
|
||||
{ MU, A, 0, 0, "(\\R{0,3}\n;)+", "\r\n\r\n\r\n\r\n\n;\n;\n\n;\n\n\n;\n\n\n\n\n;" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
|
||||
{ MU, A, 0, 0, "\\R+\\R\\R", "\r\r\r" },
|
||||
{ MU, A, 0, 0, "\\R*\\R\\R", "\n\r" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
|
||||
{ MU, A, 0, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
|
||||
|
||||
/* Atomic groups (no fallback from "next" direction). */
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
|
||||
{ MU, A, 0, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
|
||||
"bababcdedefgheijijklmlmnop" },
|
||||
{ MU, A, 0, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
|
||||
{ MU, A, 0, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
|
||||
{ MU, A, 0, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
|
||||
{ MU, A, 0, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
|
||||
{ MU, A, 0, 0, "((?>a|)+?)b", "aaacaaab" },
|
||||
{ MU, A, 0, 0, "(?>x|)*$", "aaa" },
|
||||
{ MU, A, 0, 0, "(?>(x)|)*$", "aaa" },
|
||||
{ MU, A, 0, 0, "(?>x|())*$", "aaa" },
|
||||
{ MU, A, 0, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
|
||||
{ MU, A, 0, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
|
||||
{ MU, A, 0, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
|
||||
{ MU, A, 0, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
|
||||
{ MU, A, 0, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
|
||||
{ MU, A, 0, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
|
||||
{ MU, A, 0, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
|
||||
{ MU, A, 0, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
|
||||
{ MU, A, 0, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
|
||||
{ MU, A, 0, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
|
||||
{ MU, A, 0, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
|
||||
{ MU, A, 0, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
|
||||
{ MU, A, 0, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
|
||||
{ MU, A, 0, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
|
||||
{ CM, A, 0, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
|
||||
{ MU, A, 0, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
|
||||
{ MU, A, 0, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
|
||||
{ MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
|
||||
{ MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
|
||||
{ MU, A, 0, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
|
||||
{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
|
||||
{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
|
||||
{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
|
||||
{ MU, A, 0, 0, "(c(ab)?+ab)+", "cabcababcab" },
|
||||
{ MU, A, 0, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?>a*|)a", "aaa" },
|
||||
|
||||
/* Possessive quantifiers. */
|
||||
{ MU, A, 0, 0, "(?:a|b)++m", "mababbaaxababbaam" },
|
||||
{ MU, A, 0, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
|
||||
{ MU, A, 0, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
|
||||
{ MU, A, 0, 0, "(a|b)++m", "mababbaaxababbaam" },
|
||||
{ MU, A, 0, 0, "(a|b)*+m", "mababbaaxababbaam" },
|
||||
{ MU, A, 0, 0, "(a|b)*+m", "ababbaaxababbaam" },
|
||||
{ MU, A, 0, 0, "(a|b(*ACCEPT))++m", "maaxab" },
|
||||
{ MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxm" },
|
||||
{ MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
|
||||
{ MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxm" },
|
||||
{ MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
|
||||
{ MU, A, 0, 0, "(b*)++m", "bxbbxbbbxm" },
|
||||
{ MU, A, 0, 0, "(b*)++m", "bxbbxbbbxbbm" },
|
||||
{ MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxm" },
|
||||
{ MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxbbm" },
|
||||
{ MU, A, 0, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
|
||||
{ MU, A, 0, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
|
||||
{ MU, A, 0, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
|
||||
{ MU, A, 0, 0, "(a|(b))++m", "mababbaaxababbaam" },
|
||||
{ MU, A, 0, 0, "((a)|b)*+m", "mababbaaxababbaam" },
|
||||
{ MU, A, 0, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
|
||||
{ MU, A, 0, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
|
||||
{ MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxm" },
|
||||
{ MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
|
||||
{ MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
|
||||
{ MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
|
||||
{ MU, A, 0, 0, "((b*))++m", "bxbbxbbbxm" },
|
||||
{ MU, A, 0, 0, "((b*))++m", "bxbbxbbbxbbm" },
|
||||
{ MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxm" },
|
||||
{ MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxbbm" },
|
||||
{ MU, A, 0, 0, "(A)*+$", "ABC" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
|
||||
{ MU, A, 0, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
|
||||
{ MU, A, 0, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
|
||||
{ MU, A, 0, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
|
||||
{ MU, A, 0, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
|
||||
|
||||
/* Back references. */
|
||||
{ MU, A, 0, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
|
||||
{ CMU, A, 0, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
|
||||
{ CM, A, 0, 0, "(a{2,4})\\1", "AaAaaAaA" },
|
||||
{ MU, A, 0, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
|
||||
{ MU, A, 0, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
|
||||
{ MU, A, 0, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
|
||||
{ MU, A, 0, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
|
||||
{ MU, A, 0, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
|
||||
{ MU, A, 0, 0, "(?:(aa)|b)\\1?b", "bb" },
|
||||
{ CMU, A, 0, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
|
||||
{ MU, A, 0, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
|
||||
{ CMU, A, 0, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
|
||||
{ MU, A, 0, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
|
||||
{ CM, A, 0, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
|
||||
{ MU, A, 0, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
|
||||
{ MU, A, 0, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
|
||||
{ M, A, 0, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
|
||||
{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
|
||||
{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
|
||||
{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
|
||||
{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
|
||||
{ PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
|
||||
{ CMUP, A, 0, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
|
||||
{ MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
|
||||
{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
|
||||
{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>*(?<A>aa)(?<A>bb)", "aabb" },
|
||||
{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{0,3}aaaaaa", "aabbaaaaaa" },
|
||||
{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{2,5}bb", "aabbaaaabb" },
|
||||
{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}m", "aaaaaaaabbbbaabbbbm" },
|
||||
{ MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
|
||||
{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
|
||||
{ MU | PCRE2_DUPNAMES, A, 0, 0, "\\k<A>*?(?<A>aa)(?<A>bb)", "aabb" },
|
||||
{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
|
||||
{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>*?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
|
||||
{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
|
||||
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}M", "aaaaaaaabbbbaabbbbm" },
|
||||
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" },
|
||||
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
|
||||
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
|
||||
{ MU | PCRE2_DUPNAMES, A, 0, 0, "^(?P<NAME>..)(?P<NAME>..)\\k<NAME>{2,4}", "AaAAAaAaAaaA" },
|
||||
{ MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "(a)|\\1+c", "xxc" },
|
||||
{ MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\1+?()", "" },
|
||||
|
||||
/* Assertions. */
|
||||
{ MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
|
||||
{ MU, A, 0, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
|
||||
{ MU, A, 0, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
|
||||
{ MU, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
|
||||
{ MU, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
|
||||
{ M, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
|
||||
{ M, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
|
||||
{ MU, A, 0, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
|
||||
{ MU, A, 0, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
|
||||
{ MU, A, 0, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
|
||||
{ MU, A, 0, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
|
||||
{ MU, A, 0, 0, "((?(?=(a))a)+k)", "bbak" },
|
||||
{ MU, A, 0, 0, "((?(?=a)a)+k)", "bbak" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
|
||||
{ MU, A, 0, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
|
||||
{ MU, A, 0, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
|
||||
{ MU, A, 0, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
|
||||
{ MU, A, 0, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
|
||||
{ MU, A, 0, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
|
||||
{ MU, A, 0, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
|
||||
{ MU, A, 0, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
|
||||
{ MU, A, 0, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
|
||||
{ MU, A, 0, 0, "a(?=(?C)\\B(?C`x`))b", "ab" },
|
||||
{ MU, A, 0, 0, "a(?!(?C)\\B(?C`x`))bb|ab", "abb" },
|
||||
{ MU, A, 0, 0, "a(?=\\b|(?C)\\B(?C`x`))b", "ab" },
|
||||
{ MU, A, 0, 0, "a(?!\\b|(?C)\\B(?C`x`))bb|ab", "abb" },
|
||||
{ MU, A, 0, 0, "c(?(?=(?C)\\B(?C`x`))ab|a)", "cab" },
|
||||
{ MU, A, 0, 0, "c(?(?!(?C)\\B(?C`x`))ab|a)", "cab" },
|
||||
{ MU, A, 0, 0, "c(?(?=\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
|
||||
{ MU, A, 0, 0, "c(?(?!\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
|
||||
{ MU, A, 0, 0, "a(?=)b", "ab" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "a(?!)b", "ab" },
|
||||
{ MU, A, 0, 0, "(?(?<!|(|a)))", "a" },
|
||||
|
||||
/* Not empty, ACCEPT, FAIL */
|
||||
{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
|
||||
{ MU, A, PCRE2_NOTEMPTY, 0, "a*", "bcaad" },
|
||||
{ MU, A, PCRE2_NOTEMPTY, 0, "a*?", "bcaad" },
|
||||
{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
|
||||
{ MU, A, 0, 0, "a(*ACCEPT)b", "ab" },
|
||||
{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
|
||||
{ MU, A, PCRE2_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
|
||||
{ MU, A, PCRE2_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
|
||||
{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
|
||||
{ MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
|
||||
{ MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
|
||||
{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
|
||||
{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
|
||||
{ MU, A, 0, 0, "((a(*ACCEPT)b))", "ab" },
|
||||
{ MU, A, 0, 0, "(a(*FAIL)a|a)", "aaa" },
|
||||
{ MU, A, 0, 0, "(?=ab(*ACCEPT)b)a", "ab" },
|
||||
{ MU, A, 0, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
|
||||
{ MU, A, 0, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
|
||||
{ MU, A, PCRE2_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
|
||||
{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?=A)", "AB" },
|
||||
{ MU | PCRE2_ENDANCHORED, A, 0, 0, "aa(*ACCEPT)aa", "aaa" },
|
||||
|
||||
/* Conditional blocks. */
|
||||
{ MU, A, 0, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
|
||||
{ MU, A, 0, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
|
||||
{ MU, A, 0, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
|
||||
{ MU, A, 0, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
|
||||
{ MU, A, 0, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
|
||||
{ MU, A, 0, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
|
||||
{ MU, A, 0, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
|
||||
{ MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
|
||||
{ MU, A, 0, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
|
||||
{ MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
|
||||
{ MU, A, 0, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
|
||||
{ MU, A, 0, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
|
||||
{ MU, A, 0, 0, "(?(?=a)ab)", "a" },
|
||||
{ MU, A, 0, 0, "(?(?<!b)c)", "b" },
|
||||
{ MU, A, 0, 0, "(?(DEFINE)a(b))", "a" },
|
||||
{ MU, A, 0, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
|
||||
{ MU, A, 0, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
|
||||
{ MU, A, 0, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
|
||||
{ MU, A, 0, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
|
||||
{ MU, A, 0, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
|
||||
{ MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
|
||||
{ MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cbb" },
|
||||
{ MU, A, 0, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
|
||||
{ MU, A, 0, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
|
||||
{ MU, A, 0, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
|
||||
{ MU, A, 0, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
|
||||
{ MU, A, 0, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
|
||||
{ MU, A, 0, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
|
||||
{ MU, A, 0, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
|
||||
{ MU, A, 0, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
|
||||
{ MU, A, 0, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
|
||||
{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
|
||||
{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
|
||||
{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
|
||||
{ MU, A, 0, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
|
||||
{ MU, A, 0, 0, "(?(?!)a|b)", "ab" },
|
||||
{ MU, A, 0, 0, "(?(?!)a)", "ab" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?(?!)a|b)", "ac" },
|
||||
|
||||
/* Set start of match. */
|
||||
{ MU, A, 0, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
|
||||
{ MU, A, 0, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
|
||||
{ MU, A, 0, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
|
||||
{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
|
||||
{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
|
||||
|
||||
/* First line. */
|
||||
{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
|
||||
{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
|
||||
{ MU | PCRE2_FIRSTLINE, A, 0, 0, "(?<=a)", "a" },
|
||||
{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[^a][^b]", "ab" },
|
||||
{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "a", "\na" },
|
||||
{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[abc]", "\na" },
|
||||
{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^a", "\na" },
|
||||
{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
|
||||
{ MU | PCRE2_FIRSTLINE, A, 0, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
|
||||
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\xc2\x85#" },
|
||||
{ M | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\x85#" },
|
||||
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
|
||||
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
|
||||
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
|
||||
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, "a", "\ra" },
|
||||
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
|
||||
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
|
||||
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 1, ".", "\r\n" },
|
||||
{ PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_LF, 0, 0 | F_NOMATCH, "ab.", "ab" },
|
||||
{ MU | PCRE2_FIRSTLINE, A, 0, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" },
|
||||
{ PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_ANY, 0, 0, "....a", "012\n0a" },
|
||||
{ MU | PCRE2_FIRSTLINE, A, 0, 0, "[aC]", "a" },
|
||||
|
||||
/* Recurse. */
|
||||
{ MU, A, 0, 0, "(a)(?1)", "aa" },
|
||||
{ MU, A, 0, 0, "((a))(?1)", "aa" },
|
||||
{ MU, A, 0, 0, "(b|a)(?1)", "aa" },
|
||||
{ MU, A, 0, 0, "(b|(a))(?1)", "aa" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
|
||||
{ MU, A, 0, 0, "((a)(b)(?:a*))(?1)", "abab" },
|
||||
{ MU, A, 0, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
|
||||
{ MU, A, 0, 0, "((?2)b|(a)){2}(?1)", "aabab" },
|
||||
{ MU, A, 0, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
|
||||
{ MU, A, 0, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
|
||||
{ MU, A, 0, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
|
||||
{ MU, A, 0, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
|
||||
{ MU, A, 0, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
|
||||
{ MU, A, 0, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
|
||||
{ MU, A, 0, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
|
||||
{ MU, A, 0, 0, "b|<(?R)*>", "<<b>" },
|
||||
{ MU, A, 0, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
|
||||
{ MU, A, 0, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
|
||||
{ MU, A, 0, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
|
||||
{ MU, A, 0, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
|
||||
{ MU, A, 0, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
|
||||
{ MU, A, 0, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
|
||||
{ MU, A, 0, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
|
||||
{ MU, A, 0, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
|
||||
{ MU, A, 0, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" },
|
||||
{ MU, A, 0, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" },
|
||||
{ MU, A, 0, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
|
||||
{ MU, A, 0, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
|
||||
{ MU, A, 0, 0, "((.)(?:.|\\2(?1))){0}#(?1)#", "#aabbccdde# #aabbccddee#" },
|
||||
{ MU, A, 0, 0, "((.)(?:\\2|\\2{4}b)){0}#(?:(?1))+#", "#aaaab# #aaaaab#" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?1)$((.|\\2xx){1,2})", "abc" },
|
||||
|
||||
/* 16 bit specific tests. */
|
||||
{ CM, A, 0, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
|
||||
{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
|
||||
{ CM, A, 0, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
|
||||
{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
|
||||
{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
|
||||
{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
|
||||
{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
|
||||
{ M, A, 0, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
|
||||
{ M, A, 0, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
|
||||
{ CM | PCRE2_EXTENDED, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
|
||||
{ CM, A, 0, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
|
||||
{ M, PCRE2_NEWLINE_ANY, 0, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
|
||||
{ 0, BSR(PCRE2_BSR_UNICODE), 0, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
|
||||
{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
|
||||
{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
|
||||
{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
|
||||
{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
|
||||
|
||||
/* Partial matching. */
|
||||
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "ab", "a" },
|
||||
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "ab|a", "a" },
|
||||
{ MU, A, PCRE2_PARTIAL_HARD, 0, "ab|a", "a" },
|
||||
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "\\b#", "a" },
|
||||
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
|
||||
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
|
||||
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a\\B", "a" },
|
||||
{ MU, A, PCRE2_PARTIAL_HARD, 0, "a\\b", "a" },
|
||||
{ M | PCRE2_DUPNAMES, A, PCRE2_PARTIAL_HARD, 0, "^(?P<NAME>..)(?P<NAME>..)\\k<NAME>{2,4}", "AaAAAaAaAaA" },
|
||||
{ M | PCRE2_DUPNAMES, A, PCRE2_PARTIAL_HARD, 0, "^(?P<NAME>..)(?P<NAME>..)\\k<NAME>{2,4}", "AaAAAaAaAaa" },
|
||||
|
||||
/* (*MARK) verb. */
|
||||
{ MU, A, 0, 0, "a(*MARK:aa)a", "ababaa" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
|
||||
{ MU, A, 0, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
|
||||
{ MU, A, 0, 0, "(?>a(*:aa))b|ac", "ac" },
|
||||
{ MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
|
||||
{ MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
|
||||
{ MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
|
||||
{ MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
|
||||
{ MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
|
||||
{ MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(*:mark)m", "a" },
|
||||
|
||||
/* (*COMMIT) verb. */
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
|
||||
{ MU, A, 0, 0, "aa(*COMMIT)b", "xaxaab" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" },
|
||||
|
||||
/* (*PRUNE) verb. */
|
||||
{ MU, A, 0, 0, "aa\\K(*PRUNE)b", "aaab" },
|
||||
{ MU, A, 0, 0, "aa(*PRUNE:bb)b|a", "aa" },
|
||||
{ MU, A, 0, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
|
||||
{ MU, A, 0, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
|
||||
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
|
||||
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" },
|
||||
{ MU, A, 0, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
|
||||
{ MU, A, 0, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
|
||||
{ MU, A, 0, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
|
||||
{ MU, A, 0, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
|
||||
{ MU, A, 0, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
|
||||
{ MU, A, 0, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
|
||||
{ MU, A, 0, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
|
||||
{ MU, A, 0, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
|
||||
{ MU, A, 0, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
|
||||
{ MU, A, 0, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
|
||||
{ MU, A, 0, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
|
||||
{ MU, A, 0, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
|
||||
|
||||
/* (*SKIP) verb. */
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
|
||||
{ MU, A, 0, 0, "(\\w+(*SKIP)#)", "abcd,xyz#," },
|
||||
{ MU, A, 0, 0, "\\w+(*SKIP)#|mm", "abcd,xyz#," },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "b+(?<=(*SKIP)#c)|b+", "#bbb" },
|
||||
|
||||
/* (*THEN) verb. */
|
||||
{ MU, A, 0, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" },
|
||||
{ MU, A, 0, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" },
|
||||
{ MU, A, 0, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" },
|
||||
{ MU, A, 0, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" },
|
||||
{ MU, A, 0, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" },
|
||||
{ MU, A, 0, 0, "(?(?!a(*THEN)b)ad|add)", "add" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
|
||||
{ MU, A, 0, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
|
||||
{ MU, A, 0, 0, "(?=(*THEN: ))* ", " " },
|
||||
{ MU, A, 0, 0, "a(*THEN)(?R) |", "a" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?<!(*THEN)a|(*THEN)b|(*THEN)ab?|(*THEN)ba?|)", "c" },
|
||||
|
||||
/* Recurse and control verbs. */
|
||||
{ MU, A, 0, 0, "(a(*ACCEPT)b){0}a(?1)b", "aacaabb" },
|
||||
{ MU, A, 0, 0, "((a)\\2(*ACCEPT)b){0}a(?1)b", "aaacaaabb" },
|
||||
{ MU, A, 0, 0, "((ab|a(*ACCEPT)x)+|ababababax){0}_(?1)_", "_ababababax_ _ababababa_" },
|
||||
{ MU, A, 0, 0, "((.)(?:A(*ACCEPT)|(?1)\\2)){0}_(?1)_", "_bcdaAdcb_bcdaAdcb_" },
|
||||
{ MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_", "_ab_" },
|
||||
{ MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_|(_aa_)", "_aa_" },
|
||||
{ MU, A, 0, 0, "(a(*COMMIT)(?:b|bb)|c(*ACCEPT)d|dd){0}_(?1)+_", "_ax_ _cd_ _abbb_ _abcd_ _abbcdd_" },
|
||||
{ MU, A, 0, 0, "((.)(?:.|(*COMMIT)\\2{3}(*ACCEPT).*|.*)){0}_(?1){0,4}_", "_aaaabbbbccccddd_ _aaaabbbbccccdddd_" },
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
/* Script runs and iterations. */
|
||||
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
|
||||
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
|
||||
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
|
||||
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
|
||||
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
|
||||
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)++#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
|
||||
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)?#", "!ab!abc!ab!ab#" },
|
||||
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)??#", "!ab!abc!ab!ab#" },
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Deep recursion. */
|
||||
{ MU, A, 0, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
|
||||
{ MU, A, 0, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
|
||||
{ MU, A, 0, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
|
||||
|
||||
/* Deep recursion: Stack limit reached. */
|
||||
{ M, A, 0, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
|
||||
{ M, A, 0, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
|
||||
{ M, A, 0, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
|
||||
{ M, A, 0, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
|
||||
{ M, A, 0, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
|
||||
|
||||
{ 0, 0, 0, 0, NULL, NULL }
|
||||
};
|
||||
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
/* JIT stack callback for the 8-bit library: the opaque callback data is the
   JIT stack itself, so it is handed back unchanged. */
static pcre2_jit_stack_8 *callback8(void *arg)
{
	pcre2_jit_stack_8 *jit_stack = arg;

	return jit_stack;
}
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
/* JIT stack callback for the 16-bit library: the opaque callback data is the
   JIT stack itself, so it is handed back unchanged. */
static pcre2_jit_stack_16 *callback16(void *arg)
{
	pcre2_jit_stack_16 *jit_stack = arg;

	return jit_stack;
}
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
/* JIT stack callback for the 32-bit library: the opaque callback data is the
   JIT stack itself, so it is handed back unchanged. */
static pcre2_jit_stack_32 *callback32(void *arg)
{
	pcre2_jit_stack_32 *jit_stack = arg;

	return jit_stack;
}
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
/* Cached 8-bit JIT stack; NULL until getstack8() creates it and again after
   setstack8(NULL) releases it. */
static pcre2_jit_stack_8 *stack8;

/* Returns the cached 8-bit JIT stack, creating it on first use.
   The result is whatever pcre2_jit_stack_create_8() returned, so it may be
   NULL if the creation failed. */
static pcre2_jit_stack_8 *getstack8(void)
{
	if (stack8 != NULL)
		return stack8;

	stack8 = pcre2_jit_stack_create_8(1, 1024 * 1024, NULL);
	return stack8;
}
|
||||
|
||||
/* Attaches the cached 8-bit JIT stack to a match context, or releases it.

   mcontext != NULL: registers callback8 on the context with the cached stack
                     (created on demand by getstack8()) as callback data.
   mcontext == NULL: frees the cached stack, if any, and clears the cache. */
static void setstack8(pcre2_match_context_8 *mcontext)
{
	if (mcontext != NULL) {
		pcre2_jit_stack_assign_8(mcontext, callback8, getstack8());
		return;
	}

	if (stack8 != NULL)
		pcre2_jit_stack_free_8(stack8);
	stack8 = NULL;
}
|
||||
#endif /* SUPPORT_PCRE2_8 */
|
||||
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
/* Cached 16-bit JIT stack; NULL until getstack16() creates it and again after
   setstack16(NULL) releases it. */
static pcre2_jit_stack_16 *stack16;

/* Returns the cached 16-bit JIT stack, creating it on first use.
   The result is whatever pcre2_jit_stack_create_16() returned, so it may be
   NULL if the creation failed. */
static pcre2_jit_stack_16 *getstack16(void)
{
	if (stack16 != NULL)
		return stack16;

	stack16 = pcre2_jit_stack_create_16(1, 1024 * 1024, NULL);
	return stack16;
}
|
||||
|
||||
/* Attaches the cached 16-bit JIT stack to a match context, or releases it.

   mcontext != NULL: registers callback16 on the context with the cached stack
                     (created on demand by getstack16()) as callback data.
   mcontext == NULL: frees the cached stack, if any, and clears the cache. */
static void setstack16(pcre2_match_context_16 *mcontext)
{
	if (mcontext != NULL) {
		pcre2_jit_stack_assign_16(mcontext, callback16, getstack16());
		return;
	}

	if (stack16 != NULL)
		pcre2_jit_stack_free_16(stack16);
	stack16 = NULL;
}
|
||||
#endif /* SUPPORT_PCRE2_16 */
|
||||
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
/* Cached 32-bit JIT stack; NULL until getstack32() creates it and again after
   setstack32(NULL) releases it. */
static pcre2_jit_stack_32 *stack32;

/* Returns the cached 32-bit JIT stack, creating it on first use.
   The result is whatever pcre2_jit_stack_create_32() returned, so it may be
   NULL if the creation failed. */
static pcre2_jit_stack_32 *getstack32(void)
{
	if (stack32 != NULL)
		return stack32;

	stack32 = pcre2_jit_stack_create_32(1, 1024 * 1024, NULL);
	return stack32;
}
|
||||
|
||||
/* Attaches the cached 32-bit JIT stack to a match context, or releases it.

   mcontext != NULL: registers callback32 on the context with the cached stack
                     (created on demand by getstack32()) as callback data.
   mcontext == NULL: frees the cached stack, if any, and clears the cache. */
static void setstack32(pcre2_match_context_32 *mcontext)
{
	if (mcontext != NULL) {
		pcre2_jit_stack_assign_32(mcontext, callback32, getstack32());
		return;
	}

	if (stack32 != NULL)
		pcre2_jit_stack_free_32(stack32);
	stack32 = NULL;
}
|
||||
#endif /* SUPPORT_PCRE2_32 */
|
||||
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
|
||||
/* Converts the NUL-terminated byte string `input`, read as UTF-8, into 16-bit
   code units stored in `output`.

   input       source bytes; decoding is structural only (lead byte selects the
               sequence length, continuation bytes are masked, not validated),
               so encoded surrogates and overlong forms pass through.
   output      destination with room for `max_length` code units, including
               the terminating zero.
   offsetmap   optional (may be NULL). One entry per decoded character receives
               the byte offset of that character in `input`. A character that
               is written as a surrogate pair occupies two entries, of which
               only the first is written. After the loop one more entry
               receives the byte offset at which decoding stopped; this final
               entry is NOT written on the early return taken when a surrogate
               pair no longer fits.
   max_length  capacity of `output` in code units. If it is 0 nothing is
               written and 0 is returned.

   Returns the number of code units written, not counting the terminator.

   Malformed input: a lead byte in 0xf8..0xff, or a multi-byte sequence that is
   cut short by the terminating NUL, is emitted as a single code unit holding
   the lead byte value (the same treatment stray bytes 0x80..0xbf get). This
   guarantees the input pointer always advances and never steps over the
   terminator. */
static int convert_utf8_to_utf16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int *offsetmap, int max_length)
{
	PCRE2_SPTR8 iptr = input;
	PCRE2_UCHAR16 *optr = output;
	unsigned int c;
	int seq_len;
	int k;

	if (max_length == 0)
		return 0;

	while (*iptr && max_length > 1) {
		if (offsetmap)
			*offsetmap++ = (int)(iptr - input);

		/* Sequence length implied by the lead byte. */
		if (*iptr < 0xc0 || *iptr >= 0xf8)
			seq_len = 1;
		else if (!(*iptr & 0x20))
			seq_len = 2;
		else if (!(*iptr & 0x10))
			seq_len = 3;
		else
			seq_len = 4;

		/* Never read or skip beyond the NUL terminator. */
		for (k = 1; k < seq_len; k++) {
			if (iptr[k] == 0) {
				seq_len = 1;
				break;
			}
		}

		switch (seq_len) {
		case 2:
			c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
			break;
		case 3:
			c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
			break;
		case 4:
			c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
			break;
		default:
			c = iptr[0];
			break;
		}
		iptr += seq_len;

		if (c < 65536) {
			*optr++ = (PCRE2_UCHAR16)c;
			max_length--;
		} else if (max_length <= 2) {
			/* No room left for a surrogate pair plus the terminator. */
			*optr = '\0';
			return (int)(optr - output);
		} else {
			c -= 0x10000;
			*optr++ = (PCRE2_UCHAR16)(0xd800 | ((c >> 10) & 0x3ff));
			*optr++ = (PCRE2_UCHAR16)(0xdc00 | (c & 0x3ff));
			max_length -= 2;
			/* The low surrogate gets its own (unwritten) map slot. */
			if (offsetmap)
				offsetmap++;
		}
	}

	if (offsetmap)
		*offsetmap = (int)(iptr - input);
	*optr = '\0';
	return (int)(optr - output);
}
|
||||
|
||||
/* Widens the NUL-terminated byte string `input` into 16-bit code units, one
   unit per byte with no decoding.

   At most max_length - 1 units are copied and a terminating zero is appended.
   Returns the number of units copied (terminator excluded). If max_length is
   0 nothing is written and 0 is returned. */
static int copy_char8_to_char16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int max_length)
{
	int count = 0;

	if (max_length == 0)
		return 0;

	/* Keep one slot free for the terminator. */
	for (; input[count] != 0 && max_length - count > 1; count++)
		output[count] = input[count];

	output[count] = '\0';
	return count;
}
|
||||
|
||||
/* Capacity, in elements, of the two 16-bit scratch arrays below.
   regression_tests() passes it as max_length when it stores the 16-bit form
   of the current pattern in regtest_buf16. */
#define REGTEST_MAX_LENGTH16 4096
|
||||
static PCRE2_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
|
||||
/* NOTE(review): presumably handed to convert_utf8_to_utf16() as its offsetmap
   argument; the use is not visible in this part of the file, confirm. */
static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
|
||||
|
||||
#endif /* SUPPORT_PCRE2_16 */
|
||||
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
|
||||
/* Converts the NUL-terminated byte string `input`, read as UTF-8, into 32-bit
   code units stored in `output`.

   input       source bytes; decoding is structural only (lead byte selects the
               sequence length, continuation bytes are masked, not validated),
               so encoded surrogates and overlong forms pass through.
   output      destination with room for `max_length` code units, including
               the terminating zero.
   offsetmap   optional (may be NULL). One entry per decoded character receives
               the byte offset of that character in `input`, followed by one
               final entry holding the byte offset at which decoding stopped.
   max_length  capacity of `output` in code units. If it is 0 nothing is
               written and 0 is returned.

   Returns the number of code units written, not counting the terminator.

   Malformed input: a lead byte in 0xf8..0xff, or a multi-byte sequence that is
   cut short by the terminating NUL, is emitted as a single code unit holding
   the lead byte value (the same treatment stray bytes 0x80..0xbf get). This
   guarantees the input pointer always advances and never steps over the
   terminator. */
static int convert_utf8_to_utf32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int *offsetmap, int max_length)
{
	PCRE2_SPTR8 iptr = input;
	PCRE2_UCHAR32 *optr = output;
	unsigned int c;
	int seq_len;
	int k;

	if (max_length == 0)
		return 0;

	while (*iptr && max_length > 1) {
		if (offsetmap)
			*offsetmap++ = (int)(iptr - input);

		/* Sequence length implied by the lead byte. */
		if (*iptr < 0xc0 || *iptr >= 0xf8)
			seq_len = 1;
		else if (!(*iptr & 0x20))
			seq_len = 2;
		else if (!(*iptr & 0x10))
			seq_len = 3;
		else
			seq_len = 4;

		/* Never read or skip beyond the NUL terminator. */
		for (k = 1; k < seq_len; k++) {
			if (iptr[k] == 0) {
				seq_len = 1;
				break;
			}
		}

		switch (seq_len) {
		case 2:
			c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
			break;
		case 3:
			c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
			break;
		case 4:
			c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
			break;
		default:
			c = iptr[0];
			break;
		}
		iptr += seq_len;

		*optr++ = c;
		max_length--;
	}

	if (offsetmap)
		*offsetmap = (int)(iptr - input);
	*optr = 0;
	return (int)(optr - output);
}
|
||||
|
||||
/* Widens the NUL-terminated byte string `input` into 32-bit code units, one
   unit per byte with no decoding.

   At most max_length - 1 units are copied and a terminating zero is appended.
   Returns the number of units copied (terminator excluded). If max_length is
   0 nothing is written and 0 is returned. */
static int copy_char8_to_char32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int max_length)
{
	int count = 0;

	if (max_length == 0)
		return 0;

	/* Keep one slot free for the terminator. */
	for (; input[count] != 0 && max_length - count > 1; count++)
		output[count] = input[count];

	output[count] = '\0';
	return count;
}
|
||||
|
||||
/* Capacity, in elements, of the two 32-bit scratch arrays below.
   regression_tests() passes it as max_length when it stores the 32-bit form
   of the current pattern in regtest_buf32. */
#define REGTEST_MAX_LENGTH32 4096
|
||||
static PCRE2_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
|
||||
/* NOTE(review): presumably handed to convert_utf8_to_utf32() as its offsetmap
   argument; the use is not visible in this part of the file, confirm. */
static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
|
||||
|
||||
#endif /* SUPPORT_PCRE2_32 */
|
||||
|
||||
/* Reports whether the NUL-terminated string `input` is pure 7-bit ASCII.

   Returns 1 when every byte before the terminator is <= 127 (an empty string
   qualifies), 0 as soon as a byte above 127 is found. Bytes are inspected as
   unsigned char so the result does not depend on the signedness of char. */
static int check_ascii(const char *input)
{
	const unsigned char *ptr;

	/* Keep the const qualifier when reinterpreting the bytes. */
	for (ptr = (const unsigned char *)input; *ptr != 0; ptr++) {
		if (*ptr > 127)
			return 0;
	}
	return 1;
}
|
||||
|
||||
/* Size passed to pcre2_match_data_create_*() in regression_tests(); the
   resulting ovector is initialised there as OVECTOR_SIZE * 2 entries. */
#define OVECTOR_SIZE 15
|
||||
|
||||
static int regression_tests(void)
|
||||
{
|
||||
struct regression_test_case *current = regression_test_cases;
|
||||
int error;
|
||||
PCRE2_SIZE err_offs;
|
||||
int is_successful;
|
||||
int is_ascii;
|
||||
int total = 0;
|
||||
int successful = 0;
|
||||
int successful_row = 0;
|
||||
int counter = 0;
|
||||
int jit_compile_mode;
|
||||
int utf = 0;
|
||||
uint32_t disabled_options = 0;
|
||||
int i;
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
pcre2_code_8 *re8;
|
||||
pcre2_compile_context_8 *ccontext8;
|
||||
pcre2_match_data_8 *mdata8_1;
|
||||
pcre2_match_data_8 *mdata8_2;
|
||||
pcre2_match_context_8 *mcontext8;
|
||||
PCRE2_SIZE *ovector8_1 = NULL;
|
||||
PCRE2_SIZE *ovector8_2 = NULL;
|
||||
int return_value8[2];
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
pcre2_code_16 *re16;
|
||||
pcre2_compile_context_16 *ccontext16;
|
||||
pcre2_match_data_16 *mdata16_1;
|
||||
pcre2_match_data_16 *mdata16_2;
|
||||
pcre2_match_context_16 *mcontext16;
|
||||
PCRE2_SIZE *ovector16_1 = NULL;
|
||||
PCRE2_SIZE *ovector16_2 = NULL;
|
||||
int return_value16[2];
|
||||
int length16;
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
pcre2_code_32 *re32;
|
||||
pcre2_compile_context_32 *ccontext32;
|
||||
pcre2_match_data_32 *mdata32_1;
|
||||
pcre2_match_data_32 *mdata32_2;
|
||||
pcre2_match_context_32 *mcontext32;
|
||||
PCRE2_SIZE *ovector32_1 = NULL;
|
||||
PCRE2_SIZE *ovector32_2 = NULL;
|
||||
int return_value32[2];
|
||||
int length32;
|
||||
#endif
|
||||
|
||||
#if defined SUPPORT_PCRE2_8
|
||||
PCRE2_UCHAR8 cpu_info[128];
|
||||
#elif defined SUPPORT_PCRE2_16
|
||||
PCRE2_UCHAR16 cpu_info[128];
|
||||
#elif defined SUPPORT_PCRE2_32
|
||||
PCRE2_UCHAR32 cpu_info[128];
|
||||
#endif
|
||||
#if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
|
||||
int return_value;
|
||||
#endif
|
||||
|
||||
/* This test compares the behaviour of interpreter and JIT. Although disabling
|
||||
utf or ucp may make tests fail, if the pcre2_match result is the SAME, it is
|
||||
still considered successful from pcre2_jit_test point of view. */
|
||||
|
||||
#if defined SUPPORT_PCRE2_8
|
||||
pcre2_config_8(PCRE2_CONFIG_JITTARGET, &cpu_info);
|
||||
#elif defined SUPPORT_PCRE2_16
|
||||
pcre2_config_16(PCRE2_CONFIG_JITTARGET, &cpu_info);
|
||||
#elif defined SUPPORT_PCRE2_32
|
||||
pcre2_config_32(PCRE2_CONFIG_JITTARGET, &cpu_info);
|
||||
#endif
|
||||
|
||||
printf("Running JIT regression tests\n");
|
||||
printf(" target CPU of SLJIT compiler: ");
|
||||
for (i = 0; cpu_info[i]; i++)
|
||||
printf("%c", (char)(cpu_info[i]));
|
||||
printf("\n");
|
||||
|
||||
#if defined SUPPORT_PCRE2_8
|
||||
pcre2_config_8(PCRE2_CONFIG_UNICODE, &utf);
|
||||
#elif defined SUPPORT_PCRE2_16
|
||||
pcre2_config_16(PCRE2_CONFIG_UNICODE, &utf);
|
||||
#elif defined SUPPORT_PCRE2_32
|
||||
pcre2_config_32(PCRE2_CONFIG_UNICODE, &utf);
|
||||
#endif
|
||||
|
||||
if (!utf)
|
||||
disabled_options |= PCRE2_UTF;
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
printf(" in 8 bit mode with UTF-8 %s:\n", utf ? "enabled" : "disabled");
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
printf(" in 16 bit mode with UTF-16 %s:\n", utf ? "enabled" : "disabled");
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
printf(" in 32 bit mode with UTF-32 %s:\n", utf ? "enabled" : "disabled");
|
||||
#endif
|
||||
|
||||
while (current->pattern) {
|
||||
/* printf("\nPattern: %s :\n", current->pattern); */
|
||||
total++;
|
||||
is_ascii = 0;
|
||||
if (!(current->start_offset & F_PROPERTY))
|
||||
is_ascii = check_ascii(current->pattern) && check_ascii(current->input);
|
||||
|
||||
if (current->match_options & PCRE2_PARTIAL_SOFT)
|
||||
jit_compile_mode = PCRE2_JIT_PARTIAL_SOFT;
|
||||
else if (current->match_options & PCRE2_PARTIAL_HARD)
|
||||
jit_compile_mode = PCRE2_JIT_PARTIAL_HARD;
|
||||
else
|
||||
jit_compile_mode = PCRE2_JIT_COMPLETE;
|
||||
error = 0;
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
re8 = NULL;
|
||||
ccontext8 = pcre2_compile_context_create_8(NULL);
|
||||
if (ccontext8) {
|
||||
if (GET_NEWLINE(current->newline))
|
||||
pcre2_set_newline_8(ccontext8, GET_NEWLINE(current->newline));
|
||||
if (GET_BSR(current->newline))
|
||||
pcre2_set_bsr_8(ccontext8, GET_BSR(current->newline));
|
||||
|
||||
if (!(current->start_offset & F_NO8)) {
|
||||
re8 = pcre2_compile_8((PCRE2_SPTR8)current->pattern, PCRE2_ZERO_TERMINATED,
|
||||
current->compile_options & ~disabled_options,
|
||||
&error, &err_offs, ccontext8);
|
||||
|
||||
if (!re8 && (utf || is_ascii))
|
||||
printf("\n8 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
|
||||
}
|
||||
pcre2_compile_context_free_8(ccontext8);
|
||||
}
|
||||
else
|
||||
printf("\n8 bit: Cannot allocate compile context\n");
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
|
||||
convert_utf8_to_utf16((PCRE2_SPTR8)current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
|
||||
else
|
||||
copy_char8_to_char16((PCRE2_SPTR8)current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
|
||||
|
||||
re16 = NULL;
|
||||
ccontext16 = pcre2_compile_context_create_16(NULL);
|
||||
if (ccontext16) {
|
||||
if (GET_NEWLINE(current->newline))
|
||||
pcre2_set_newline_16(ccontext16, GET_NEWLINE(current->newline));
|
||||
if (GET_BSR(current->newline))
|
||||
pcre2_set_bsr_16(ccontext16, GET_BSR(current->newline));
|
||||
|
||||
if (!(current->start_offset & F_NO16)) {
|
||||
re16 = pcre2_compile_16(regtest_buf16, PCRE2_ZERO_TERMINATED,
|
||||
current->compile_options & ~disabled_options,
|
||||
&error, &err_offs, ccontext16);
|
||||
|
||||
if (!re16 && (utf || is_ascii))
|
||||
printf("\n16 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
|
||||
}
|
||||
pcre2_compile_context_free_16(ccontext16);
|
||||
}
|
||||
else
|
||||
printf("\n16 bit: Cannot allocate compile context\n");
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
|
||||
convert_utf8_to_utf32((PCRE2_SPTR8)current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
|
||||
else
|
||||
copy_char8_to_char32((PCRE2_SPTR8)current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
|
||||
|
||||
re32 = NULL;
|
||||
ccontext32 = pcre2_compile_context_create_32(NULL);
|
||||
if (ccontext32) {
|
||||
if (GET_NEWLINE(current->newline))
|
||||
pcre2_set_newline_32(ccontext32, GET_NEWLINE(current->newline));
|
||||
if (GET_BSR(current->newline))
|
||||
pcre2_set_bsr_32(ccontext32, GET_BSR(current->newline));
|
||||
|
||||
if (!(current->start_offset & F_NO32)) {
|
||||
re32 = pcre2_compile_32(regtest_buf32, PCRE2_ZERO_TERMINATED,
|
||||
current->compile_options & ~disabled_options,
|
||||
&error, &err_offs, ccontext32);
|
||||
|
||||
if (!re32 && (utf || is_ascii))
|
||||
printf("\n32 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
|
||||
}
|
||||
pcre2_compile_context_free_32(ccontext32);
|
||||
}
|
||||
else
|
||||
printf("\n32 bit: Cannot allocate compile context\n");
|
||||
#endif
|
||||
|
||||
counter++;
|
||||
if ((counter & 0x3) != 0) {
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
setstack8(NULL);
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
setstack16(NULL);
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
setstack32(NULL);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
return_value8[0] = -1000;
|
||||
return_value8[1] = -1000;
|
||||
mdata8_1 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL);
|
||||
mdata8_2 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL);
|
||||
mcontext8 = pcre2_match_context_create_8(NULL);
|
||||
if (!mdata8_1 || !mdata8_2 || !mcontext8) {
|
||||
printf("\n8 bit: Cannot allocate match data\n");
|
||||
pcre2_match_data_free_8(mdata8_1);
|
||||
pcre2_match_data_free_8(mdata8_2);
|
||||
pcre2_match_context_free_8(mcontext8);
|
||||
pcre2_code_free_8(re8);
|
||||
re8 = NULL;
|
||||
} else {
|
||||
ovector8_1 = pcre2_get_ovector_pointer_8(mdata8_1);
|
||||
ovector8_2 = pcre2_get_ovector_pointer_8(mdata8_2);
|
||||
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
||||
ovector8_1[i] = (PCRE2_SIZE)(-2);
|
||||
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
||||
ovector8_2[i] = (PCRE2_SIZE)(-2);
|
||||
pcre2_set_match_limit_8(mcontext8, 10000000);
|
||||
}
|
||||
if (re8) {
|
||||
return_value8[1] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
|
||||
current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, mcontext8);
|
||||
|
||||
if (pcre2_jit_compile_8(re8, jit_compile_mode)) {
|
||||
printf("\n8 bit: JIT compiler does not support \"%s\"\n", current->pattern);
|
||||
} else if ((counter & 0x1) != 0) {
|
||||
setstack8(mcontext8);
|
||||
return_value8[0] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
|
||||
current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
|
||||
} else {
|
||||
pcre2_jit_stack_assign_8(mcontext8, NULL, getstack8());
|
||||
return_value8[0] = pcre2_jit_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
|
||||
current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
return_value16[0] = -1000;
|
||||
return_value16[1] = -1000;
|
||||
mdata16_1 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL);
|
||||
mdata16_2 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL);
|
||||
mcontext16 = pcre2_match_context_create_16(NULL);
|
||||
if (!mdata16_1 || !mdata16_2 || !mcontext16) {
|
||||
printf("\n16 bit: Cannot allocate match data\n");
|
||||
pcre2_match_data_free_16(mdata16_1);
|
||||
pcre2_match_data_free_16(mdata16_2);
|
||||
pcre2_match_context_free_16(mcontext16);
|
||||
pcre2_code_free_16(re16);
|
||||
re16 = NULL;
|
||||
} else {
|
||||
ovector16_1 = pcre2_get_ovector_pointer_16(mdata16_1);
|
||||
ovector16_2 = pcre2_get_ovector_pointer_16(mdata16_2);
|
||||
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
||||
ovector16_1[i] = (PCRE2_SIZE)(-2);
|
||||
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
||||
ovector16_2[i] = (PCRE2_SIZE)(-2);
|
||||
pcre2_set_match_limit_16(mcontext16, 10000000);
|
||||
}
|
||||
if (re16) {
|
||||
if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
|
||||
length16 = convert_utf8_to_utf16((PCRE2_SPTR8)current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
|
||||
else
|
||||
length16 = copy_char8_to_char16((PCRE2_SPTR8)current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
|
||||
|
||||
return_value16[1] = pcre2_match_16(re16, regtest_buf16, length16,
|
||||
current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, mcontext16);
|
||||
|
||||
if (pcre2_jit_compile_16(re16, jit_compile_mode)) {
|
||||
printf("\n16 bit: JIT compiler does not support \"%s\"\n", current->pattern);
|
||||
} else if ((counter & 0x1) != 0) {
|
||||
setstack16(mcontext16);
|
||||
return_value16[0] = pcre2_match_16(re16, regtest_buf16, length16,
|
||||
current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
|
||||
} else {
|
||||
pcre2_jit_stack_assign_16(mcontext16, NULL, getstack16());
|
||||
return_value16[0] = pcre2_jit_match_16(re16, regtest_buf16, length16,
|
||||
current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
return_value32[0] = -1000;
|
||||
return_value32[1] = -1000;
|
||||
mdata32_1 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL);
|
||||
mdata32_2 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL);
|
||||
mcontext32 = pcre2_match_context_create_32(NULL);
|
||||
if (!mdata32_1 || !mdata32_2 || !mcontext32) {
|
||||
printf("\n32 bit: Cannot allocate match data\n");
|
||||
pcre2_match_data_free_32(mdata32_1);
|
||||
pcre2_match_data_free_32(mdata32_2);
|
||||
pcre2_match_context_free_32(mcontext32);
|
||||
pcre2_code_free_32(re32);
|
||||
re32 = NULL;
|
||||
} else {
|
||||
ovector32_1 = pcre2_get_ovector_pointer_32(mdata32_1);
|
||||
ovector32_2 = pcre2_get_ovector_pointer_32(mdata32_2);
|
||||
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
||||
ovector32_1[i] = (PCRE2_SIZE)(-2);
|
||||
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
||||
ovector32_2[i] = (PCRE2_SIZE)(-2);
|
||||
pcre2_set_match_limit_32(mcontext32, 10000000);
|
||||
}
|
||||
if (re32) {
|
||||
if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
|
||||
length32 = convert_utf8_to_utf32((PCRE2_SPTR8)current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
|
||||
else
|
||||
length32 = copy_char8_to_char32((PCRE2_SPTR8)current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
|
||||
|
||||
return_value32[1] = pcre2_match_32(re32, regtest_buf32, length32,
|
||||
current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, mcontext32);
|
||||
|
||||
if (pcre2_jit_compile_32(re32, jit_compile_mode)) {
|
||||
printf("\n32 bit: JIT compiler does not support \"%s\"\n", current->pattern);
|
||||
} else if ((counter & 0x1) != 0) {
|
||||
setstack32(mcontext32);
|
||||
return_value32[0] = pcre2_match_32(re32, regtest_buf32, length32,
|
||||
current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
|
||||
} else {
|
||||
pcre2_jit_stack_assign_32(mcontext32, NULL, getstack32());
|
||||
return_value32[0] = pcre2_jit_match_32(re32, regtest_buf32, length32,
|
||||
current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
|
||||
return_value8[0], return_value16[0], return_value32[0],
|
||||
(int)ovector8_1[0], (int)ovector8_1[1],
|
||||
(int)ovector16_1[0], (int)ovector16_1[1],
|
||||
(int)ovector32_1[0], (int)ovector32_1[1],
|
||||
(current->compile_options & PCRE2_CASELESS) ? "C" : ""); */
|
||||
|
||||
/* If F_DIFF is set, just run the test, but do not compare the results.
|
||||
Segfaults can still be captured. */
|
||||
|
||||
is_successful = 1;
|
||||
if (!(current->start_offset & F_DIFF)) {
|
||||
#if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
|
||||
if (!(current->start_offset & F_FORCECONV)) {
|
||||
|
||||
/* All results must be the same. */
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
if ((return_value = return_value8[0]) != return_value8[1]) {
|
||||
printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
|
||||
return_value8[0], return_value8[1], total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
} else
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
if ((return_value = return_value16[0]) != return_value16[1]) {
|
||||
printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
|
||||
return_value16[0], return_value16[1], total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
} else
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
if ((return_value = return_value32[0]) != return_value32[1]) {
|
||||
printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
|
||||
return_value32[0], return_value32[1], total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
} else
|
||||
#endif
|
||||
#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
|
||||
if (return_value8[0] != return_value16[0]) {
|
||||
printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
|
||||
return_value8[0], return_value16[0],
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
} else
|
||||
#endif
|
||||
#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
|
||||
if (return_value8[0] != return_value32[0]) {
|
||||
printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
|
||||
return_value8[0], return_value32[0],
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
} else
|
||||
#endif
|
||||
#if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
|
||||
if (return_value16[0] != return_value32[0]) {
|
||||
printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
|
||||
return_value16[0], return_value32[0],
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
} else
|
||||
#endif
|
||||
if (return_value >= 0 || return_value == PCRE2_ERROR_PARTIAL) {
|
||||
if (return_value == PCRE2_ERROR_PARTIAL) {
|
||||
return_value = 2;
|
||||
} else {
|
||||
return_value *= 2;
|
||||
}
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
return_value8[0] = return_value;
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
return_value16[0] = return_value;
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
return_value32[0] = return_value;
|
||||
#endif
|
||||
/* Transform back the results. */
|
||||
if (current->compile_options & PCRE2_UTF) {
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
for (i = 0; i < return_value; ++i) {
|
||||
if (ovector16_1[i] != PCRE2_UNSET)
|
||||
ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
|
||||
if (ovector16_2[i] != PCRE2_UNSET)
|
||||
ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
|
||||
}
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
for (i = 0; i < return_value; ++i) {
|
||||
if (ovector32_1[i] != PCRE2_UNSET)
|
||||
ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
|
||||
if (ovector32_2[i] != PCRE2_UNSET)
|
||||
ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
for (i = 0; i < return_value; ++i) {
|
||||
#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
|
||||
if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
|
||||
printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
|
||||
i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector16_1[i], (int)ovector16_2[i],
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
#endif
|
||||
#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
|
||||
if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
|
||||
printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
|
||||
i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
#endif
|
||||
#if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
|
||||
if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector32_1[i] || ovector16_1[i] != ovector32_2[i]) {
|
||||
printf("\n16 and 32 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
|
||||
i, (int)ovector16_1[i], (int)ovector16_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
} else
|
||||
#endif /* more than one of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16 and SUPPORT_PCRE2_32 */
|
||||
{
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
if (return_value8[0] != return_value8[1]) {
|
||||
printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
|
||||
return_value8[0], return_value8[1], total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
} else if (return_value8[0] >= 0 || return_value8[0] == PCRE2_ERROR_PARTIAL) {
|
||||
if (return_value8[0] == PCRE2_ERROR_PARTIAL)
|
||||
return_value8[0] = 2;
|
||||
else
|
||||
return_value8[0] *= 2;
|
||||
|
||||
for (i = 0; i < return_value8[0]; ++i)
|
||||
if (ovector8_1[i] != ovector8_2[i]) {
|
||||
printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
|
||||
i, (int)ovector8_1[i], (int)ovector8_2[i], total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
if (return_value16[0] != return_value16[1]) {
|
||||
printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
|
||||
return_value16[0], return_value16[1], total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
} else if (return_value16[0] >= 0 || return_value16[0] == PCRE2_ERROR_PARTIAL) {
|
||||
if (return_value16[0] == PCRE2_ERROR_PARTIAL)
|
||||
return_value16[0] = 2;
|
||||
else
|
||||
return_value16[0] *= 2;
|
||||
|
||||
for (i = 0; i < return_value16[0]; ++i)
|
||||
if (ovector16_1[i] != ovector16_2[i]) {
|
||||
printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
|
||||
i, (int)ovector16_1[i], (int)ovector16_2[i], total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
if (return_value32[0] != return_value32[1]) {
|
||||
printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
|
||||
return_value32[0], return_value32[1], total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
} else if (return_value32[0] >= 0 || return_value32[0] == PCRE2_ERROR_PARTIAL) {
|
||||
if (return_value32[0] == PCRE2_ERROR_PARTIAL)
|
||||
return_value32[0] = 2;
|
||||
else
|
||||
return_value32[0] *= 2;
|
||||
|
||||
for (i = 0; i < return_value32[0]; ++i)
|
||||
if (ovector32_1[i] != ovector32_2[i]) {
|
||||
printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
|
||||
i, (int)ovector32_1[i], (int)ovector32_2[i], total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
if (is_successful) {
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
if (!(current->start_offset & F_NO8) && (utf || is_ascii)) {
|
||||
if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
|
||||
printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
|
||||
if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
|
||||
printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
if (!(current->start_offset & F_NO16) && (utf || is_ascii)) {
|
||||
if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
|
||||
printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
|
||||
if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
|
||||
printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
if (!(current->start_offset & F_NO32) && (utf || is_ascii)) {
|
||||
if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
|
||||
printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
|
||||
if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
|
||||
printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
if (is_successful) {
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
if (re8 && !(current->start_offset & F_NO8) && pcre2_get_mark_8(mdata8_1) != pcre2_get_mark_8(mdata8_2)) {
|
||||
printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
if (re16 && !(current->start_offset & F_NO16) && pcre2_get_mark_16(mdata16_1) != pcre2_get_mark_16(mdata16_2)) {
|
||||
printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
if (re32 && !(current->start_offset & F_NO32) && pcre2_get_mark_32(mdata32_1) != pcre2_get_mark_32(mdata32_2)) {
|
||||
printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
pcre2_code_free_8(re8);
|
||||
pcre2_match_data_free_8(mdata8_1);
|
||||
pcre2_match_data_free_8(mdata8_2);
|
||||
pcre2_match_context_free_8(mcontext8);
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
pcre2_code_free_16(re16);
|
||||
pcre2_match_data_free_16(mdata16_1);
|
||||
pcre2_match_data_free_16(mdata16_2);
|
||||
pcre2_match_context_free_16(mcontext16);
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
pcre2_code_free_32(re32);
|
||||
pcre2_match_data_free_32(mdata32_1);
|
||||
pcre2_match_data_free_32(mdata32_2);
|
||||
pcre2_match_context_free_32(mcontext32);
|
||||
#endif
|
||||
|
||||
if (is_successful) {
|
||||
successful++;
|
||||
successful_row++;
|
||||
printf(".");
|
||||
if (successful_row >= 60) {
|
||||
successful_row = 0;
|
||||
printf("\n");
|
||||
}
|
||||
} else
|
||||
successful_row = 0;
|
||||
|
||||
fflush(stdout);
|
||||
current++;
|
||||
}
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
setstack8(NULL);
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
setstack16(NULL);
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
setstack32(NULL);
|
||||
#endif
|
||||
|
||||
if (total == successful) {
|
||||
printf("\nAll JIT regression tests are successfully passed.\n");
|
||||
return 0;
|
||||
} else {
|
||||
printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined SUPPORT_UNICODE
|
||||
|
||||
/* Compares the outcome of one match attempt with the expectation stored in a
   test case and prints a diagnostic on the first discrepancy.

   pattern_index  index of the test case, used only in diagnostics
   type           label for the kind of match, used only in diagnostics
   result         return value of the match call being checked
   match_start    expected ovector[0]; a negative value means the call is
                  expected to return -1 (the value the PCRE2 API documents
                  as PCRE2_ERROR_NOMATCH) and the ovector is not inspected
   match_end      expected ovector[1]
   ovector        output vector filled by the match call

   Returns 0 when the outcome is as expected, 1 otherwise. */
static int check_invalid_utf_result(int pattern_index, const char *type, int result,
	int match_start, int match_end, PCRE2_SIZE *ovector)
{
	int mismatch = 0;

	if (match_start < 0) {
		/* No match is expected: only the return value matters. */
		if (result != -1) {
			printf("Pattern[%d] %s result is not -1.\n", pattern_index, type);
			mismatch = 1;
		}
	} else if (result <= 0) {
		/* A match is expected, so the call must report a positive count. */
		printf("Pattern[%d] %s result (%d) is not greater than 0.\n", pattern_index, type, result);
		mismatch = 1;
	} else if (ovector[0] != (PCRE2_SIZE)match_start) {
		printf("Pattern[%d] %s ovector[0] is unexpected (%d instead of %d)\n",
			pattern_index, type, (int)ovector[0], match_start);
		mismatch = 1;
	} else if (ovector[1] != (PCRE2_SIZE)match_end) {
		printf("Pattern[%d] %s ovector[1] is unexpected (%d instead of %d)\n",
			pattern_index, type, (int)ovector[1], match_end);
		mismatch = 1;
	}

	return mismatch;
}
|
||||
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_8
|
||||
|
||||
/* Shorthands for the option columns of the invalid UTF-8 test-case table.
   They are #undef'd again right after the table. */

/* Compile options: UTF + DOTALL + ANCHORED. */
#define UDA	(PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
/* JIT options: complete matching with invalid-UTF support. */
#define CI	(PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
/* JIT options: as CI, plus soft partial matching. */
#define CPI	(CI | PCRE2_JIT_PARTIAL_SOFT)
|
||||
|
||||
/* One entry of the invalid UTF-8 regression table. The table uses positional
   initializers, so the member order must not change. */
struct invalid_utf8_regression_test_case {
	/* Options handed to pcre2_compile_8(). */
	uint32_t compile_options;
	/* Options handed to pcre2_jit_compile_8(); the PCRE2_JIT_COMPLETE and
	   PCRE2_JIT_PARTIAL_SOFT bits also select which match runs are made. */
	int jit_compile_options;
	/* Start offset, counted from the beginning of the full input string. */
	int start_offset;
	/* Number of bytes dropped from the front / back of input before it is
	   passed to the matcher. */
	int skip_left, skip_right;
	/* Expected ovector[0] / ovector[1]; a negative match_start means the
	   match call is expected to return -1. */
	int match_start, match_end;
	/* Up to two patterns run against the same input; a NULL slot is skipped. */
	const char *pattern[2];
	/* Subject string (may contain malformed UTF-8). */
	const char *input;
};
|
||||
|
||||
static const char invalid_utf8_newline_cr;
|
||||
|
||||
static const struct invalid_utf8_regression_test_case invalid_utf8_regression_test_cases[] = {
|
||||
{ UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
|
||||
{ UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf0\x90\x80\x80" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf4\x90\x80\x80" },
|
||||
{ UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\x7f" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\xc0" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x8f\xbf\xbf" },
|
||||
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf#" },
|
||||
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf" },
|
||||
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80#" },
|
||||
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80" },
|
||||
{ UDA, CI, 0, 0, 2, -1, -1, { ".", NULL }, "\xef\xbf\xbf#" },
|
||||
{ UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xef\xbf\xbf" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\x7f#" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\xc0" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf#" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf" },
|
||||
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xed\x9f\xbf#" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xa0\x80#" },
|
||||
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xee\x80\x80#" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xbf\xbf#" },
|
||||
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf##" },
|
||||
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf#" },
|
||||
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf" },
|
||||
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80##" },
|
||||
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80#" },
|
||||
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80##" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0##" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf##" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80###" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8###" },
|
||||
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8" },
|
||||
{ UDA, CI, 0, 0, 0, 0, 1, { ".", NULL }, "\x7f" },
|
||||
|
||||
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf4\x8f\xbf\xbf#" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\xa0\x80\x80\xf4\xa0\x80\x80" },
|
||||
{ UDA, CPI, 4, 1, 1, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf" },
|
||||
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xef\xbf\xbf#" },
|
||||
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xe0\xa0\x80#" },
|
||||
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf0\x90\x80\x80#" },
|
||||
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf3\xbf\xbf\xbf#" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf0\x8f\xbf\xbf\xf0\x8f\xbf\xbf" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf5\x80\x80\x80\xf5\x80\x80\x80" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x90\x80\x80\xf4\x90\x80\x80" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xff\xf4\x8f\xbf\xff" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xff\xbf\xf4\x8f\xff\xbf" },
|
||||
{ UDA, CPI, 4, 0, 1, -1, -1, { "\\B", "\\b" }, "\xef\x80\x80\x80\xef\x80\x80" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80\x80\x80\x80\x80\x80\x80" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xe0\x9f\xbf\xe0\x9f\xbf#" },
|
||||
{ UDA, CPI, 4, 2, 2, -1, -1, { "\\B", "\\b" }, "#\xe0\xa0\x80\xe0\xa0\x80#" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xf0\x80\x80\xf0\x80\x80#" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xed\xa0\x80\xed\xa0\x80#" },
|
||||
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xdf\xbf#" },
|
||||
{ UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xdf\xbf#" },
|
||||
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xc2\x80#" },
|
||||
{ UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xc2\x80#" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xc1\xbf\xc1\xbf##" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xdf\xc0\xdf\xc0##" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xe0\x80\xe0\x80##" },
|
||||
|
||||
{ UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xef\xbf\xbf#" },
|
||||
{ UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xe0\xa0\x80#" },
|
||||
{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x9f\xbf\xe0\x9f\xbf" },
|
||||
{ UDA, CPI, 3, 1, 1, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xbf\xef\xbf\xbf" },
|
||||
{ UDA, CPI, 3, 0, 1, -1, -1, { "\\B", "\\b" }, "\xdf\x80\x80\xdf\x80" },
|
||||
{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xff\xef\xbf\xff" },
|
||||
{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xff\xbf\xef\xff\xbf" },
|
||||
{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xed\xbf\xbf\xed\xbf\xbf" },
|
||||
|
||||
{ UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xdf\xbf#" },
|
||||
{ UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xc2\x80#" },
|
||||
{ UDA, CPI, 2, 1, 1, -1, -1, { "\\B", "\\b" }, "\xdf\xbf\xdf\xbf" },
|
||||
{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xc1\xbf\xc1\xbf" },
|
||||
{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x80\xe0\x80" },
|
||||
{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xdf\xff\xdf\xff" },
|
||||
{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xff\xbf\xff\xbf" },
|
||||
|
||||
{ UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x7f#" },
|
||||
{ UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x01#" },
|
||||
{ UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80" },
|
||||
{ UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\xb0\xb0" },
|
||||
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { "(.)\\1", NULL }, "aA" },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "a\xff" },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "\xc2\x80\x80" },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 6, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 8, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
|
||||
|
||||
{ UDA, CPI, 0, 0, 0, 0, 1, { "\\X", NULL }, "A" },
|
||||
{ UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xff" },
|
||||
{ UDA, CPI, 0, 0, 0, 0, 2, { "\\X", NULL }, "\xc3\xa1" },
|
||||
{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xc3\xa1" },
|
||||
{ UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xc3\x7f" },
|
||||
{ UDA, CPI, 0, 0, 0, 0, 3, { "\\X", NULL }, "\xe1\xbd\xb8" },
|
||||
{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xe1\xbd\xb8" },
|
||||
{ UDA, CPI, 0, 0, 0, 0, 4, { "\\X", NULL }, "\xf0\x90\x90\x80" },
|
||||
{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xf0\x90\x90\x80" },
|
||||
|
||||
{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "#" },
|
||||
{ UDA, CPI, 0, 0, 0, 0, 4, { "[^#]", NULL }, "\xf4\x8f\xbf\xbf" },
|
||||
{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xf4\x90\x80\x80" },
|
||||
{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xc1\x80" },
|
||||
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { "^\\W", NULL }, " \x0a#"},
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 14, 15, { "^\\W", NULL }, " \xc0\x8a#\xe0\x80\x8a#\xf0\x80\x80\x8a#\x0a#"},
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf8\x0a#"},
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xc3\x0a#"},
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf1\x0a#"},
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xf2\xbf\x0a#"},
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \xf2\xbf\xbf\x0a#"},
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xef\x0a#"},
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xef\xbf\x0a#"},
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \x85#\xc2\x85#"},
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 7, 8, { "^\\W", NULL }, " \xe2\x80\xf8\xe2\x80\xa8#"},
|
||||
|
||||
{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xe2\x80\xf8\xe2\x80\xa8#"},
|
||||
{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 3, 4, { "#", NULL }, "\xe2\x80\xf8#\xe2\x80\xa8#"},
|
||||
{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "abcd\xc2\x85#"},
|
||||
{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 1, 2, { "#", NULL }, "\x85#\xc2\x85#"},
|
||||
{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 5, 6, { "#", NULL }, "\xef,\x80,\xf8#\x0a"},
|
||||
{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xef,\x80,\xf8\x0a#"},
|
||||
|
||||
{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
|
||||
{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
|
||||
{ PCRE2_UTF, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
|
||||
{ PCRE2_UTF, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
|
||||
|
||||
{ PCRE2_UTF | PCRE2_UCP, CI, 0, 0, 0, -1, -1, { "[\\s]", NULL }, "\xed\xa0\x80" },
|
||||
{ PCRE2_UTF, CI, 0, 0, 0, 0, 3, { "[\\D]", NULL }, "\xe0\xab\xaa@" },
|
||||
{ PCRE2_UTF, CI, 0, 0, 0, 0, 3, { "\\D+", NULL }, "n\xc3\xb1" },
|
||||
{ PCRE2_UTF, CI, 0, 0, 0, 0, 5, { "\\W+", NULL }, "@\xf0\x9d\x84\x9e" },
|
||||
|
||||
/* These two are not invalid UTF tests, but this infrastructure fits better for them. */
|
||||
{ 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\X{2}", NULL }, "\r\n\n" },
|
||||
{ 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\R{2}", NULL }, "\r\n\n" },
|
||||
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 0, 0, 0, -1, -1, { "^.a", &invalid_utf8_newline_cr }, "\xc3\xa7#a" },
|
||||
|
||||
{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
|
||||
};
|
||||
|
||||
#undef UDA
|
||||
#undef CI
|
||||
#undef CPI
|
||||
|
||||
/* Runs pattern[i] of one invalid UTF-8 test case through the JIT matcher.

   The pattern is compiled with pcre2_compile_8() and pcre2_jit_compile_8()
   using the options stored in the test case. The input, trimmed by skip_left
   and skip_right bytes, is then matched with pcre2_jit_match_8():
   once without match options if PCRE2_JIT_COMPLETE is set in
   jit_compile_options, and once with PCRE2_PARTIAL_SOFT if
   PCRE2_JIT_PARTIAL_SOFT is set. Each result is validated by
   check_invalid_utf_result().

   current        test case to run
   pattern_index  index of the test case, used only in diagnostics
   i              which of the two pattern slots to use (0 or 1)
   ccontext       compile context passed to pcre2_compile_8()
   mdata          match data block that receives the results

   Returns 1 on success, or when pattern[i] is NULL (nothing to run);
   returns 0 after printing a diagnostic on any failure. */
static int run_invalid_utf8_test(const struct invalid_utf8_regression_test_case *current,
	int pattern_index, int i, pcre2_compile_context_8 *ccontext, pcre2_match_data_8 *mdata)
{
	pcre2_code_8 *code;
	int result, errorcode;
	int passed = 0;
	PCRE2_SIZE length, erroroffset;
	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_8(mdata);
	PCRE2_SPTR8 subject;

	if (current->pattern[i] == NULL)
		return 1;

	/* PCRE2_SPTR8 keeps the const qualifier of the table strings. */
	code = pcre2_compile_8((PCRE2_SPTR8)current->pattern[i], PCRE2_ZERO_TERMINATED,
		current->compile_options, &errorcode, &erroroffset, ccontext);

	if (!code) {
		/* Report the actual pattern slot, not a fixed 0. */
		printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
		return 0;
	}

	if (pcre2_jit_compile_8(code, current->jit_compile_options) != 0) {
		printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
		goto done;
	}

	/* The matcher only sees the input with skip_left bytes removed from the
	   front and skip_right bytes from the back; start_offset is given
	   relative to the untrimmed input, so it is shifted accordingly. */
	subject = (PCRE2_SPTR8)(current->input + current->skip_left);
	length = (PCRE2_SIZE)(strlen(current->input) - current->skip_left - current->skip_right);

	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
		result = pcre2_jit_match_8(code, subject,
			length, current->start_offset - current->skip_left, 0, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector))
			goto done;
	}

	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
		result = pcre2_jit_match_8(code, subject,
			length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector))
			goto done;
	}

	passed = 1;

done:
	/* Single release point for the compiled pattern. */
	pcre2_code_free_8(code);
	return passed;
}
|
||||
|
||||
static int invalid_utf8_regression_tests(void)
|
||||
{
|
||||
const struct invalid_utf8_regression_test_case *current;
|
||||
pcre2_compile_context_8 *ccontext;
|
||||
pcre2_match_data_8 *mdata;
|
||||
int total = 0, successful = 0;
|
||||
int result;
|
||||
|
||||
printf("\nRunning invalid-utf8 JIT regression tests\n");
|
||||
|
||||
ccontext = pcre2_compile_context_create_8(NULL);
|
||||
pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY);
|
||||
mdata = pcre2_match_data_create_8(4, NULL);
|
||||
|
||||
for (current = invalid_utf8_regression_test_cases; current->pattern[0]; current++) {
|
||||
/* printf("\nPattern: %s :\n", current->pattern); */
|
||||
total++;
|
||||
|
||||
result = 1;
|
||||
if (current->pattern[1] != &invalid_utf8_newline_cr)
|
||||
{
|
||||
if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
|
||||
result = 0;
|
||||
if (!run_invalid_utf8_test(current, total - 1, 1, ccontext, mdata))
|
||||
result = 0;
|
||||
} else {
|
||||
pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_CR);
|
||||
if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
|
||||
result = 0;
|
||||
pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY);
|
||||
}
|
||||
|
||||
if (result) {
|
||||
successful++;
|
||||
}
|
||||
|
||||
printf(".");
|
||||
if ((total % 60) == 0)
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
if ((total % 60) != 0)
|
||||
printf("\n");
|
||||
|
||||
pcre2_match_data_free_8(mdata);
|
||||
pcre2_compile_context_free_8(ccontext);
|
||||
|
||||
if (total == successful) {
|
||||
printf("\nAll invalid UTF8 JIT regression tests are successfully passed.\n");
|
||||
return 0;
|
||||
} else {
|
||||
printf("\nInvalid UTF8 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
#else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_8 */
|
||||
|
||||
/* Stand-in used when Unicode or 8-bit support is not compiled in. */
static int invalid_utf8_regression_tests(void)
{
    /* Nothing to run; 0 is the value the real runner returns when no test
    has failed. */
    return 0;
}
|
||||
|
||||
#endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_8 */
|
||||
|
||||
#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_16
|
||||
|
||||
#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
|
||||
#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
|
||||
#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
|
||||
|
||||
struct invalid_utf16_regression_test_case {
|
||||
uint32_t compile_options;
|
||||
int jit_compile_options;
|
||||
int start_offset;
|
||||
int skip_left;
|
||||
int skip_right;
|
||||
int match_start;
|
||||
int match_end;
|
||||
const PCRE2_UCHAR16 *pattern[2];
|
||||
const PCRE2_UCHAR16 *input;
|
||||
};
|
||||
|
||||
/* Patterns for the invalid-UTF-16 tests, spelled out as zero-terminated
16-bit code unit arrays. */
static PCRE2_UCHAR16 allany16[] = { '.', 0 };
static PCRE2_UCHAR16 non_word_boundary16[] = { '\\', 'B', 0 };
static PCRE2_UCHAR16 word_boundary16[] = { '\\', 'b', 0 };
static PCRE2_UCHAR16 backreference16[] = { '(', '.', ')', '\\', '1', 0 };
static PCRE2_UCHAR16 grapheme16[] = { '\\', 'X', 0 };
static PCRE2_UCHAR16 nothashmark16[] = { '[', '^', '#', ']', 0 };
static PCRE2_UCHAR16 afternl16[] = { '^', '\\', 'W', 0 };
static PCRE2_UCHAR16 generic16[] = { '#', 0xd800, 0xdc00, '#', 0 };

/* Subject strings, also zero-terminated. Values in 0xd800-0xdbff are high
surrogates and values in 0xdc00-0xdfff are low surrogates; several subjects
place them in unpaired or reversed positions. */
static PCRE2_UCHAR16 test16_1[] = { 0xd7ff, 0xe000, 0xffff, 0x01, '#', 0 };
static PCRE2_UCHAR16 test16_2[] = { 0xd800, 0xdc00, 0xd800, 0xdc00, 0 };
static PCRE2_UCHAR16 test16_3[] = { 0xdbff, 0xdfff, 0xdbff, 0xdfff, 0 };
static PCRE2_UCHAR16 test16_4[] = { 0xd800, 0xdbff, 0xd800, 0xdbff, 0 };
static PCRE2_UCHAR16 test16_5[] = { '#', 0xd800, 0xdc00, '#', 0 };
static PCRE2_UCHAR16 test16_6[] = { 'a', 'A', 0xdc28, 0 };
static PCRE2_UCHAR16 test16_7[] = { 0xd801, 0xdc00, 0xd801, 0xdc28, 0 };
static PCRE2_UCHAR16 test16_8[] = { '#', 0xd800, 0xdc00, 0 };
static PCRE2_UCHAR16 test16_9[] = { ' ', 0x2028, '#', 0 };
static PCRE2_UCHAR16 test16_10[] = { ' ', 0xdc00, 0xd800, 0x2028, '#', 0 };
static PCRE2_UCHAR16 test16_11[] = { 0xdc00, 0xdc00, 0xd800, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 };
static PCRE2_UCHAR16 test16_12[] = { '#', 0xd800, 0xdc00, 0xd800, '#', 0xd800, 0xdc00, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 };
|
||||
|
||||
static const struct invalid_utf16_regression_test_case invalid_utf16_regression_test_cases[] = {
|
||||
{ UDA, CI, 0, 0, 0, 0, 1, { allany16, NULL }, test16_1 },
|
||||
{ UDA, CI, 1, 0, 0, 1, 2, { allany16, NULL }, test16_1 },
|
||||
{ UDA, CI, 2, 0, 0, 2, 3, { allany16, NULL }, test16_1 },
|
||||
{ UDA, CI, 3, 0, 0, 3, 4, { allany16, NULL }, test16_1 },
|
||||
{ UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_2 },
|
||||
{ UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_2 },
|
||||
{ UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_2 },
|
||||
{ UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_3 },
|
||||
{ UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_3 },
|
||||
{ UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_3 },
|
||||
|
||||
{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary16, NULL }, test16_1 },
|
||||
{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_1 },
|
||||
{ UDA, CPI, 3, 0, 0, 3, 3, { non_word_boundary16, NULL }, test16_1 },
|
||||
{ UDA, CPI, 4, 0, 0, 4, 4, { non_word_boundary16, NULL }, test16_1 },
|
||||
{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_2 },
|
||||
{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_3 },
|
||||
{ UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_2 },
|
||||
{ UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_3 },
|
||||
{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_4 },
|
||||
{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_5 },
|
||||
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference16, NULL }, test16_6 },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference16, NULL }, test16_6 },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { backreference16, NULL }, test16_7 },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { backreference16, NULL }, test16_7 },
|
||||
|
||||
{ UDA, CPI, 0, 0, 0, 0, 1, { grapheme16, NULL }, test16_6 },
|
||||
{ UDA, CPI, 1, 0, 0, 1, 2, { grapheme16, NULL }, test16_6 },
|
||||
{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme16, NULL }, test16_6 },
|
||||
{ UDA, CPI, 0, 0, 0, 0, 2, { grapheme16, NULL }, test16_7 },
|
||||
{ UDA, CPI, 2, 0, 0, 2, 4, { grapheme16, NULL }, test16_7 },
|
||||
{ UDA, CPI, 1, 0, 0, -1, -1, { grapheme16, NULL }, test16_7 },
|
||||
|
||||
{ UDA, CPI, 0, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 },
|
||||
{ UDA, CPI, 1, 0, 0, 1, 3, { nothashmark16, NULL }, test16_8 },
|
||||
{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 },
|
||||
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl16, NULL }, test16_9 },
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { afternl16, NULL }, test16_10 },
|
||||
|
||||
{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 },
|
||||
{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 },
|
||||
{ PCRE2_UTF, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 },
|
||||
{ PCRE2_UTF, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 },
|
||||
|
||||
{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
|
||||
};
|
||||
|
||||
#undef UDA
|
||||
#undef CI
|
||||
#undef CPI
|
||||
|
||||
/* Runs one pattern of an invalid-UTF-16 regression test case through the JIT.

The pattern is compiled, JIT-compiled with the test case's JIT options, and
then matched once per requested mode (complete and/or partial-soft). Each
match result is handed to check_invalid_utf_result(); a non-zero return from
that helper is treated here as a failure.

Arguments:
  current        the test case to run
  pattern_index  index of the test case, used only in diagnostics
  i              which entry of current->pattern[] to run
  ccontext       compile context passed to pcre2_compile_16()
  mdata          match data block that receives the match offsets

Returns:         1 if the pattern slot is empty or every check passed,
                 0 otherwise */
static int run_invalid_utf16_test(const struct invalid_utf16_regression_test_case *current,
    int pattern_index, int i, pcre2_compile_context_16 *ccontext, pcre2_match_data_16 *mdata)
{
    pcre2_code_16 *code;
    int result, errorcode;
    PCRE2_SIZE length, erroroffset;
    const PCRE2_UCHAR16 *input;
    PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_16(mdata);

    /* An unused pattern slot counts as a pass. */
    if (current->pattern[i] == NULL)
        return 1;

    code = pcre2_compile_16(current->pattern[i], PCRE2_ZERO_TERMINATED,
        current->compile_options, &errorcode, &erroroffset, ccontext);

    /* Report the real sub-pattern index so a failure of pattern[1] is not
    mistaken for a failure of pattern[0]. */
    if (!code) {
        printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
        return 0;
    }

    if (pcre2_jit_compile_16(code, current->jit_compile_options) != 0) {
        printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
        pcre2_code_free_16(code);
        return 0;
    }

    /* Count the code units up to the terminating zero. */
    input = current->input;
    length = 0;

    while (*input++ != 0)
        length++;

    /* The subject passed to the matcher excludes skip_left code units at the
    front and skip_right at the back; start_offset is rebased accordingly. */
    length -= current->skip_left + current->skip_right;

    if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
        result = pcre2_jit_match_16(code, (current->input + current->skip_left),
            length, current->start_offset - current->skip_left, 0, mdata, NULL);

        if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) {
            pcre2_code_free_16(code);
            return 0;
        }
    }

    if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
        result = pcre2_jit_match_16(code, (current->input + current->skip_left),
            length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);

        if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) {
            pcre2_code_free_16(code);
            return 0;
        }
    }

    pcre2_code_free_16(code);
    return 1;
}
|
||||
|
||||
static int invalid_utf16_regression_tests(void)
|
||||
{
|
||||
const struct invalid_utf16_regression_test_case *current;
|
||||
pcre2_compile_context_16 *ccontext;
|
||||
pcre2_match_data_16 *mdata;
|
||||
int total = 0, successful = 0;
|
||||
int result;
|
||||
|
||||
printf("\nRunning invalid-utf16 JIT regression tests\n");
|
||||
|
||||
ccontext = pcre2_compile_context_create_16(NULL);
|
||||
pcre2_set_newline_16(ccontext, PCRE2_NEWLINE_ANY);
|
||||
mdata = pcre2_match_data_create_16(4, NULL);
|
||||
|
||||
for (current = invalid_utf16_regression_test_cases; current->pattern[0]; current++) {
|
||||
/* printf("\nPattern: %s :\n", current->pattern); */
|
||||
total++;
|
||||
|
||||
result = 1;
|
||||
if (!run_invalid_utf16_test(current, total - 1, 0, ccontext, mdata))
|
||||
result = 0;
|
||||
if (!run_invalid_utf16_test(current, total - 1, 1, ccontext, mdata))
|
||||
result = 0;
|
||||
|
||||
if (result) {
|
||||
successful++;
|
||||
}
|
||||
|
||||
printf(".");
|
||||
if ((total % 60) == 0)
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
if ((total % 60) != 0)
|
||||
printf("\n");
|
||||
|
||||
pcre2_match_data_free_16(mdata);
|
||||
pcre2_compile_context_free_16(ccontext);
|
||||
|
||||
if (total == successful) {
|
||||
printf("\nAll invalid UTF16 JIT regression tests are successfully passed.\n");
|
||||
return 0;
|
||||
} else {
|
||||
printf("\nInvalid UTF16 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
#else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_16 */
|
||||
|
||||
/* Stand-in used when Unicode or 16-bit support is not compiled in. */
static int invalid_utf16_regression_tests(void)
{
    /* Nothing to run; 0 is the value the real runner returns when no test
    has failed. */
    return 0;
}
|
||||
|
||||
#endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_16 */
|
||||
|
||||
#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_32
|
||||
|
||||
#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
|
||||
#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
|
||||
#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
|
||||
|
||||
struct invalid_utf32_regression_test_case {
|
||||
uint32_t compile_options;
|
||||
int jit_compile_options;
|
||||
int start_offset;
|
||||
int skip_left;
|
||||
int skip_right;
|
||||
int match_start;
|
||||
int match_end;
|
||||
const PCRE2_UCHAR32 *pattern[2];
|
||||
const PCRE2_UCHAR32 *input;
|
||||
};
|
||||
|
||||
/* Patterns for the invalid-UTF-32 tests, spelled out as zero-terminated
32-bit code unit arrays. */
static PCRE2_UCHAR32 allany32[] = { '.', 0 };
static PCRE2_UCHAR32 non_word_boundary32[] = { '\\', 'B', 0 };
static PCRE2_UCHAR32 word_boundary32[] = { '\\', 'b', 0 };
static PCRE2_UCHAR32 backreference32[] = { '(', '.', ')', '\\', '1', 0 };
static PCRE2_UCHAR32 grapheme32[] = { '\\', 'X', 0 };
static PCRE2_UCHAR32 nothashmark32[] = { '[', '^', '#', ']', 0 };
static PCRE2_UCHAR32 afternl32[] = { '^', '\\', 'W', 0 };

/* Subject strings, also zero-terminated. 0x110000 lies above the highest
Unicode code point (0x10ffff) and 0xd800-0xdfff is the surrogate range. */
static PCRE2_UCHAR32 test32_1[] = { 0x10ffff, 0x10ffff, 0x110000, 0x110000, 0x10ffff, 0 };
static PCRE2_UCHAR32 test32_2[] = { 0xd7ff, 0xe000, 0xd800, 0xdfff, 0xe000, 0xdfff, 0xd800, 0 };
static PCRE2_UCHAR32 test32_3[] = { 'a', 'A', 0x110000, 0 };
static PCRE2_UCHAR32 test32_4[] = { '#', 0x10ffff, 0x110000, 0 };
static PCRE2_UCHAR32 test32_5[] = { ' ', 0x2028, '#', 0 };
static PCRE2_UCHAR32 test32_6[] = { ' ', 0x110000, 0x2028, '#', 0 };
|
||||
|
||||
static const struct invalid_utf32_regression_test_case invalid_utf32_regression_test_cases[] = {
|
||||
{ UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_1 },
|
||||
{ UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_1 },
|
||||
{ UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_2 },
|
||||
{ UDA, CI, 1, 0, 0, 1, 2, { allany32, NULL }, test32_2 },
|
||||
{ UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_2 },
|
||||
{ UDA, CI, 3, 0, 0, -1, -1, { allany32, NULL }, test32_2 },
|
||||
|
||||
{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_1 },
|
||||
{ UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_1 },
|
||||
{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_2 },
|
||||
{ UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },
|
||||
{ UDA, CPI, 6, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },
|
||||
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference32, NULL }, test32_3 },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference32, NULL }, test32_3 },
|
||||
|
||||
{ UDA, CPI, 0, 0, 0, 0, 1, { grapheme32, NULL }, test32_1 },
|
||||
{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_1 },
|
||||
{ UDA, CPI, 1, 0, 0, 1, 2, { grapheme32, NULL }, test32_2 },
|
||||
{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_2 },
|
||||
{ UDA, CPI, 3, 0, 0, -1, -1, { grapheme32, NULL }, test32_2 },
|
||||
{ UDA, CPI, 4, 0, 0, 4, 5, { grapheme32, NULL }, test32_2 },
|
||||
|
||||
{ UDA, CPI, 0, 0, 0, -1, -1, { nothashmark32, NULL }, test32_4 },
|
||||
{ UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_4 },
|
||||
{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_4 },
|
||||
{ UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_2 },
|
||||
{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_2 },
|
||||
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl32, NULL }, test32_5 },
|
||||
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { afternl32, NULL }, test32_6 },
|
||||
|
||||
{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
|
||||
};
|
||||
|
||||
#undef UDA
|
||||
#undef CI
|
||||
#undef CPI
|
||||
|
||||
/* Runs one pattern of an invalid-UTF-32 regression test case through the JIT.

The pattern is compiled, JIT-compiled with the test case's JIT options, and
then matched once per requested mode (complete and/or partial-soft). Each
match result is handed to check_invalid_utf_result(); a non-zero return from
that helper is treated here as a failure.

Arguments:
  current        the test case to run
  pattern_index  index of the test case, used only in diagnostics
  i              which entry of current->pattern[] to run
  ccontext       compile context passed to pcre2_compile_32()
  mdata          match data block that receives the match offsets

Returns:         1 if the pattern slot is empty or every check passed,
                 0 otherwise */
static int run_invalid_utf32_test(const struct invalid_utf32_regression_test_case *current,
    int pattern_index, int i, pcre2_compile_context_32 *ccontext, pcre2_match_data_32 *mdata)
{
    pcre2_code_32 *code;
    int result, errorcode;
    PCRE2_SIZE length, erroroffset;
    const PCRE2_UCHAR32 *input;
    PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_32(mdata);

    /* An unused pattern slot counts as a pass. */
    if (current->pattern[i] == NULL)
        return 1;

    code = pcre2_compile_32(current->pattern[i], PCRE2_ZERO_TERMINATED,
        current->compile_options, &errorcode, &erroroffset, ccontext);

    /* Report the real sub-pattern index so a failure of pattern[1] is not
    mistaken for a failure of pattern[0]. */
    if (!code) {
        printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
        return 0;
    }

    if (pcre2_jit_compile_32(code, current->jit_compile_options) != 0) {
        printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
        pcre2_code_free_32(code);
        return 0;
    }

    /* Count the code units up to the terminating zero. */
    input = current->input;
    length = 0;

    while (*input++ != 0)
        length++;

    /* The subject passed to the matcher excludes skip_left code units at the
    front and skip_right at the back; start_offset is rebased accordingly. */
    length -= current->skip_left + current->skip_right;

    if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
        result = pcre2_jit_match_32(code, (current->input + current->skip_left),
            length, current->start_offset - current->skip_left, 0, mdata, NULL);

        if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) {
            pcre2_code_free_32(code);
            return 0;
        }
    }

    if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
        result = pcre2_jit_match_32(code, (current->input + current->skip_left),
            length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);

        if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) {
            pcre2_code_free_32(code);
            return 0;
        }
    }

    pcre2_code_free_32(code);
    return 1;
}
|
||||
|
||||
static int invalid_utf32_regression_tests(void)
|
||||
{
|
||||
const struct invalid_utf32_regression_test_case *current;
|
||||
pcre2_compile_context_32 *ccontext;
|
||||
pcre2_match_data_32 *mdata;
|
||||
int total = 0, successful = 0;
|
||||
int result;
|
||||
|
||||
printf("\nRunning invalid-utf32 JIT regression tests\n");
|
||||
|
||||
ccontext = pcre2_compile_context_create_32(NULL);
|
||||
pcre2_set_newline_32(ccontext, PCRE2_NEWLINE_ANY);
|
||||
mdata = pcre2_match_data_create_32(4, NULL);
|
||||
|
||||
for (current = invalid_utf32_regression_test_cases; current->pattern[0]; current++) {
|
||||
/* printf("\nPattern: %s :\n", current->pattern); */
|
||||
total++;
|
||||
|
||||
result = 1;
|
||||
if (!run_invalid_utf32_test(current, total - 1, 0, ccontext, mdata))
|
||||
result = 0;
|
||||
if (!run_invalid_utf32_test(current, total - 1, 1, ccontext, mdata))
|
||||
result = 0;
|
||||
|
||||
if (result) {
|
||||
successful++;
|
||||
}
|
||||
|
||||
printf(".");
|
||||
if ((total % 60) == 0)
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
if ((total % 60) != 0)
|
||||
printf("\n");
|
||||
|
||||
pcre2_match_data_free_32(mdata);
|
||||
pcre2_compile_context_free_32(ccontext);
|
||||
|
||||
if (total == successful) {
|
||||
printf("\nAll invalid UTF32 JIT regression tests are successfully passed.\n");
|
||||
return 0;
|
||||
} else {
|
||||
printf("\nInvalid UTF32 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
#else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_32 */
|
||||
|
||||
/* Stand-in used when Unicode or 32-bit support is not compiled in. */
static int invalid_utf32_regression_tests(void)
{
    /* Nothing to run; 0 is the value the real runner returns when no test
    has failed. */
    return 0;
}
|
||||
|
||||
#endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_32 */
|
||||
|
||||
/* End of pcre2_jit_test.c */
|
||||
165
3rd/pcre2/src/pcre2_maketables.c
Normal file
165
3rd/pcre2/src/pcre2_maketables.c
Normal file
@@ -0,0 +1,165 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre2_maketables(), which builds
|
||||
character tables for PCRE2 in the current locale. The file is compiled on its
|
||||
own as part of the PCRE2 library. It is also included in the compilation of
|
||||
pcre2_dftables.c as a freestanding program, in which case the macro
|
||||
PCRE2_DFTABLES is defined. */
|
||||
|
||||
#ifndef PCRE2_DFTABLES /* Compiling the library */
|
||||
# ifdef HAVE_CONFIG_H
|
||||
# include "config.h"
|
||||
# endif
|
||||
# include "pcre2_internal.h"
|
||||
#endif
|
||||
|
||||
/*************************************************
|
||||
* Create PCRE2 character tables *
|
||||
*************************************************/
|
||||
|
||||
/* This function builds a set of character tables for use by PCRE2 and returns
|
||||
a pointer to them. They are built using the ctype functions, and consequently
|
||||
their contents will depend upon the current locale setting. When compiled as
|
||||
part of the library, the store is obtained via a general context malloc, if
|
||||
supplied, but when PCRE2_DFTABLES is defined (when compiling the pcre2_dftables
|
||||
freestanding auxiliary program) malloc() is used, and the function has a
|
||||
different name so as not to clash with the prototype in pcre2.h.
|
||||
|
||||
Arguments: none when PCRE2_DFTABLES is defined
|
||||
else a PCRE2 general context or NULL
|
||||
Returns: pointer to the contiguous block of data
|
||||
else NULL if memory allocation failed
|
||||
*/
|
||||
|
||||
#ifdef PCRE2_DFTABLES /* Included in freestanding pcre2_dftables program */
|
||||
static const uint8_t *maketables(void)
|
||||
{
|
||||
uint8_t *yield = (uint8_t *)malloc(TABLES_LENGTH);
|
||||
|
||||
#else /* Not PCRE2_DFTABLES, that is, compiling the library */
|
||||
PCRE2_EXP_DEFN const uint8_t * PCRE2_CALL_CONVENTION
|
||||
pcre2_maketables(pcre2_general_context *gcontext)
|
||||
{
|
||||
uint8_t *yield = (uint8_t *)((gcontext != NULL)?
|
||||
gcontext->memctl.malloc(TABLES_LENGTH, gcontext->memctl.memory_data) :
|
||||
malloc(TABLES_LENGTH));
|
||||
#endif /* PCRE2_DFTABLES */
|
||||
|
||||
int i;
|
||||
uint8_t *p;
|
||||
|
||||
if (yield == NULL) return NULL;
|
||||
p = yield;
|
||||
|
||||
/* First comes the lower casing table */
|
||||
|
||||
for (i = 0; i < 256; i++) *p++ = tolower(i);
|
||||
|
||||
/* Next the case-flipping table */
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
int c = islower(i)? toupper(i) : tolower(i);
|
||||
*p++ = (c < 256)? c : i;
|
||||
}
|
||||
|
||||
/* Then the character class tables. Don't try to be clever and save effort on
|
||||
exclusive ones - in some locales things may be different.
|
||||
|
||||
Note that the table for "space" includes everything "isspace" gives, including
|
||||
VT in the default locale. This makes it work for the POSIX class [:space:].
|
||||
From PCRE1 release 8.34 and for all PCRE2 releases it is also correct for Perl
|
||||
space, because Perl added VT at release 5.18.
|
||||
|
||||
Note also that it is possible for a character to be alnum or alpha without
|
||||
being lower or upper, such as "male and female ordinals" (\xAA and \xBA) in the
|
||||
fr_FR locale (at least under Debian Linux's locales as of 12/2005). So we must
|
||||
test for alnum specially. */
|
||||
|
||||
memset(p, 0, cbit_length);
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if (isdigit(i)) p[cbit_digit + i/8] |= 1u << (i&7);
|
||||
if (isupper(i)) p[cbit_upper + i/8] |= 1u << (i&7);
|
||||
if (islower(i)) p[cbit_lower + i/8] |= 1u << (i&7);
|
||||
if (isalnum(i)) p[cbit_word + i/8] |= 1u << (i&7);
|
||||
if (i == '_') p[cbit_word + i/8] |= 1u << (i&7);
|
||||
if (isspace(i)) p[cbit_space + i/8] |= 1u << (i&7);
|
||||
if (isxdigit(i)) p[cbit_xdigit + i/8] |= 1u << (i&7);
|
||||
if (isgraph(i)) p[cbit_graph + i/8] |= 1u << (i&7);
|
||||
if (isprint(i)) p[cbit_print + i/8] |= 1u << (i&7);
|
||||
if (ispunct(i)) p[cbit_punct + i/8] |= 1u << (i&7);
|
||||
if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1u << (i&7);
|
||||
}
|
||||
p += cbit_length;
|
||||
|
||||
/* Finally, the character type table. In this, we used to exclude VT from the
|
||||
white space chars, because Perl didn't recognize it as such for \s and for
|
||||
comments within regexes. However, Perl changed at release 5.18, so PCRE1
|
||||
changed at release 8.34 and it's always been this way for PCRE2. */
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
int x = 0;
|
||||
if (isspace(i)) x += ctype_space;
|
||||
if (isalpha(i)) x += ctype_letter;
|
||||
if (islower(i)) x += ctype_lcletter;
|
||||
if (isdigit(i)) x += ctype_digit;
|
||||
if (isalnum(i) || i == '_') x += ctype_word;
|
||||
*p++ = x;
|
||||
}
|
||||
|
||||
return yield;
|
||||
}
|
||||
|
||||
#ifndef PCRE2_DFTABLES /* Compiling the library */
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_maketables_free(pcre2_general_context *gcontext, const uint8_t *tables)
|
||||
{
|
||||
if (gcontext != NULL)
|
||||
gcontext->memctl.free((void *)tables, gcontext->memctl.memory_data);
|
||||
else
|
||||
free((void *)tables);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* End of pcre2_maketables.c */
|
||||
8080
3rd/pcre2/src/pcre2_match.c
Normal file
8080
3rd/pcre2/src/pcre2_match.c
Normal file
File diff suppressed because it is too large
Load Diff
187
3rd/pcre2/src/pcre2_match_data.c
Normal file
187
3rd/pcre2/src/pcre2_match_data.c
Normal file
@@ -0,0 +1,187 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create a match data block given ovector size *
|
||||
*************************************************/
|
||||
|
||||
/* A minimum of 1 is imposed on the number of ovector pairs. A maximum is also
|
||||
imposed because the oveccount field in a match data block is uint16_t. */
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext)
{
/*
Arguments:
  oveccount   requested number of ovector pairs
  gcontext    general context, passed to PRIV(memctl_malloc) as a memctl

Returns:      pointer to the new match data block, or NULL if the
              allocation failed
*/

pcre2_match_data *block;
size_t nbytes;

/* Force the pair count into the range 1..UINT16_MAX. */

if (oveccount == 0) oveccount = 1;
else if (oveccount > UINT16_MAX) oveccount = UINT16_MAX;

/* The block is sized as everything up to the ovector field, followed by two
PCRE2_SIZE values for each pair. */

nbytes = offsetof(pcre2_match_data, ovector) + 2*oveccount*sizeof(PCRE2_SIZE);
block = PRIV(memctl_malloc)(nbytes, (pcre2_memctl *)gcontext);

/* Only a successfully allocated block gets its fields initialized. */

if (block != NULL)
  {
  block->heapframes_size = 0;
  block->heapframes = NULL;
  block->flags = 0;
  block->oveccount = oveccount;
  }

return block;
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create a match data block using pattern data *
|
||||
*************************************************/
|
||||
|
||||
/* If no context is supplied, use the memory allocator from the code. This code
|
||||
assumes that a general context contains nothing other than a memory allocator.
|
||||
If that ever changes, this code will need fixing. */
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create_from_pattern(const pcre2_code *code,
  pcre2_general_context *gcontext)
{
/*
Arguments:
  code        the compiled pattern; its top_bracket value plus one is used
              as the number of ovector pairs
  gcontext    general context, or NULL to use the compiled code block itself
              in its place

Returns:      whatever pcre2_match_data_create() returns
*/

const pcre2_real_code *re = (const pcre2_real_code *)code;
pcre2_general_context *memsource = gcontext;

if (memsource == NULL) memsource = (pcre2_general_context *)code;

return pcre2_match_data_create(re->top_bracket + 1, memsource);
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free a match data block *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_match_data_free(pcre2_match_data *match_data)
|
||||
{
|
||||
if (match_data != NULL)
|
||||
{
|
||||
if (match_data->heapframes != NULL)
|
||||
match_data->memctl.free(match_data->heapframes,
|
||||
match_data->memctl.memory_data);
|
||||
if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
|
||||
match_data->memctl.free((void *)match_data->subject,
|
||||
match_data->memctl.memory_data);
|
||||
match_data->memctl.free(match_data, match_data->memctl.memory_data);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get last mark in match *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN PCRE2_SPTR PCRE2_CALL_CONVENTION
|
||||
pcre2_get_mark(pcre2_match_data *match_data)
|
||||
{
|
||||
return match_data->mark;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get pointer to ovector *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN PCRE2_SIZE * PCRE2_CALL_CONVENTION
pcre2_get_ovector_pointer(pcre2_match_data *match_data)
{
/* Hand back the address of the ovector held in the match data block. */

PCRE2_SIZE *offsets = match_data->ovector;
return offsets;
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get number of ovector slots *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN uint32_t PCRE2_CALL_CONVENTION
|
||||
pcre2_get_ovector_count(pcre2_match_data *match_data)
|
||||
{
|
||||
return match_data->oveccount;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get starting code unit in match *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
|
||||
pcre2_get_startchar(pcre2_match_data *match_data)
|
||||
{
|
||||
return match_data->startchar;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get size of match data block *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
|
||||
pcre2_get_match_data_size(pcre2_match_data *match_data)
|
||||
{
|
||||
return offsetof(pcre2_match_data, ovector) +
|
||||
2 * (match_data->oveccount) * sizeof(PCRE2_SIZE);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get heapframes size *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
|
||||
pcre2_get_match_data_heapframes_size(pcre2_match_data *match_data)
|
||||
{
|
||||
return match_data->heapframes_size;
|
||||
}
|
||||
|
||||
/* End of pcre2_match_data.c */
|
||||
243
3rd/pcre2/src/pcre2_newline.c
Normal file
243
3rd/pcre2/src/pcre2_newline.c
Normal file
@@ -0,0 +1,243 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains internal functions for testing newlines when more than
|
||||
one kind of newline is to be recognized. When a newline is found, its length is
|
||||
returned. In principle, we could implement several newline "types", each
|
||||
referring to a different set of newline characters. At present, PCRE2 supports
|
||||
only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
|
||||
and NLTYPE_ANY. The full list of Unicode newline characters is taken from
|
||||
http://unicode.org/unicode/reports/tr18/. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Check for newline at given position *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called only via the IS_NEWLINE macro, which does so only
|
||||
when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
|
||||
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the code unit
|
||||
pointed to by ptr is less than the end of the string.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to possible newline
|
||||
type the newline type
|
||||
endptr pointer to the end of the string
|
||||
lenptr where to return the length
|
||||
utf TRUE if in utf mode
|
||||
|
||||
Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr,
|
||||
uint32_t *lenptr, BOOL utf)
|
||||
{
|
||||
uint32_t c;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) { GETCHAR(c, ptr); } else c = *ptr;
|
||||
#else
|
||||
(void)utf;
|
||||
c = *ptr;
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
case CHAR_LF:
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
|
||||
case CHAR_CR:
|
||||
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* NLTYPE_ANY */
|
||||
|
||||
else switch(c)
|
||||
{
|
||||
#ifdef EBCDIC
|
||||
case CHAR_NEL:
|
||||
#endif
|
||||
case CHAR_LF:
|
||||
case CHAR_VT:
|
||||
case CHAR_FF:
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
|
||||
case CHAR_CR:
|
||||
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
#ifndef EBCDIC
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
case CHAR_NEL:
|
||||
*lenptr = utf? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 3;
|
||||
return TRUE;
|
||||
|
||||
#else /* 16-bit or 32-bit code units */
|
||||
case CHAR_NEL:
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
#endif
|
||||
#endif /* Not EBCDIC */
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Check for newline at previous position *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called only via the WAS_NEWLINE macro, which does so only
|
||||
when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
|
||||
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the initial
|
||||
value of ptr is greater than the start of the string that is being processed.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to possible newline
|
||||
type the newline type
|
||||
startptr pointer to the start of the string
|
||||
lenptr where to return the length
|
||||
utf TRUE if in utf mode
|
||||
|
||||
Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr,
|
||||
uint32_t *lenptr, BOOL utf)
|
||||
{
|
||||
uint32_t c;
|
||||
ptr--;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
BACKCHAR(ptr);
|
||||
GETCHAR(c, ptr);
|
||||
}
|
||||
else c = *ptr;
|
||||
#else
|
||||
(void)utf;
|
||||
c = *ptr;
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
case CHAR_LF:
|
||||
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
case CHAR_CR:
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* NLTYPE_ANY */
|
||||
|
||||
else switch(c)
|
||||
{
|
||||
case CHAR_LF:
|
||||
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
#ifdef EBCDIC
|
||||
case CHAR_NEL:
|
||||
#endif
|
||||
case CHAR_VT:
|
||||
case CHAR_FF:
|
||||
case CHAR_CR:
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
|
||||
#ifndef EBCDIC
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
case CHAR_NEL:
|
||||
*lenptr = utf? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 3;
|
||||
return TRUE;
|
||||
|
||||
#else /* 16-bit or 32-bit code units */
|
||||
case CHAR_NEL:
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
#endif
|
||||
#endif /* Not EBCDIC */
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_newline.c */
|
||||
120
3rd/pcre2/src/pcre2_ord2utf.c
Normal file
120
3rd/pcre2/src/pcre2_ord2utf.c
Normal file
@@ -0,0 +1,120 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This file contains a function that converts a Unicode character code point
|
||||
into a UTF string. The behaviour is different for each code unit width. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
/* If SUPPORT_UNICODE is not defined, this function will never be called.
|
||||
Supply a dummy function because some compilers do not like empty source
|
||||
modules. */
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
unsigned int
|
||||
PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
|
||||
{
|
||||
(void)(cvalue);
|
||||
(void)(buffer);
|
||||
return 0;
|
||||
}
|
||||
#else /* SUPPORT_UNICODE */
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Convert code point to UTF *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
cvalue the character value
|
||||
buffer pointer to buffer for result
|
||||
|
||||
Returns: number of code units placed in the buffer
|
||||
*/
|
||||
|
||||
unsigned int
|
||||
PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
|
||||
{
|
||||
/* Convert to UTF-8 */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
int i, j;
|
||||
for (i = 0; i < PRIV(utf8_table1_size); i++)
|
||||
if ((int)cvalue <= PRIV(utf8_table1)[i]) break;
|
||||
buffer += i;
|
||||
for (j = i; j > 0; j--)
|
||||
{
|
||||
*buffer-- = 0x80 | (cvalue & 0x3f);
|
||||
cvalue >>= 6;
|
||||
}
|
||||
*buffer = PRIV(utf8_table2)[i] | cvalue;
|
||||
return i + 1;
|
||||
|
||||
/* Convert to UTF-16 */
|
||||
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
if (cvalue <= 0xffff)
|
||||
{
|
||||
*buffer = (PCRE2_UCHAR)cvalue;
|
||||
return 1;
|
||||
}
|
||||
cvalue -= 0x10000;
|
||||
*buffer++ = 0xd800 | (cvalue >> 10);
|
||||
*buffer = 0xdc00 | (cvalue & 0x3ff);
|
||||
return 2;
|
||||
|
||||
/* Convert to UTF-32 */
|
||||
|
||||
#else
|
||||
*buffer = (PCRE2_UCHAR)cvalue;
|
||||
return 1;
|
||||
#endif
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* End of pcre2_ord2utf.c */
|
||||
434
3rd/pcre2/src/pcre2_pattern_info.c
Normal file
434
3rd/pcre2/src/pcre2_pattern_info.c
Normal file
@@ -0,0 +1,434 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return info about compiled pattern *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
code points to compiled code
|
||||
what what information is required
|
||||
where where to put the information; if NULL, return length
|
||||
|
||||
Returns: 0 when data returned
|
||||
> 0 when length requested
|
||||
< 0 on error or unset value
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where)
|
||||
{
|
||||
const pcre2_real_code *re = (const pcre2_real_code *)code;
|
||||
|
||||
if (where == NULL) /* Requests field length */
|
||||
{
|
||||
switch(what)
|
||||
{
|
||||
case PCRE2_INFO_ALLOPTIONS:
|
||||
case PCRE2_INFO_ARGOPTIONS:
|
||||
case PCRE2_INFO_BACKREFMAX:
|
||||
case PCRE2_INFO_BSR:
|
||||
case PCRE2_INFO_CAPTURECOUNT:
|
||||
case PCRE2_INFO_DEPTHLIMIT:
|
||||
case PCRE2_INFO_EXTRAOPTIONS:
|
||||
case PCRE2_INFO_FIRSTCODETYPE:
|
||||
case PCRE2_INFO_FIRSTCODEUNIT:
|
||||
case PCRE2_INFO_HASBACKSLASHC:
|
||||
case PCRE2_INFO_HASCRORLF:
|
||||
case PCRE2_INFO_HEAPLIMIT:
|
||||
case PCRE2_INFO_JCHANGED:
|
||||
case PCRE2_INFO_LASTCODETYPE:
|
||||
case PCRE2_INFO_LASTCODEUNIT:
|
||||
case PCRE2_INFO_MATCHEMPTY:
|
||||
case PCRE2_INFO_MATCHLIMIT:
|
||||
case PCRE2_INFO_MAXLOOKBEHIND:
|
||||
case PCRE2_INFO_MINLENGTH:
|
||||
case PCRE2_INFO_NAMEENTRYSIZE:
|
||||
case PCRE2_INFO_NAMECOUNT:
|
||||
case PCRE2_INFO_NEWLINE:
|
||||
return sizeof(uint32_t);
|
||||
|
||||
case PCRE2_INFO_FIRSTBITMAP:
|
||||
return sizeof(const uint8_t *);
|
||||
|
||||
case PCRE2_INFO_JITSIZE:
|
||||
case PCRE2_INFO_SIZE:
|
||||
case PCRE2_INFO_FRAMESIZE:
|
||||
return sizeof(size_t);
|
||||
|
||||
case PCRE2_INFO_NAMETABLE:
|
||||
return sizeof(PCRE2_SPTR);
|
||||
}
|
||||
}
|
||||
|
||||
if (re == NULL) return PCRE2_ERROR_NULL;
|
||||
|
||||
/* Check that the first field in the block is the magic number. If it is not,
|
||||
return with PCRE2_ERROR_BADMAGIC. */
|
||||
|
||||
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
|
||||
|
||||
/* Check that this pattern was compiled in the correct bit mode */
|
||||
|
||||
if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
|
||||
|
||||
switch(what)
|
||||
{
|
||||
case PCRE2_INFO_ALLOPTIONS:
|
||||
*((uint32_t *)where) = re->overall_options;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_ARGOPTIONS:
|
||||
*((uint32_t *)where) = re->compile_options;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_BACKREFMAX:
|
||||
*((uint32_t *)where) = re->top_backref;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_BSR:
|
||||
*((uint32_t *)where) = re->bsr_convention;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_CAPTURECOUNT:
|
||||
*((uint32_t *)where) = re->top_bracket;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_DEPTHLIMIT:
|
||||
*((uint32_t *)where) = re->limit_depth;
|
||||
if (re->limit_depth == UINT32_MAX) return PCRE2_ERROR_UNSET;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_EXTRAOPTIONS:
|
||||
*((uint32_t *)where) = re->extra_options;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_FIRSTCODETYPE:
|
||||
*((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 :
|
||||
((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_FIRSTCODEUNIT:
|
||||
*((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)?
|
||||
re->first_codeunit : 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_FIRSTBITMAP:
|
||||
*((const uint8_t **)where) = ((re->flags & PCRE2_FIRSTMAPSET) != 0)?
|
||||
&(re->start_bitmap[0]) : NULL;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_FRAMESIZE:
|
||||
*((size_t *)where) = offsetof(heapframe, ovector) +
|
||||
re->top_bracket * 2 * sizeof(PCRE2_SIZE);
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_HASBACKSLASHC:
|
||||
*((uint32_t *)where) = (re->flags & PCRE2_HASBKC) != 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_HASCRORLF:
|
||||
*((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_HEAPLIMIT:
|
||||
*((uint32_t *)where) = re->limit_heap;
|
||||
if (re->limit_heap == UINT32_MAX) return PCRE2_ERROR_UNSET;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_JCHANGED:
|
||||
*((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_JITSIZE:
|
||||
#ifdef SUPPORT_JIT
|
||||
*((size_t *)where) = (re->executable_jit != NULL)?
|
||||
PRIV(jit_get_size)(re->executable_jit) : 0;
|
||||
#else
|
||||
*((size_t *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_LASTCODETYPE:
|
||||
*((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)? 1 : 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_LASTCODEUNIT:
|
||||
*((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)?
|
||||
re->last_codeunit : 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_MATCHEMPTY:
|
||||
*((uint32_t *)where) = (re->flags & PCRE2_MATCH_EMPTY) != 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_MATCHLIMIT:
|
||||
*((uint32_t *)where) = re->limit_match;
|
||||
if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_MAXLOOKBEHIND:
|
||||
*((uint32_t *)where) = re->max_lookbehind;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_MINLENGTH:
|
||||
*((uint32_t *)where) = re->minlength;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_NAMEENTRYSIZE:
|
||||
*((uint32_t *)where) = re->name_entry_size;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_NAMECOUNT:
|
||||
*((uint32_t *)where) = re->name_count;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_NAMETABLE:
|
||||
*((PCRE2_SPTR *)where) = (PCRE2_SPTR)((const char *)re +
|
||||
sizeof(pcre2_real_code));
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_NEWLINE:
|
||||
*((uint32_t *)where) = re->newline_convention;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_SIZE:
|
||||
*((size_t *)where) = re->blocksize;
|
||||
break;
|
||||
|
||||
default: return PCRE2_ERROR_BADOPTION;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Callout enumerator *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
code points to compiled code
|
||||
callback function called for each callout block
|
||||
callout_data user data passed to the callback
|
||||
|
||||
Returns: 0 when successfully completed
|
||||
< 0 on local error
|
||||
!= 0 for callback error
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_callout_enumerate(const pcre2_code *code,
|
||||
int (*callback)(pcre2_callout_enumerate_block *, void *), void *callout_data)
|
||||
{
|
||||
const pcre2_real_code *re = (const pcre2_real_code *)code;
|
||||
pcre2_callout_enumerate_block cb;
|
||||
PCRE2_SPTR cc;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
BOOL utf;
|
||||
#endif
|
||||
|
||||
if (re == NULL) return PCRE2_ERROR_NULL;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
utf = (re->overall_options & PCRE2_UTF) != 0;
|
||||
#endif
|
||||
|
||||
/* Check that the first field in the block is the magic number. If it is not,
|
||||
return with PCRE2_ERROR_BADMAGIC. */
|
||||
|
||||
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
|
||||
|
||||
/* Check that this pattern was compiled in the correct bit mode */
|
||||
|
||||
if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
|
||||
|
||||
cb.version = 0;
|
||||
cc = (PCRE2_SPTR)((const uint8_t *)re + sizeof(pcre2_real_code))
|
||||
+ re->name_count * re->name_entry_size;
|
||||
|
||||
while (TRUE)
|
||||
{
|
||||
int rc;
|
||||
switch (*cc)
|
||||
{
|
||||
case OP_END:
|
||||
return 0;
|
||||
|
||||
case OP_CHAR:
|
||||
case OP_CHARI:
|
||||
case OP_NOT:
|
||||
case OP_NOTI:
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
case OP_EXACT:
|
||||
case OP_POSSTAR:
|
||||
case OP_POSPLUS:
|
||||
case OP_POSQUERY:
|
||||
case OP_POSUPTO:
|
||||
case OP_STARI:
|
||||
case OP_MINSTARI:
|
||||
case OP_PLUSI:
|
||||
case OP_MINPLUSI:
|
||||
case OP_QUERYI:
|
||||
case OP_MINQUERYI:
|
||||
case OP_UPTOI:
|
||||
case OP_MINUPTOI:
|
||||
case OP_EXACTI:
|
||||
case OP_POSSTARI:
|
||||
case OP_POSPLUSI:
|
||||
case OP_POSQUERYI:
|
||||
case OP_POSUPTOI:
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTPOSSTAR:
|
||||
case OP_NOTPOSPLUS:
|
||||
case OP_NOTPOSQUERY:
|
||||
case OP_NOTPOSUPTO:
|
||||
case OP_NOTSTARI:
|
||||
case OP_NOTMINSTARI:
|
||||
case OP_NOTPLUSI:
|
||||
case OP_NOTMINPLUSI:
|
||||
case OP_NOTQUERYI:
|
||||
case OP_NOTMINQUERYI:
|
||||
case OP_NOTUPTOI:
|
||||
case OP_NOTMINUPTOI:
|
||||
case OP_NOTEXACTI:
|
||||
case OP_NOTPOSSTARI:
|
||||
case OP_NOTPOSPLUSI:
|
||||
case OP_NOTPOSQUERYI:
|
||||
case OP_NOTPOSUPTOI:
|
||||
cc += PRIV(OP_lengths)[*cc];
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
|
||||
#endif
|
||||
break;
|
||||
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEEXACT:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEPOSPLUS:
|
||||
case OP_TYPEPOSQUERY:
|
||||
case OP_TYPEPOSUPTO:
|
||||
cc += PRIV(OP_lengths)[*cc];
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (cc[-1] == OP_PROP || cc[-1] == OP_NOTPROP) cc += 2;
|
||||
#endif
|
||||
break;
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
case OP_XCLASS:
|
||||
case OP_ECLASS:
|
||||
cc += GET(cc, 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
cc += PRIV(OP_lengths)[*cc] + cc[1];
|
||||
break;
|
||||
|
||||
case OP_CALLOUT:
|
||||
cb.pattern_position = GET(cc, 1);
|
||||
cb.next_item_length = GET(cc, 1 + LINK_SIZE);
|
||||
cb.callout_number = cc[1 + 2*LINK_SIZE];
|
||||
cb.callout_string_offset = 0;
|
||||
cb.callout_string_length = 0;
|
||||
cb.callout_string = NULL;
|
||||
rc = callback(&cb, callout_data);
|
||||
if (rc != 0) return rc;
|
||||
cc += PRIV(OP_lengths)[*cc];
|
||||
break;
|
||||
|
||||
case OP_CALLOUT_STR:
|
||||
cb.pattern_position = GET(cc, 1);
|
||||
cb.next_item_length = GET(cc, 1 + LINK_SIZE);
|
||||
cb.callout_number = 0;
|
||||
cb.callout_string_offset = GET(cc, 1 + 3*LINK_SIZE);
|
||||
cb.callout_string_length =
|
||||
GET(cc, 1 + 2*LINK_SIZE) - (1 + 4*LINK_SIZE) - 2;
|
||||
cb.callout_string = cc + (1 + 4*LINK_SIZE) + 1;
|
||||
rc = callback(&cb, callout_data);
|
||||
if (rc != 0) return rc;
|
||||
cc += GET(cc, 1 + 2*LINK_SIZE);
|
||||
break;
|
||||
|
||||
default:
|
||||
cc += PRIV(OP_lengths)[*cc];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_pattern_info.c */
|
||||
1111
3rd/pcre2/src/pcre2_printint.c
Normal file
1111
3rd/pcre2/src/pcre2_printint.c
Normal file
@@ -0,0 +1,1111 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains a PCRE private debugging function for printing out the
|
||||
internal form of a compiled regular expression, along with some supporting
|
||||
local functions. This source file is #included in pcre2test.c at each supported
|
||||
code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
|
||||
that comprise the library. It can also optionally be included in
|
||||
pcre2_compile.c for detailed debugging in error situations. */
|
||||
|
||||
|
||||
/* Tables of operator names. The same 8-bit table is used for all code unit
|
||||
widths, so it must be defined only once. The list itself is defined in
|
||||
pcre2_internal.h, which is #included by pcre2test before this file. */
|
||||
|
||||
#ifndef OP_LISTS_DEFINED
|
||||
static const char *OP_names[] = { OP_NAME_LIST };
|
||||
STATIC_ASSERT(sizeof(OP_names)/sizeof(*OP_names) == OP_TABLE_LENGTH, OP_names);
|
||||
#define OP_LISTS_DEFINED
|
||||
#endif
|
||||
|
||||
/* The functions and tables herein must all have mode-dependent names. */
|
||||
|
||||
#define OP_lengths PCRE2_SUFFIX(OP_lengths_)
|
||||
#define get_ucpname PCRE2_SUFFIX(get_ucpname_)
|
||||
#define pcre2_printint PCRE2_SUFFIX(pcre2_printint_)
|
||||
#define print_char PCRE2_SUFFIX(print_char_)
|
||||
#define print_custring PCRE2_SUFFIX(print_custring_)
|
||||
#define print_custring_bylen PCRE2_SUFFIX(print_custring_bylen_)
|
||||
#define print_prop PCRE2_SUFFIX(print_prop_)
|
||||
#define print_char_list PCRE2_SUFFIX(print_char_list_)
|
||||
#define print_map PCRE2_SUFFIX(print_map_)
|
||||
#define print_class PCRE2_SUFFIX(print_class_)
|
||||
|
||||
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
|
||||
the definition is next to the definition of the opcodes in pcre2_internal.h.
|
||||
The contents of the table are, however, mode-dependent. */
|
||||
|
||||
static const uint8_t OP_lengths[] = { OP_LENGTHS };
|
||||
STATIC_ASSERT(sizeof(OP_lengths)/sizeof(*OP_lengths) == OP_TABLE_LENGTH,
|
||||
PCRE2_SUFFIX(OP_lengths_));
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print one character from a string *
|
||||
*************************************************/
|
||||
|
||||
/* In UTF mode the character may occupy more than one code unit.
|
||||
|
||||
Arguments:
|
||||
f file to write to
|
||||
ptr pointer to first code unit of the character
|
||||
utf TRUE if string is UTF (will be FALSE if UTF is not supported)
|
||||
|
||||
Returns: number of additional code units used
|
||||
*/
|
||||
|
||||
static unsigned int
|
||||
print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
|
||||
{
|
||||
uint32_t c = *ptr;
|
||||
BOOL one_code_unit = !utf;
|
||||
|
||||
/* If UTF is supported and requested, check for a valid single code unit. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
one_code_unit = c < 0x80;
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
one_code_unit = (c & 0xfc00) != 0xd800;
|
||||
#else
|
||||
one_code_unit = (c & 0xfffff800u) != 0xd800u;
|
||||
#endif /* CODE_UNIT_WIDTH */
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Handle a valid one-code-unit character at any width. */
|
||||
|
||||
if (one_code_unit)
|
||||
{
|
||||
if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
|
||||
else if (c < 0x80) fprintf(f, "\\x%02x", c);
|
||||
else fprintf(f, "\\x{%02x}", c);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Code for invalid UTF code units and multi-unit UTF characters is different
|
||||
for each width. If UTF is not supported, control should never get here, but we
|
||||
need a return statement to keep the compiler happy. */
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
return 0;
|
||||
#else
|
||||
|
||||
/* Malformed UTF-8 should occur only if the sanity check has been turned off.
|
||||
Rather than swallow random bytes, just stop if we hit a bad one. Print it with
|
||||
\X instead of \x as an indication. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if ((c & 0xc0) != 0xc0)
|
||||
{
|
||||
fprintf(f, "\\X{%x}", c); /* Invalid starting byte */
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
int i;
|
||||
int a = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */
|
||||
int s = 6*a;
|
||||
c = (c & PRIV(utf8_table3)[a]) << s;
|
||||
for (i = 1; i <= a; i++)
|
||||
{
|
||||
if ((ptr[i] & 0xc0) != 0x80)
|
||||
{
|
||||
fprintf(f, "\\X{%x}", c); /* Invalid secondary byte */
|
||||
return i - 1;
|
||||
}
|
||||
s -= 6;
|
||||
c |= (ptr[i] & 0x3f) << s;
|
||||
}
|
||||
fprintf(f, "\\x{%x}", c);
|
||||
return a;
|
||||
}
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||
|
||||
/* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one.
|
||||
Print it with \X instead of \x as an indication. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 16
|
||||
if ((ptr[1] & 0xfc00) != 0xdc00)
|
||||
{
|
||||
fprintf(f, "\\X{%x}", c);
|
||||
return 0;
|
||||
}
|
||||
c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;
|
||||
fprintf(f, "\\x{%x}", c);
|
||||
return 1;
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
|
||||
|
||||
/* For UTF-32 we get here only for a malformed code unit, which should only
|
||||
occur if the sanity check has been turned off. Print it with \X instead of \x
|
||||
as an indication. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
fprintf(f, "\\X{%x}", c);
|
||||
return 0;
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print string as a list of code units *
|
||||
*************************************************/
|
||||
|
||||
/* These take no account of UTF as they always print each individual code unit.
|
||||
The string is zero-terminated for print_custring(); the length is given for
|
||||
print_custring_bylen().
|
||||
|
||||
Arguments:
|
||||
f file to write to
|
||||
ptr point to the string
|
||||
len length for print_custring_bylen()
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
print_custring(FILE *f, PCRE2_SPTR ptr)
|
||||
{
|
||||
while (*ptr != '\0')
|
||||
{
|
||||
uint32_t c = *ptr++;
|
||||
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_custring_bylen(FILE *f, PCRE2_SPTR ptr, PCRE2_UCHAR len)
|
||||
{
|
||||
for (; len > 0; len--)
|
||||
{
|
||||
uint32_t c = *ptr++;
|
||||
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find Unicode property name *
|
||||
*************************************************/
|
||||
|
||||
/* When there is no UTF/UCP support, the table of names does not exist. This
|
||||
function should not be called in such configurations, because a pattern that
|
||||
tries to use Unicode properties won't compile. Rather than put lots of #ifdefs
|
||||
into the main code, however, we just put one into this function.
|
||||
|
||||
Now that the table contains both full names and their abbreviations, we do some
|
||||
fiddling to try to get the full name, which is either the longer of two found
|
||||
names, or a 3-character script name. */
|
||||
|
||||
static const char *
get_ucpname(unsigned int ptype, unsigned int pvalue)
{
#ifdef SUPPORT_UNICODE
  const char *best = "??";     /* Returned when nothing matches */
  size_t best_len = 0;
  int matches = 0;

  /* A script property is also allowed to match a script-extension entry. */
  unsigned int alt_type = (ptype == PT_SC)? PT_SCX : ptype;

  /* Scan the property table from the last entry towards the first. */

  for (ptrdiff_t idx = PRIV(utt_size) - 1; idx >= 0; idx--) {
    const ucp_type_table *entry = PRIV(utt) + idx;
    const char *name;
    size_t name_len;

    if (pvalue != entry->value) continue;
    if (ptype != entry->type && alt_type != entry->type) continue;

    name = PRIV(utt_names) + entry->name_offset;
    name_len = strlen(name);

    /* A 3-character script name is taken immediately. */

    if (name_len == 3 && (entry->type == PT_SC || entry->type == PT_SCX)) {
      best = name;
      break;
    }

    /* Otherwise keep the longest name seen, and stop after two matches. */

    if (name_len > best_len) {
      best = name;
      best_len = name_len;
    }

    matches++;
    if (matches >= 2) break;
  }

  return best;

#else   /* No UTF support */
  (void)ptype;
  (void)pvalue;
  return "??";
#endif  /* SUPPORT_UNICODE */
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print Unicode property value *
|
||||
*************************************************/
|
||||
|
||||
/* "Normal" properties can be printed from tables. The PT_CLIST property is a
|
||||
pseudo-property that contains a pointer to a list of case-equivalent
|
||||
characters.
|
||||
|
||||
Arguments:
|
||||
f file to write to
|
||||
code pointer in the compiled code
|
||||
before text to print before
|
||||
after text to print after
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
print_prop(FILE *f, PCRE2_SPTR code, const char *before, const char *after)
|
||||
{
|
||||
if (code[1] != PT_CLIST)
|
||||
{
|
||||
const char *sc = (code[1] == PT_SC)? "script:" : "";
|
||||
const char *s = get_ucpname(code[1], code[2]);
|
||||
fprintf(f, "%s%s %s%c%s%s", before, OP_names[*code], sc, toupper(s[0]), s+1, after);
|
||||
}
|
||||
else
|
||||
{
|
||||
const uint32_t *p = PRIV(ucd_caseless_sets) + code[2];
|
||||
fprintf (f, "%s%sclist", before, (*code == OP_PROP)? "" : "not ");
|
||||
while (*p < NOTACHAR) fprintf(f, " %04x", *p++);
|
||||
fprintf(f, "%s", after);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print character list *
|
||||
*************************************************/
|
||||
|
||||
/* Prints the characters and character ranges in a character list.
|
||||
|
||||
Arguments:
|
||||
f file to write to
|
||||
code pointer in the compiled code
|
||||
*/
|
||||
|
||||
static PCRE2_SPTR
|
||||
print_char_list(FILE *f, PCRE2_SPTR code, const uint8_t *char_lists_end)
|
||||
{
|
||||
uint32_t type, list_ind;
|
||||
uint32_t char_list_add = XCL_CHAR_LIST_LOW_16_ADD;
|
||||
uint32_t range_start = ~(uint32_t)0, range_end = 0;
|
||||
const uint8_t *next_char;
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
type = (uint32_t)(code[0] << 8) | code[1];
|
||||
code += 2;
|
||||
#else
|
||||
type = code[0];
|
||||
code++;
|
||||
#endif /* CODE_UNIT_WIDTH */
|
||||
|
||||
/* Align characters. */
|
||||
next_char = char_lists_end - (GET(code, 0) << 1);
|
||||
type &= XCL_TYPE_MASK;
|
||||
list_ind = 0;
|
||||
|
||||
if ((type & XCL_BEGIN_WITH_RANGE) != 0)
|
||||
range_start = XCL_CHAR_LIST_LOW_16_START;
|
||||
|
||||
while (type > 0)
|
||||
{
|
||||
uint32_t item_count = type & XCL_ITEM_COUNT_MASK;
|
||||
|
||||
if (item_count == XCL_ITEM_COUNT_MASK)
|
||||
{
|
||||
if (list_ind <= 1)
|
||||
{
|
||||
item_count = *(const uint16_t*)next_char;
|
||||
next_char += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
item_count = *(const uint32_t*)next_char;
|
||||
next_char += 4;
|
||||
}
|
||||
}
|
||||
|
||||
while (item_count > 0)
|
||||
{
|
||||
if (list_ind <= 1)
|
||||
{
|
||||
range_end = *(const uint16_t*)next_char;
|
||||
next_char += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
range_end = *(const uint32_t*)next_char;
|
||||
next_char += 4;
|
||||
}
|
||||
|
||||
if ((range_end & XCL_CHAR_END) != 0)
|
||||
{
|
||||
range_end = char_list_add + (range_end >> XCL_CHAR_SHIFT);
|
||||
|
||||
if (range_start < range_end)
|
||||
fprintf(f, "\\x{%x}-", range_start);
|
||||
|
||||
fprintf(f, "\\x{%x}", range_end);
|
||||
range_start = ~(uint32_t)0;
|
||||
}
|
||||
else
|
||||
range_start = char_list_add + (range_end >> XCL_CHAR_SHIFT);
|
||||
|
||||
item_count--;
|
||||
}
|
||||
|
||||
list_ind++;
|
||||
type >>= XCL_TYPE_BIT_LEN;
|
||||
|
||||
/* The following code could be optimized to 8/16/32 bit,
|
||||
but it is not worth it for a debugging function. */
|
||||
|
||||
if (range_start == ~(uint32_t)0)
|
||||
{
|
||||
if ((type & XCL_BEGIN_WITH_RANGE) != 0)
|
||||
{
|
||||
if (list_ind == 1) range_start = XCL_CHAR_LIST_HIGH_16_START;
|
||||
else if (list_ind == 2) range_start = XCL_CHAR_LIST_LOW_32_START;
|
||||
else range_start = XCL_CHAR_LIST_HIGH_32_START;
|
||||
}
|
||||
}
|
||||
else if ((type & XCL_BEGIN_WITH_RANGE) == 0)
|
||||
{
|
||||
fprintf(f, "\\x{%x}-", range_start);
|
||||
|
||||
if (list_ind == 1) range_end = XCL_CHAR_LIST_LOW_16_END;
|
||||
else if (list_ind == 2) range_end = XCL_CHAR_LIST_HIGH_16_END;
|
||||
else if (list_ind == 3) range_end = XCL_CHAR_LIST_LOW_32_END;
|
||||
else range_end = XCL_CHAR_LIST_HIGH_32_END;
|
||||
|
||||
fprintf(f, "\\x{%x}", range_end);
|
||||
range_start = ~(uint32_t)0;
|
||||
}
|
||||
|
||||
if (list_ind == 1) char_list_add = XCL_CHAR_LIST_HIGH_16_ADD;
|
||||
else if (list_ind == 2) char_list_add = XCL_CHAR_LIST_LOW_32_ADD;
|
||||
else char_list_add = XCL_CHAR_LIST_HIGH_32_ADD;
|
||||
}
|
||||
|
||||
return code + LINK_SIZE;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print a character bitmap *
|
||||
*************************************************/
|
||||
|
||||
/* Prints a 32-byte bitmap, which occurs within a character class opcode.
|
||||
|
||||
Arguments:
|
||||
f file to write to
|
||||
map pointer to the bitmap
|
||||
negated TRUE if the bitmap will be printed as negated
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
print_map(FILE *f, const uint8_t *map, BOOL negated)
|
||||
{
|
||||
BOOL first = TRUE;
|
||||
uint8_t inverted_map[32];
|
||||
int i;
|
||||
|
||||
if (negated)
|
||||
{
|
||||
/* Using 255 ^ instead of ~ avoids clang sanitize warning. */
|
||||
for (i = 0; i < 32; i++) inverted_map[i] = 255 ^ map[i];
|
||||
map = inverted_map;
|
||||
}
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((map[i/8] & (1u << (i&7))) != 0)
|
||||
{
|
||||
int j;
|
||||
for (j = i+1; j < 256; j++)
|
||||
if ((map[j/8] & (1u << (j&7))) == 0) break;
|
||||
if (i == '-' || i == '\\' || i == ']' || (first && i == '^'))
|
||||
fprintf(f, "\\");
|
||||
if (PRINTABLE(i)) fprintf(f, "%c", i);
|
||||
else fprintf(f, "\\x%02x", i);
|
||||
first = FALSE;
|
||||
if (--j > i)
|
||||
{
|
||||
if (j != i + 1) fprintf(f, "-");
|
||||
if (j == '-' || j == '\\' || j == ']') fprintf(f, "\\");
|
||||
if (PRINTABLE(j)) fprintf(f, "%c", j);
|
||||
else fprintf(f, "\\x%02x", j);
|
||||
}
|
||||
i = j;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print character class *
|
||||
*************************************************/
|
||||
|
||||
/* Prints a character class, which must be either an OP_CLASS, OP_NCLASS, or
|
||||
OP_XCLASS.
|
||||
|
||||
Arguments:
|
||||
f file to write to
|
||||
type OP_CLASS, OP_NCLASS, or OP_XCLASS
|
||||
code pointer in the compiled code (after the OP tag)
|
||||
utf TRUE if re is UTF (will be FALSE if UTF is not supported)
|
||||
before text to print before
|
||||
after text to print after
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
print_class(FILE *f, int type, PCRE2_SPTR code, const uint8_t *char_lists_end,
|
||||
BOOL utf, const char *before, const char *after)
|
||||
{
|
||||
BOOL printmap, negated;
|
||||
PCRE2_SPTR ccode;
|
||||
|
||||
/* Negative XCLASS and NCLASS both have a bitmap indicating which characters
|
||||
are accepted. For clarity we print this inverted and prefixed by "^". */
|
||||
if (type == OP_XCLASS)
|
||||
{
|
||||
ccode = code + LINK_SIZE;
|
||||
printmap = (*ccode & XCL_MAP) != 0;
|
||||
negated = (*ccode & XCL_NOT) != 0;
|
||||
ccode++;
|
||||
}
|
||||
else /* CLASS or NCLASS */
|
||||
{
|
||||
printmap = TRUE;
|
||||
negated = type == OP_NCLASS;
|
||||
ccode = code;
|
||||
}
|
||||
|
||||
fprintf(f, "%s[%s", before, negated? "^" : "");
|
||||
|
||||
/* Print a bit map */
|
||||
if (printmap)
|
||||
{
|
||||
print_map(f, (const uint8_t *)ccode, negated);
|
||||
ccode += 32 / sizeof(PCRE2_UCHAR);
|
||||
}
|
||||
|
||||
/* For an XCLASS there is always some additional data */
|
||||
if (type == OP_XCLASS)
|
||||
{
|
||||
PCRE2_UCHAR ch;
|
||||
|
||||
while ((ch = *ccode++) != XCL_END)
|
||||
{
|
||||
const char *notch = "";
|
||||
|
||||
if (ch >= XCL_LIST)
|
||||
{
|
||||
ccode = print_char_list(f, ccode - 1, char_lists_end);
|
||||
break;
|
||||
}
|
||||
|
||||
switch(ch)
|
||||
{
|
||||
case XCL_NOTPROP:
|
||||
notch = "^";
|
||||
/* Fall through */
|
||||
case XCL_PROP:
|
||||
{
|
||||
unsigned int ptype = *ccode++;
|
||||
unsigned int pvalue = *ccode++;
|
||||
const char *s;
|
||||
switch(ptype)
|
||||
{
|
||||
case PT_PXGRAPH:
|
||||
fprintf(f, "[:%sgraph:]", notch);
|
||||
break;
|
||||
case PT_PXPRINT:
|
||||
fprintf(f, "[:%sprint:]", notch);
|
||||
break;
|
||||
case PT_PXPUNCT:
|
||||
fprintf(f, "[:%spunct:]", notch);
|
||||
break;
|
||||
case PT_PXXDIGIT:
|
||||
fprintf(f, "[:%sxdigit:]", notch);
|
||||
break;
|
||||
default:
|
||||
s = get_ucpname(ptype, pvalue);
|
||||
fprintf(f, "\\%c{%c%s}", ((notch[0] == '^')? 'P':'p'),
|
||||
toupper(s[0]), s+1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
ccode += 1 + print_char(f, ccode, utf);
|
||||
if (ch == XCL_RANGE)
|
||||
{
|
||||
fprintf(f, "-");
|
||||
ccode += 1 + print_char(f, ccode, utf);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
PCRE2_ASSERT(ccode == code + (GET(code, 0) - 1));
|
||||
}
|
||||
|
||||
/* Indicate a non-UTF class which was created by negation */
|
||||
fprintf(f, "]%s", after);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print compiled pattern *
|
||||
*************************************************/
|
||||
|
||||
/* The print_lengths flag controls whether offsets and lengths of items are
|
||||
printed. Lengths can be turned off from pcre2test so that automatic tests on
|
||||
bytecode can be written that do not depend on the value of LINK_SIZE.
|
||||
|
||||
Arguments:
|
||||
re a compiled pattern
|
||||
f the file to write to
|
||||
print_lengths show various lengths
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
pcre2_printint(pcre2_code *re, FILE *f, BOOL print_lengths)
|
||||
{
|
||||
PCRE2_SPTR codestart, nametable, code;
|
||||
uint32_t nesize = re->name_entry_size;
|
||||
BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
|
||||
|
||||
nametable = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
|
||||
code = codestart = (PCRE2_SPTR)((uint8_t *)re + re->code_start);
|
||||
|
||||
for(;;)
|
||||
{
|
||||
PCRE2_SPTR ccode;
|
||||
uint32_t c;
|
||||
int i;
|
||||
const char *flag = " ";
|
||||
unsigned int extra = 0;
|
||||
|
||||
if (print_lengths)
|
||||
fprintf(f, "%3d ", (int)(code - codestart));
|
||||
else
|
||||
fprintf(f, " ");
|
||||
|
||||
switch(*code)
|
||||
{
|
||||
case OP_END:
|
||||
fprintf(f, " %s\n", OP_names[*code]);
|
||||
fprintf(f, "------------------------------------------------------------------\n");
|
||||
return;
|
||||
|
||||
case OP_CHAR:
|
||||
fprintf(f, " ");
|
||||
do
|
||||
{
|
||||
code++;
|
||||
code += 1 + print_char(f, code, utf);
|
||||
}
|
||||
while (*code == OP_CHAR);
|
||||
fprintf(f, "\n");
|
||||
continue;
|
||||
|
||||
case OP_CHARI:
|
||||
fprintf(f, " /i ");
|
||||
do
|
||||
{
|
||||
code++;
|
||||
code += 1 + print_char(f, code, utf);
|
||||
}
|
||||
while (*code == OP_CHARI);
|
||||
fprintf(f, "\n");
|
||||
continue;
|
||||
|
||||
case OP_CBRA:
|
||||
case OP_CBRAPOS:
|
||||
case OP_SCBRA:
|
||||
case OP_SCBRAPOS:
|
||||
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
|
||||
else fprintf(f, " ");
|
||||
fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
|
||||
break;
|
||||
|
||||
case OP_BRA:
|
||||
case OP_BRAPOS:
|
||||
case OP_SBRA:
|
||||
case OP_SBRAPOS:
|
||||
case OP_KETRMAX:
|
||||
case OP_KETRMIN:
|
||||
case OP_KETRPOS:
|
||||
case OP_ALT:
|
||||
case OP_KET:
|
||||
case OP_ASSERT:
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
case OP_ASSERT_NA:
|
||||
case OP_ASSERTBACK_NA:
|
||||
case OP_ASSERT_SCS:
|
||||
case OP_ONCE:
|
||||
case OP_SCRIPT_RUN:
|
||||
case OP_COND:
|
||||
case OP_SCOND:
|
||||
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
|
||||
else fprintf(f, " ");
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_REVERSE:
|
||||
if (print_lengths) fprintf(f, "%3d ", GET2(code, 1));
|
||||
else fprintf(f, " ");
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_VREVERSE:
|
||||
if (print_lengths) fprintf(f, "%3d %d ", GET2(code, 1),
|
||||
GET2(code, 1 + IMM2_SIZE));
|
||||
else fprintf(f, " ");
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_CLOSE:
|
||||
fprintf(f, " %s %d", OP_names[*code], GET2(code, 1));
|
||||
break;
|
||||
|
||||
case OP_CREF:
|
||||
fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_DNCREF:
|
||||
{
|
||||
PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
|
||||
fprintf(f, " %s Capture ref <", flag);
|
||||
print_custring(f, entry);
|
||||
fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_RREF:
|
||||
c = GET2(code, 1);
|
||||
if (c == RREF_ANY)
|
||||
fprintf(f, " Cond recurse any");
|
||||
else
|
||||
fprintf(f, " Cond recurse %d", c);
|
||||
break;
|
||||
|
||||
case OP_DNRREF:
|
||||
{
|
||||
PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
|
||||
fprintf(f, " %s Cond recurse <", flag);
|
||||
print_custring(f, entry);
|
||||
fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_FALSE:
|
||||
fprintf(f, " Cond false");
|
||||
break;
|
||||
|
||||
case OP_TRUE:
|
||||
fprintf(f, " Cond true");
|
||||
break;
|
||||
|
||||
case OP_STARI:
|
||||
case OP_MINSTARI:
|
||||
case OP_POSSTARI:
|
||||
case OP_PLUSI:
|
||||
case OP_MINPLUSI:
|
||||
case OP_POSPLUSI:
|
||||
case OP_QUERYI:
|
||||
case OP_MINQUERYI:
|
||||
case OP_POSQUERYI:
|
||||
flag = "/i";
|
||||
/* Fall through */
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_POSSTAR:
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_POSPLUS:
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_POSQUERY:
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEPOSPLUS:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSQUERY:
|
||||
fprintf(f, " %s ", flag);
|
||||
|
||||
if (*code >= OP_TYPESTAR)
|
||||
{
|
||||
if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
|
||||
{
|
||||
print_prop(f, code + 1, "", " ");
|
||||
extra = 2;
|
||||
}
|
||||
else fprintf(f, "%s", OP_names[code[1]]);
|
||||
}
|
||||
else extra = print_char(f, code+1, utf);
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_EXACTI:
|
||||
case OP_UPTOI:
|
||||
case OP_MINUPTOI:
|
||||
case OP_POSUPTOI:
|
||||
flag = "/i";
|
||||
/* Fall through */
|
||||
case OP_EXACT:
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
case OP_POSUPTO:
|
||||
fprintf(f, " %s ", flag);
|
||||
extra = print_char(f, code + 1 + IMM2_SIZE, utf);
|
||||
fprintf(f, "{");
|
||||
if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,");
|
||||
fprintf(f, "%d}", GET2(code,1));
|
||||
if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?");
|
||||
else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+");
|
||||
break;
|
||||
|
||||
case OP_TYPEEXACT:
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEPOSUPTO:
|
||||
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
|
||||
{
|
||||
print_prop(f, code + IMM2_SIZE + 1, " ", " ");
|
||||
extra = 2;
|
||||
}
|
||||
else fprintf(f, " %s", OP_names[code[1 + IMM2_SIZE]]);
|
||||
fprintf(f, "{");
|
||||
if (*code != OP_TYPEEXACT) fprintf(f, "0,");
|
||||
fprintf(f, "%d}", GET2(code,1));
|
||||
if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
|
||||
else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
|
||||
break;
|
||||
|
||||
case OP_NOTI:
|
||||
flag = "/i";
|
||||
/* Fall through */
|
||||
case OP_NOT:
|
||||
fprintf(f, " %s [^", flag);
|
||||
extra = print_char(f, code + 1, utf);
|
||||
fprintf(f, "] (not)");
|
||||
break;
|
||||
|
||||
case OP_NOTSTARI:
|
||||
case OP_NOTMINSTARI:
|
||||
case OP_NOTPOSSTARI:
|
||||
case OP_NOTPLUSI:
|
||||
case OP_NOTMINPLUSI:
|
||||
case OP_NOTPOSPLUSI:
|
||||
case OP_NOTQUERYI:
|
||||
case OP_NOTMINQUERYI:
|
||||
case OP_NOTPOSQUERYI:
|
||||
flag = "/i";
|
||||
/* Fall through */
|
||||
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTPOSSTAR:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTPOSPLUS:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_NOTPOSQUERY:
|
||||
fprintf(f, " %s [^", flag);
|
||||
extra = print_char(f, code + 1, utf);
|
||||
fprintf(f, "]%s (not)", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_NOTEXACTI:
|
||||
case OP_NOTUPTOI:
|
||||
case OP_NOTMINUPTOI:
|
||||
case OP_NOTPOSUPTOI:
|
||||
flag = "/i";
|
||||
/* Fall through */
|
||||
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_NOTPOSUPTO:
|
||||
fprintf(f, " %s [^", flag);
|
||||
extra = print_char(f, code + 1 + IMM2_SIZE, utf);
|
||||
fprintf(f, "]{");
|
||||
if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
|
||||
fprintf(f, "%d}", GET2(code,1));
|
||||
if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
|
||||
else
|
||||
if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+");
|
||||
fprintf(f, " (not)");
|
||||
break;
|
||||
|
||||
case OP_RECURSE:
|
||||
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
|
||||
else fprintf(f, " ");
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_REFI:
|
||||
flag = "/i";
|
||||
extra = code[1 + IMM2_SIZE];
|
||||
/* Fall through */
|
||||
case OP_REF:
|
||||
fprintf(f, " %s \\%d", flag, GET2(code,1));
|
||||
if (extra != 0) fprintf(f, " 0x%02x", extra);
|
||||
ccode = code + OP_lengths[*code];
|
||||
goto CLASS_REF_REPEAT;
|
||||
|
||||
case OP_DNREFI:
|
||||
flag = "/i";
|
||||
extra = code[1 + 2*IMM2_SIZE];
|
||||
/* Fall through */
|
||||
case OP_DNREF:
|
||||
{
|
||||
PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
|
||||
fprintf(f, " %s \\k<", flag);
|
||||
print_custring(f, entry);
|
||||
fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
|
||||
if (extra != 0) fprintf(f, " 0x%02x", extra);
|
||||
}
|
||||
ccode = code + OP_lengths[*code];
|
||||
goto CLASS_REF_REPEAT;
|
||||
|
||||
case OP_CALLOUT:
|
||||
fprintf(f, " %s %d %d %d", OP_names[*code], code[1 + 2*LINK_SIZE],
|
||||
GET(code, 1), GET(code, 1 + LINK_SIZE));
|
||||
break;
|
||||
|
||||
case OP_CALLOUT_STR:
|
||||
c = code[1 + 4*LINK_SIZE];
|
||||
fprintf(f, " %s %c", OP_names[*code], c);
|
||||
extra = GET(code, 1 + 2*LINK_SIZE);
|
||||
print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE);
|
||||
for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
|
||||
if (c == PRIV(callout_start_delims)[i])
|
||||
{
|
||||
c = PRIV(callout_end_delims)[i];
|
||||
break;
|
||||
}
|
||||
fprintf(f, "%c %d %d %d", c, GET(code, 1 + 3*LINK_SIZE), GET(code, 1),
|
||||
GET(code, 1 + LINK_SIZE));
|
||||
break;
|
||||
|
||||
case OP_PROP:
|
||||
case OP_NOTPROP:
|
||||
print_prop(f, code, " ", "");
|
||||
break;
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
case OP_ECLASS:
|
||||
extra = GET(code, 1);
|
||||
fprintf(f, " eclass[\n");
|
||||
/* We print the opcodes contained inside as well. */
|
||||
ccode = code + 1 + LINK_SIZE + 1;
|
||||
if ((ccode[-1] & ECL_MAP) != 0)
|
||||
{
|
||||
const uint8_t *map = (const uint8_t *)ccode;
|
||||
/* The first 6 ASCII characters (SOH...ACK) are totally, utterly useless.
|
||||
If they're set in the bitmap, then it's clearly been formed by negation.*/
|
||||
BOOL print_negated = (map[0] & 0x7e) == 0x7e;
|
||||
|
||||
fprintf(f, " bitmap: [%s", print_negated? "^" : "");
|
||||
print_map(f, map, print_negated);
|
||||
fprintf(f, "]\n");
|
||||
ccode += 32 / sizeof(PCRE2_UCHAR);
|
||||
}
|
||||
else
|
||||
fprintf(f, " no bitmap\n");
|
||||
while (ccode < code + extra)
|
||||
{
|
||||
if (print_lengths)
|
||||
fprintf(f, "%3d ", (int)(ccode - codestart));
|
||||
else
|
||||
fprintf(f, " ");
|
||||
|
||||
switch (*ccode)
|
||||
{
|
||||
case ECL_AND:
|
||||
fprintf(f, " AND\n");
|
||||
ccode += 1;
|
||||
break;
|
||||
case ECL_OR:
|
||||
fprintf(f, " OR\n");
|
||||
ccode += 1;
|
||||
break;
|
||||
case ECL_XOR:
|
||||
fprintf(f, " XOR\n");
|
||||
ccode += 1;
|
||||
break;
|
||||
case ECL_NOT:
|
||||
fprintf(f, " NOT\n");
|
||||
ccode += 1;
|
||||
break;
|
||||
|
||||
case ECL_XCLASS:
|
||||
print_class(f, OP_XCLASS, ccode+1, (uint8_t*)codestart, utf,
|
||||
" xclass: ", "\n");
|
||||
ccode += GET(ccode, 1);
|
||||
break;
|
||||
|
||||
default:
|
||||
fprintf(f, " UNEXPECTED\n");
|
||||
ccode += 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
fprintf(f, " ]");
|
||||
goto CLASS_REF_REPEAT;
|
||||
#endif /* SUPPORT_WIDE_CHARS */
|
||||
|
||||
case OP_CLASS:
|
||||
case OP_NCLASS:
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
case OP_XCLASS:
|
||||
if (*code == OP_XCLASS)
|
||||
extra = GET(code, 1);
|
||||
#endif
|
||||
print_class(f, *code, code+1, (uint8_t*)codestart, utf, " ", "");
|
||||
ccode = code + OP_lengths[*code] + extra;
|
||||
|
||||
/* Handle repeats after a class or a back reference */
|
||||
|
||||
CLASS_REF_REPEAT:
|
||||
switch(*ccode)
|
||||
{
|
||||
unsigned int min, max;
|
||||
|
||||
case OP_CRSTAR:
|
||||
case OP_CRMINSTAR:
|
||||
case OP_CRPLUS:
|
||||
case OP_CRMINPLUS:
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
case OP_CRPOSSTAR:
|
||||
case OP_CRPOSPLUS:
|
||||
case OP_CRPOSQUERY:
|
||||
fprintf(f, "%s", OP_names[*ccode]);
|
||||
extra += OP_lengths[*ccode];
|
||||
break;
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
case OP_CRPOSRANGE:
|
||||
min = GET2(ccode,1);
|
||||
max = GET2(ccode,1 + IMM2_SIZE);
|
||||
if (max == 0) fprintf(f, "{%u,}", min);
|
||||
else fprintf(f, "{%u,%u}", min, max);
|
||||
if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
|
||||
else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+");
|
||||
extra += OP_lengths[*ccode];
|
||||
break;
|
||||
|
||||
/* Do nothing if it's not a repeat; this code stops picky compilers
|
||||
warning about the lack of a default code path. */
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
fprintf(f, " %s ", OP_names[*code]);
|
||||
print_custring_bylen(f, code + 2, code[1]);
|
||||
extra += code[1];
|
||||
break;
|
||||
|
||||
case OP_THEN:
|
||||
fprintf(f, " %s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_CIRCM:
|
||||
case OP_DOLLM:
|
||||
flag = "/m";
|
||||
/* Fall through */
|
||||
|
||||
/* Anything else is just an item with no data, but possibly a flag. */
|
||||
|
||||
default:
|
||||
fprintf(f, " %s %s", flag, OP_names[*code]);
|
||||
break;
|
||||
}
|
||||
|
||||
code += OP_lengths[*code] + extra;
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_printint.c */
|
||||
344
3rd/pcre2/src/pcre2_script_run.c
Normal file
344
3rd/pcre2/src/pcre2_script_run.c
Normal file
@@ -0,0 +1,344 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2021 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This module contains the function for checking a script run. */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Check script run *
|
||||
*************************************************/
|
||||
|
||||
/* A script run is conceptually a sequence of characters all in the same
|
||||
Unicode script. However, it isn't quite that simple. There are special rules
|
||||
for scripts that are commonly used together, and also special rules for digits.
|
||||
This function implements the appropriate checks, which is possible only when
|
||||
PCRE2 is compiled with Unicode support. The function returns TRUE if there is
|
||||
no Unicode support; however, it should never be called in that circumstance
|
||||
because an error is given by pcre2_compile() if a script run is called for in a
|
||||
version of PCRE2 compiled without Unicode support.
|
||||
|
||||
Arguments:
|
||||
ptr         point to the first character
|
||||
endptr point after the last character
|
||||
utf TRUE if in UTF mode
|
||||
|
||||
Returns: TRUE if this is a valid script run
|
||||
*/
|
||||
|
||||
/* These are states in the checking process. */
|
||||
|
||||
/* States of the script-run checking process, in the order in which they are
numbered (SCRIPT_UNSET is zero). */

enum {
  SCRIPT_UNSET,          /* Requirement as yet unknown */
  SCRIPT_MAP,            /* Bitmap contains acceptable scripts */
  SCRIPT_HANPENDING,     /* Have had only Han characters */
  SCRIPT_HANHIRAKATA,    /* Expect Han, Hiragana, or Katakana */
  SCRIPT_HANBOPOMOFO,    /* Expect Han or Bopomofo */
  SCRIPT_HANHANGUL       /* Expect Han or Hangul */
};

/* Sizes, in 32-bit words, of the two kinds of script bitmap: one with a bit
for each script number up to ucp_Unknown, and one with a bit for each script
number up to ucp_Script_Count. */

#define UCD_MAPSIZE (ucp_Unknown/32 + 1)
#define FULL_MAPSIZE (ucp_Script_Count/32 + 1)
|
||||
|
||||
BOOL
|
||||
PRIV(script_run)(PCRE2_SPTR ptr, PCRE2_SPTR endptr, BOOL utf)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
uint32_t require_state = SCRIPT_UNSET;
|
||||
uint32_t require_map[FULL_MAPSIZE];
|
||||
uint32_t map[FULL_MAPSIZE];
|
||||
uint32_t require_digitset = 0;
|
||||
uint32_t c;
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
(void)utf; /* Avoid compiler warning */
|
||||
#endif
|
||||
|
||||
/* Any string containing fewer than 2 characters is a valid script run. */
|
||||
|
||||
if (ptr >= endptr) return TRUE;
|
||||
GETCHARINCTEST(c, ptr);
|
||||
if (ptr >= endptr) return TRUE;
|
||||
|
||||
/* Initialize the require map. This is a full-size bitmap that has a bit for
|
||||
every script, as opposed to the maps in ucd_script_sets, which only have bits
|
||||
for scripts less than ucp_Unknown - those that appear in script extension
|
||||
lists. */
|
||||
|
||||
for (int i = 0; i < FULL_MAPSIZE; i++) require_map[i] = 0;
|
||||
|
||||
/* Scan strings of two or more characters, checking the Unicode characteristics
|
||||
of each code point. There is special code for scripts that can be combined with
|
||||
characters from the Han Chinese script. This may be used in conjunction with
|
||||
four other scripts in these combinations:
|
||||
|
||||
. Han with Hiragana and Katakana is allowed (for Japanese).
|
||||
. Han with Bopomofo is allowed (for Taiwanese Mandarin).
|
||||
. Han with Hangul is allowed (for Korean).
|
||||
|
||||
If the first significant character's script is one of the four, the required
|
||||
script type is immediately known. However, if the first significant
|
||||
character's script is Han, we have to keep checking for a non-Han character.
|
||||
Hence the SCRIPT_HANPENDING state. */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
const ucd_record *ucd = GET_UCD(c);
|
||||
uint32_t script = ucd->script;
|
||||
|
||||
/* If the script is Unknown, the string is not a valid script run. Such
|
||||
characters can only form script runs of length one (see test above). */
|
||||
|
||||
if (script == ucp_Unknown) return FALSE;
|
||||
|
||||
/* A character without any script extensions whose script is Inherited or
|
||||
Common is always accepted with any script. If there are extensions, the
|
||||
following processing happens for all scripts. */
|
||||
|
||||
if (UCD_SCRIPTX_PROP(ucd) != 0 || (script != ucp_Inherited && script != ucp_Common))
|
||||
{
|
||||
BOOL OK;
|
||||
|
||||
/* Set up a full-sized map for this character that can include bits for all
|
||||
scripts. Copy the scriptx map for this character (which covers those
|
||||
scripts that appear in script extension lists), set the remaining values to
|
||||
zero, and then, except for Common or Inherited, add this script's bit to
|
||||
the map. */
|
||||
|
||||
memcpy(map, PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(ucd), UCD_MAPSIZE * sizeof(uint32_t));
|
||||
memset(map + UCD_MAPSIZE, 0, (FULL_MAPSIZE - UCD_MAPSIZE) * sizeof(uint32_t));
|
||||
if (script != ucp_Common && script != ucp_Inherited) MAPSET(map, script);
|
||||
|
||||
/* Handle the different checking states */
|
||||
|
||||
switch(require_state)
|
||||
{
|
||||
/* First significant character - it might follow Common or Inherited
|
||||
characters that do not have any script extensions. */
|
||||
|
||||
case SCRIPT_UNSET:
|
||||
switch(script)
|
||||
{
|
||||
case ucp_Han:
|
||||
require_state = SCRIPT_HANPENDING;
|
||||
break;
|
||||
|
||||
case ucp_Hiragana:
|
||||
case ucp_Katakana:
|
||||
require_state = SCRIPT_HANHIRAKATA;
|
||||
break;
|
||||
|
||||
case ucp_Bopomofo:
|
||||
require_state = SCRIPT_HANBOPOMOFO;
|
||||
break;
|
||||
|
||||
case ucp_Hangul:
|
||||
require_state = SCRIPT_HANHANGUL;
|
||||
break;
|
||||
|
||||
default:
|
||||
memcpy(require_map, map, FULL_MAPSIZE * sizeof(uint32_t));
|
||||
require_state = SCRIPT_MAP;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
/* The first significant character was Han. An inspection of the Unicode
|
||||
11.0.0 files shows that there are the following types of Script Extension
|
||||
list that involve the Han, Bopomofo, Hiragana, Katakana, and Hangul
|
||||
scripts:
|
||||
|
||||
. Bopomofo + Han
|
||||
. Han + Hiragana + Katakana
|
||||
. Hiragana + Katakana
|
||||
. Bopopmofo + Hangul + Han + Hiragana + Katakana
|
||||
|
||||
The following code tries to make sense of this. */
|
||||
|
||||
#define FOUND_BOPOMOFO 1
|
||||
#define FOUND_HIRAGANA 2
|
||||
#define FOUND_KATAKANA 4
|
||||
#define FOUND_HANGUL 8
|
||||
|
||||
case SCRIPT_HANPENDING:
|
||||
if (script != ucp_Han) /* Another Han does nothing */
|
||||
{
|
||||
uint32_t chspecial = 0;
|
||||
|
||||
if (MAPBIT(map, ucp_Bopomofo) != 0) chspecial |= FOUND_BOPOMOFO;
|
||||
if (MAPBIT(map, ucp_Hiragana) != 0) chspecial |= FOUND_HIRAGANA;
|
||||
if (MAPBIT(map, ucp_Katakana) != 0) chspecial |= FOUND_KATAKANA;
|
||||
if (MAPBIT(map, ucp_Hangul) != 0) chspecial |= FOUND_HANGUL;
|
||||
|
||||
if (chspecial == 0) return FALSE; /* Not allowed with Han */
|
||||
|
||||
if (chspecial == FOUND_BOPOMOFO)
|
||||
require_state = SCRIPT_HANBOPOMOFO;
|
||||
else if (chspecial == (FOUND_HIRAGANA|FOUND_KATAKANA))
|
||||
require_state = SCRIPT_HANHIRAKATA;
|
||||
|
||||
/* Otherwise this character must be allowed with all of them, so remain
|
||||
in the pending state. */
|
||||
}
|
||||
break;
|
||||
|
||||
/* Previously encountered one of the "with Han" scripts. Check that
|
||||
this character is appropriate. */
|
||||
|
||||
case SCRIPT_HANHIRAKATA:
|
||||
if (MAPBIT(map, ucp_Han) + MAPBIT(map, ucp_Hiragana) +
|
||||
MAPBIT(map, ucp_Katakana) == 0) return FALSE;
|
||||
break;
|
||||
|
||||
case SCRIPT_HANBOPOMOFO:
|
||||
if (MAPBIT(map, ucp_Han) + MAPBIT(map, ucp_Bopomofo) == 0) return FALSE;
|
||||
break;
|
||||
|
||||
case SCRIPT_HANHANGUL:
|
||||
if (MAPBIT(map, ucp_Han) + MAPBIT(map, ucp_Hangul) == 0) return FALSE;
|
||||
break;
|
||||
|
||||
/* Previously encountered one or more characters that are allowed with a
|
||||
list of scripts. */
|
||||
|
||||
case SCRIPT_MAP:
|
||||
OK = FALSE;
|
||||
|
||||
for (int i = 0; i < FULL_MAPSIZE; i++)
|
||||
{
|
||||
if ((require_map[i] & map[i]) != 0)
|
||||
{
|
||||
OK = TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!OK) return FALSE;
|
||||
|
||||
/* The rest of the string must be in this script, but we have to
|
||||
allow for the Han complications. */
|
||||
|
||||
switch(script)
|
||||
{
|
||||
case ucp_Han:
|
||||
require_state = SCRIPT_HANPENDING;
|
||||
break;
|
||||
|
||||
case ucp_Hiragana:
|
||||
case ucp_Katakana:
|
||||
require_state = SCRIPT_HANHIRAKATA;
|
||||
break;
|
||||
|
||||
case ucp_Bopomofo:
|
||||
require_state = SCRIPT_HANBOPOMOFO;
|
||||
break;
|
||||
|
||||
case ucp_Hangul:
|
||||
require_state = SCRIPT_HANHANGUL;
|
||||
break;
|
||||
|
||||
/* Compute the intersection of the required list of scripts and the
|
||||
allowed scripts for this character. */
|
||||
|
||||
default:
|
||||
for (int i = 0; i < FULL_MAPSIZE; i++) require_map[i] &= map[i];
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
} /* End checking character's script and extensions. */
|
||||
|
||||
/* The character is in an acceptable script. We must now ensure that all
|
||||
decimal digits in the string come from the same set. Some scripts (e.g.
|
||||
Common, Arabic) have more than one set of decimal digits. This code does
|
||||
not allow mixing sets, even within the same script. The vector called
|
||||
PRIV(ucd_digit_sets)[] contains, in its first element, the number of
|
||||
following elements, and then, in ascending order, the code points of the
|
||||
'9' characters in every set of 10 digits. Each set is identified by the
|
||||
offset in the vector of its '9' character. An initial check of the first
|
||||
value picks up ASCII digits quickly. Otherwise, a binary chop is used. */
|
||||
|
||||
if (ucd->chartype == ucp_Nd)
|
||||
{
|
||||
uint32_t digitset;
|
||||
|
||||
if (c <= PRIV(ucd_digit_sets)[1]) digitset = 1; else
|
||||
{
|
||||
int mid;
|
||||
int bot = 1;
|
||||
int top = PRIV(ucd_digit_sets)[0];
|
||||
for (;;)
|
||||
{
|
||||
if (top <= bot + 1) /* <= rather than == is paranoia */
|
||||
{
|
||||
digitset = top;
|
||||
break;
|
||||
}
|
||||
mid = (top + bot) / 2;
|
||||
if (c <= PRIV(ucd_digit_sets)[mid]) top = mid; else bot = mid;
|
||||
}
|
||||
}
|
||||
|
||||
/* A required value of 0 means "unset". */
|
||||
|
||||
if (require_digitset == 0) require_digitset = digitset;
|
||||
else if (digitset != require_digitset) return FALSE;
|
||||
} /* End digit handling */
|
||||
|
||||
/* If we haven't yet got to the end, pick up the next character. */
|
||||
|
||||
if (ptr >= endptr) return TRUE;
|
||||
GETCHARINCTEST(c, ptr);
|
||||
} /* End checking loop */
|
||||
|
||||
#else /* NOT SUPPORT_UNICODE */
|
||||
(void)ptr;
|
||||
(void)endptr;
|
||||
(void)utf;
|
||||
return TRUE;
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
|
||||
/* End of pcre2_script_run.c */
|
||||
286
3rd/pcre2/src/pcre2_serialize.c
Normal file
286
3rd/pcre2/src/pcre2_serialize.c
Normal file
@@ -0,0 +1,286 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This module contains functions for serializing and deserializing
|
||||
a sequence of compiled codes. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
/* Magic number to provide a small check against being handed junk. */
|
||||
|
||||
/* Magic number stored at the start of a serialized stream as a small check
against being handed junk. */

#define SERIALIZED_DATA_MAGIC 0x50523253u

/* Deserialization is limited to the current PCRE version and character width.
The version word combines the major and minor version numbers; the config word
combines the sizes of a code unit, a pointer, and PCRE2_SIZE. */

#define SERIALIZED_DATA_VERSION \
  ((PCRE2_MAJOR) | ((PCRE2_MINOR) << 16))

#define SERIALIZED_DATA_CONFIG \
  (sizeof(PCRE2_UCHAR) | ((sizeof(void*)) << 8) | ((sizeof(PCRE2_SIZE)) << 16))
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Serialize compiled patterns *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
|
||||
pcre2_serialize_encode(const pcre2_code **codes, int32_t number_of_codes,
|
||||
uint8_t **serialized_bytes, PCRE2_SIZE *serialized_size,
|
||||
pcre2_general_context *gcontext)
|
||||
{
|
||||
uint8_t *bytes;
|
||||
uint8_t *dst_bytes;
|
||||
int32_t i;
|
||||
PCRE2_SIZE total_size;
|
||||
const pcre2_real_code *re;
|
||||
const uint8_t *tables;
|
||||
pcre2_serialized_data *data;
|
||||
|
||||
const pcre2_memctl *memctl = (gcontext != NULL) ?
|
||||
&gcontext->memctl : &PRIV(default_compile_context).memctl;
|
||||
|
||||
if (codes == NULL || serialized_bytes == NULL || serialized_size == NULL)
|
||||
return PCRE2_ERROR_NULL;
|
||||
|
||||
if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA;
|
||||
|
||||
/* Compute total size. */
|
||||
total_size = sizeof(pcre2_serialized_data) + TABLES_LENGTH;
|
||||
tables = NULL;
|
||||
|
||||
for (i = 0; i < number_of_codes; i++)
|
||||
{
|
||||
if (codes[i] == NULL) return PCRE2_ERROR_NULL;
|
||||
re = (const pcre2_real_code *)(codes[i]);
|
||||
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
|
||||
if (tables == NULL)
|
||||
tables = re->tables;
|
||||
else if (tables != re->tables)
|
||||
return PCRE2_ERROR_MIXEDTABLES;
|
||||
total_size += re->blocksize;
|
||||
}
|
||||
|
||||
/* Initialize the byte stream. */
|
||||
bytes = memctl->malloc(total_size + sizeof(pcre2_memctl), memctl->memory_data);
|
||||
if (bytes == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
/* The controller is stored as a hidden parameter. */
|
||||
memcpy(bytes, memctl, sizeof(pcre2_memctl));
|
||||
bytes += sizeof(pcre2_memctl);
|
||||
|
||||
data = (pcre2_serialized_data *)bytes;
|
||||
data->magic = SERIALIZED_DATA_MAGIC;
|
||||
data->version = SERIALIZED_DATA_VERSION;
|
||||
data->config = SERIALIZED_DATA_CONFIG;
|
||||
data->number_of_codes = number_of_codes;
|
||||
|
||||
/* Copy all compiled code data. */
|
||||
dst_bytes = bytes + sizeof(pcre2_serialized_data);
|
||||
memcpy(dst_bytes, tables, TABLES_LENGTH);
|
||||
dst_bytes += TABLES_LENGTH;
|
||||
|
||||
for (i = 0; i < number_of_codes; i++)
|
||||
{
|
||||
re = (const pcre2_real_code *)(codes[i]);
|
||||
(void)memcpy(dst_bytes, (const char *)re, re->blocksize);
|
||||
|
||||
/* Certain fields in the compiled code block are re-set during
|
||||
deserialization. In order to ensure that the serialized data stream is always
|
||||
the same for the same pattern, set them to zero here. We can't assume the
|
||||
copy of the pattern is correctly aligned for accessing the fields as part of
|
||||
a structure. Note the use of sizeof(void *) in the second of these, to
|
||||
specify the size of a pointer. If sizeof(uint8_t *) is used (tables is a
|
||||
pointer to uint8_t), gcc gives a warning because the first argument is also a
|
||||
pointer to uint8_t. Casting the first argument to (void *) can stop this, but
|
||||
it didn't stop Coverity giving the same complaint. */
|
||||
|
||||
(void)memset(dst_bytes + offsetof(pcre2_real_code, memctl), 0,
|
||||
sizeof(pcre2_memctl));
|
||||
(void)memset(dst_bytes + offsetof(pcre2_real_code, tables), 0,
|
||||
sizeof(void *));
|
||||
(void)memset(dst_bytes + offsetof(pcre2_real_code, executable_jit), 0,
|
||||
sizeof(void *));
|
||||
|
||||
dst_bytes += re->blocksize;
|
||||
}
|
||||
|
||||
*serialized_bytes = bytes;
|
||||
*serialized_size = total_size;
|
||||
return number_of_codes;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Deserialize compiled patterns *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
|
||||
pcre2_serialize_decode(pcre2_code **codes, int32_t number_of_codes,
|
||||
const uint8_t *bytes, pcre2_general_context *gcontext)
|
||||
{
|
||||
const pcre2_serialized_data *data = (const pcre2_serialized_data *)bytes;
|
||||
const pcre2_memctl *memctl = (gcontext != NULL) ?
|
||||
&gcontext->memctl : &PRIV(default_compile_context).memctl;
|
||||
|
||||
const uint8_t *src_bytes;
|
||||
pcre2_real_code *dst_re;
|
||||
uint8_t *tables;
|
||||
int32_t i, j;
|
||||
|
||||
/* Sanity checks. */
|
||||
|
||||
if (data == NULL || codes == NULL) return PCRE2_ERROR_NULL;
|
||||
if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA;
|
||||
if (data->number_of_codes <= 0) return PCRE2_ERROR_BADSERIALIZEDDATA;
|
||||
if (data->magic != SERIALIZED_DATA_MAGIC) return PCRE2_ERROR_BADMAGIC;
|
||||
if (data->version != SERIALIZED_DATA_VERSION) return PCRE2_ERROR_BADMODE;
|
||||
if (data->config != SERIALIZED_DATA_CONFIG) return PCRE2_ERROR_BADMODE;
|
||||
|
||||
if (number_of_codes > data->number_of_codes)
|
||||
number_of_codes = data->number_of_codes;
|
||||
|
||||
src_bytes = bytes + sizeof(pcre2_serialized_data);
|
||||
|
||||
/* Decode tables. The reference count for the tables is stored immediately
|
||||
following them. */
|
||||
|
||||
tables = memctl->malloc(TABLES_LENGTH + sizeof(PCRE2_SIZE), memctl->memory_data);
|
||||
if (tables == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
memcpy(tables, src_bytes, TABLES_LENGTH);
|
||||
*(PCRE2_SIZE *)(tables + TABLES_LENGTH) = number_of_codes;
|
||||
src_bytes += TABLES_LENGTH;
|
||||
|
||||
/* Decode the byte stream. We must not try to read the size from the compiled
|
||||
code block in the stream, because it might be unaligned, which causes errors on
|
||||
hardware such as Sparc-64 that doesn't like unaligned memory accesses. The type
|
||||
of the blocksize field is given its own name to ensure that it is the same here
|
||||
as in the block. */
|
||||
|
||||
for (i = 0; i < number_of_codes; i++)
|
||||
{
|
||||
CODE_BLOCKSIZE_TYPE blocksize;
|
||||
memcpy(&blocksize, src_bytes + offsetof(pcre2_real_code, blocksize),
|
||||
sizeof(CODE_BLOCKSIZE_TYPE));
|
||||
if (blocksize <= sizeof(pcre2_real_code))
|
||||
return PCRE2_ERROR_BADSERIALIZEDDATA;
|
||||
|
||||
/* The allocator provided by gcontext replaces the original one. */
|
||||
|
||||
dst_re = (pcre2_real_code *)PRIV(memctl_malloc)(blocksize,
|
||||
(pcre2_memctl *)gcontext);
|
||||
if (dst_re == NULL)
|
||||
{
|
||||
memctl->free(tables, memctl->memory_data);
|
||||
for (j = 0; j < i; j++)
|
||||
{
|
||||
memctl->free(codes[j], memctl->memory_data);
|
||||
codes[j] = NULL;
|
||||
}
|
||||
return PCRE2_ERROR_NOMEMORY;
|
||||
}
|
||||
|
||||
/* The new allocator must be preserved. */
|
||||
|
||||
memcpy(((uint8_t *)dst_re) + sizeof(pcre2_memctl),
|
||||
src_bytes + sizeof(pcre2_memctl), blocksize - sizeof(pcre2_memctl));
|
||||
if (dst_re->magic_number != MAGIC_NUMBER ||
|
||||
dst_re->name_entry_size > MAX_NAME_SIZE + IMM2_SIZE + 1 ||
|
||||
dst_re->name_count > MAX_NAME_COUNT)
|
||||
{
|
||||
memctl->free(dst_re, memctl->memory_data);
|
||||
return PCRE2_ERROR_BADSERIALIZEDDATA;
|
||||
}
|
||||
|
||||
/* At the moment only one table is supported. */
|
||||
|
||||
dst_re->tables = tables;
|
||||
dst_re->executable_jit = NULL;
|
||||
dst_re->flags |= PCRE2_DEREF_TABLES;
|
||||
|
||||
codes[i] = dst_re;
|
||||
src_bytes += blocksize;
|
||||
}
|
||||
|
||||
return number_of_codes;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get the number of serialized patterns *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
|
||||
pcre2_serialize_get_number_of_codes(const uint8_t *bytes)
|
||||
{
|
||||
const pcre2_serialized_data *data = (const pcre2_serialized_data *)bytes;
|
||||
|
||||
if (data == NULL) return PCRE2_ERROR_NULL;
|
||||
if (data->magic != SERIALIZED_DATA_MAGIC) return PCRE2_ERROR_BADMAGIC;
|
||||
if (data->version != SERIALIZED_DATA_VERSION) return PCRE2_ERROR_BADMODE;
|
||||
if (data->config != SERIALIZED_DATA_CONFIG) return PCRE2_ERROR_BADMODE;
|
||||
|
||||
return data->number_of_codes;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free the allocated stream *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_serialize_free(uint8_t *bytes)
|
||||
{
|
||||
if (bytes != NULL)
|
||||
{
|
||||
pcre2_memctl *memctl = (pcre2_memctl *)(bytes - sizeof(pcre2_memctl));
|
||||
memctl->free(memctl, memctl->memory_data);
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_serialize.c */
|
||||
237
3rd/pcre2/src/pcre2_string_utils.c
Normal file
237
3rd/pcre2/src/pcre2_string_utils.c
Normal file
@@ -0,0 +1,237 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2018-2021 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This module contains internal functions for comparing and finding the length
|
||||
of strings. These are used instead of strcmp() etc because the standard
|
||||
functions work only on 8-bit data. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Emulated memmove() for systems without it *
|
||||
*************************************************/
|
||||
|
||||
/* This function can make use of bcopy() if it is available. Otherwise do it by
|
||||
steam, as there are some non-Unix environments that lack both memmove() and
|
||||
bcopy(). */
|
||||
|
||||
#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
|
||||
void *
|
||||
PRIV(memmove)(void *d, const void *s, size_t n)
|
||||
{
|
||||
#ifdef HAVE_BCOPY
|
||||
bcopy(s, d, n);
|
||||
return d;
|
||||
#else
|
||||
size_t i;
|
||||
unsigned char *dest = (unsigned char *)d;
|
||||
const unsigned char *src = (const unsigned char *)s;
|
||||
if (dest > src)
|
||||
{
|
||||
dest += n;
|
||||
src += n;
|
||||
for (i = 0; i < n; ++i) *(--dest) = *(--src);
|
||||
return (void *)dest;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < n; ++i) *dest++ = *src++;
|
||||
return (void *)(dest - n);
|
||||
}
|
||||
#endif /* not HAVE_BCOPY */
|
||||
}
|
||||
#endif /* not VPCOMPAT && not HAVE_MEMMOVE */
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compare two zero-terminated PCRE2 strings *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
|
||||
Returns: 0, 1, or -1
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strcmp)(PCRE2_SPTR str1, PCRE2_SPTR str2)
|
||||
{
|
||||
PCRE2_UCHAR c1, c2;
|
||||
while (*str1 != '\0' || *str2 != '\0')
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
||||
if (c1 != c2) return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compare zero-terminated PCRE2 & 8-bit strings *
|
||||
*************************************************/
|
||||
|
||||
/* As the 8-bit string is almost always a literal, its type is specified as
|
||||
const char *.
|
||||
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
|
||||
Returns: 0, 1, or -1
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strcmp_c8)(PCRE2_SPTR str1, const char *str2)
|
||||
{
|
||||
PCRE2_UCHAR c1, c2;
|
||||
while (*str1 != '\0' || *str2 != '\0')
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
||||
if (c1 != c2) return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compare two PCRE2 strings, given a length *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
len the length
|
||||
|
||||
Returns: 0, 1, or -1
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strncmp)(PCRE2_SPTR str1, PCRE2_SPTR str2, size_t len)
|
||||
{
|
||||
PCRE2_UCHAR c1, c2;
|
||||
for (; len > 0; len--)
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
||||
if (c1 != c2) return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compare PCRE2 string to 8-bit string by length *
|
||||
*************************************************/
|
||||
|
||||
/* As the 8-bit string is almost always a literal, its type is specified as
|
||||
const char *.
|
||||
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
len the length
|
||||
|
||||
Returns: 0, 1, or -1
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strncmp_c8)(PCRE2_SPTR str1, const char *str2, size_t len)
|
||||
{
|
||||
PCRE2_UCHAR c1, c2;
|
||||
for (; len > 0; len--)
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
||||
if (c1 != c2) return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find the length of a PCRE2 string *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Argument: the string
|
||||
Returns: the length
|
||||
*/
|
||||
|
||||
PCRE2_SIZE
|
||||
PRIV(strlen)(PCRE2_SPTR str)
|
||||
{
|
||||
PCRE2_SIZE c = 0;
|
||||
while (*str++ != 0) c++;
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy 8-bit 0-terminated string to PCRE2 string *
|
||||
*************************************************/
|
||||
|
||||
/* Arguments:
|
||||
str1 buffer to receive the string
|
||||
str2 8-bit string to be copied
|
||||
|
||||
Returns: the number of code units used (excluding trailing zero)
|
||||
*/
|
||||
|
||||
PCRE2_SIZE
|
||||
PRIV(strcpy_c8)(PCRE2_UCHAR *str1, const char *str2)
|
||||
{
|
||||
PCRE2_UCHAR *t = str1;
|
||||
while (*str2 != 0) *t++ = *str2++;
|
||||
*t = 0;
|
||||
return t - str1;
|
||||
}
|
||||
|
||||
/* End of pcre2_string_utils.c */
|
||||
2069
3rd/pcre2/src/pcre2_study.c
Normal file
2069
3rd/pcre2/src/pcre2_study.c
Normal file
@@ -0,0 +1,2069 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This module contains functions for scanning a compiled pattern and
|
||||
collecting data (e.g. minimum matching length). */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
/* The highest capturing group number whose minimum length is remembered in the
back reference cache used by find_minlength(). */
|
||||
|
||||
#define MAX_CACHE_BACKREF 128
|
||||
|
||||
/* Set a bit in the starting code unit bit map. The macro refers to a variable
called "re" that must be in scope where it is used, and it evaluates its
argument twice, so the argument must not have side effects. Bit (c & 7) of
byte (c / 8) is the bit for code unit c. */
|
||||
|
||||
#define SET_BIT(c) re->start_bitmap[(c)/8] |= (1u << ((c)&7))
|
||||
|
||||
/* Returns from set_start_bits() */
|
||||
|
||||
enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN, SSB_TOODEEP };
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find the minimum subject length for a group *
|
||||
*************************************************/
|
||||
|
||||
/* Scan a parenthesized group and compute the minimum length of subject that
|
||||
is needed to match it. This is a lower bound; it does not mean there is a
|
||||
string of that length that matches. In UTF mode, the result is in characters
|
||||
rather than code units. The field in a compiled pattern for storing the minimum
|
||||
length is 16-bits long (on the grounds that anything longer than that is
|
||||
pathological), so we give up when we reach that amount. This also means that
|
||||
integer overflow for really crazy patterns cannot happen.
|
||||
|
||||
Backreference minimum lengths are cached to speed up multiple references. This
|
||||
function is called only when the highest back reference in the pattern is less
|
||||
than or equal to MAX_CACHE_BACKREF, which is one less than the size of the
|
||||
caching vector. The zeroth element contains the number of the highest set
|
||||
value.
|
||||
|
||||
Arguments:
|
||||
re compiled pattern block
|
||||
code pointer to start of group (the bracket)
|
||||
startcode pointer to start of the whole pattern's code
|
||||
utf UTF flag
|
||||
recurses chain of recurse_check to catch mutual recursion
|
||||
countptr pointer to call count (to catch over complexity)
|
||||
backref_cache vector for caching back references.
|
||||
|
||||
This function is no longer called when the pattern contains (*ACCEPT); however,
|
||||
the old code for returning -1 is retained, just in case.
|
||||
|
||||
Returns: the minimum length
|
||||
-1 \C in UTF-8 mode
|
||||
or (*ACCEPT)
|
||||
or pattern too complicated
|
||||
-2 internal error (missing capturing bracket)
|
||||
-3 internal error (opcode not listed)
|
||||
*/
|
||||
|
||||
static int
|
||||
find_minlength(const pcre2_real_code *re, PCRE2_SPTR code,
|
||||
PCRE2_SPTR startcode, BOOL utf, recurse_check *recurses, int *countptr,
|
||||
int *backref_cache)
|
||||
{
|
||||
/* Overview: the opcodes of the group that starts at "code" are walked one
item at a time. The minimum for the branch being scanned is accumulated in
"branchlength"; at the end of each branch it is compared with "length", which
holds the smallest branch seen so far. Nested groups, back references, and
subroutine calls are measured by calling this function recursively. */
int length = -1;   /* Negative until the first branch has been completed */
|
||||
int branchlength = 0;
|
||||
int prev_cap_recno = -1;
|
||||
int prev_cap_d = 0;
|
||||
int prev_recurse_recno = -1;
|
||||
int prev_recurse_d = 0;
|
||||
uint32_t once_fudge = 0;
|
||||
BOOL had_recurse = FALSE;
|
||||
BOOL dupcapused = (re->flags & PCRE2_DUPCAPUSED) != 0;
|
||||
PCRE2_SPTR nextbranch = code + GET(code, 1);
|
||||
PCRE2_SPTR cc = code + 1 + LINK_SIZE;
|
||||
recurse_check this_recurse;
|
||||
|
||||
/* If this is a "could be empty" group, its minimum length is 0. */
|
||||
|
||||
if (*code >= OP_SBRA && *code <= OP_SCOND) return 0;
|
||||
|
||||
/* Skip over capturing bracket number */
|
||||
|
||||
if (*code == OP_CBRA || *code == OP_CBRAPOS) cc += IMM2_SIZE;
|
||||
|
||||
/* A large and/or complex regex can take too long to process. */
|
||||
|
||||
if ((*countptr)++ > 1000) return -1;
|
||||
|
||||
/* Scan along the opcodes for this branch. If we get to the end of the branch,
|
||||
check the length against that of the other branches. If the accumulated length
|
||||
passes 16-bits, reset to that value and skip the rest of the branch. */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
int d, min, recno;
|
||||
PCRE2_UCHAR op;
|
||||
PCRE2_SPTR cs, ce;
|
||||
|
||||
if (branchlength >= UINT16_MAX)
|
||||
{
|
||||
branchlength = UINT16_MAX;
|
||||
cc = nextbranch;
|
||||
}
|
||||
|
||||
op = *cc;
|
||||
switch (op)
|
||||
{
|
||||
case OP_COND:
|
||||
case OP_SCOND:
|
||||
|
||||
/* If there is only one branch in a condition, the implied branch has zero
|
||||
length, so we don't add anything. This covers the DEFINE "condition"
|
||||
automatically. If there are two branches we can treat it the same as any
|
||||
other non-capturing subpattern. */
|
||||
|
||||
cs = cc + GET(cc, 1);
|
||||
if (*cs != OP_ALT)
|
||||
{
|
||||
cc = cs + 1 + LINK_SIZE;
|
||||
break;
|
||||
}
|
||||
goto PROCESS_NON_CAPTURE;
|
||||
|
||||
case OP_BRA:
|
||||
/* There's a special case of OP_BRA, when it is wrapped round a repeated
|
||||
OP_RECURSE. We'd like to process the latter at this level so that
|
||||
remembering the value works for repeated cases. So we do nothing, but
|
||||
set a fudge value to skip over the OP_KET after the recurse. */
|
||||
|
||||
if (cc[1+LINK_SIZE] == OP_RECURSE && cc[2*(1+LINK_SIZE)] == OP_KET)
|
||||
{
|
||||
once_fudge = 1 + LINK_SIZE;
|
||||
cc += 1 + LINK_SIZE;
|
||||
break;
|
||||
}
|
||||
/* Fall through */
|
||||
|
||||
case OP_ONCE:
|
||||
case OP_SCRIPT_RUN:
|
||||
case OP_SBRA:
|
||||
case OP_BRAPOS:
|
||||
case OP_SBRAPOS:
|
||||
PROCESS_NON_CAPTURE:
|
||||
d = find_minlength(re, cc, startcode, utf, recurses, countptr,
|
||||
backref_cache);
|
||||
if (d < 0) return d;
|
||||
branchlength += d;
|
||||
/* Step over every alternative of the group, then over its closing ket. */
do cc += GET(cc, 1); while (*cc == OP_ALT);
|
||||
cc += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* To save time for repeated capturing subpatterns, we remember the
|
||||
length of the previous one. Unfortunately we can't do the same for
|
||||
the unnumbered ones above. Nor can we do this if (?| is present in the
|
||||
pattern because captures with the same number are not then identical. */
|
||||
|
||||
case OP_CBRA:
|
||||
case OP_SCBRA:
|
||||
case OP_CBRAPOS:
|
||||
case OP_SCBRAPOS:
|
||||
recno = (int)GET2(cc, 1+LINK_SIZE);
|
||||
if (dupcapused || recno != prev_cap_recno)
|
||||
{
|
||||
prev_cap_recno = recno;
|
||||
prev_cap_d = find_minlength(re, cc, startcode, utf, recurses, countptr,
|
||||
backref_cache);
|
||||
if (prev_cap_d < 0) return prev_cap_d;
|
||||
}
|
||||
branchlength += prev_cap_d;
|
||||
do cc += GET(cc, 1); while (*cc == OP_ALT);
|
||||
cc += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* ACCEPT makes things far too complicated; we have to give up. In fact,
|
||||
from 10.34 onwards, if a pattern contains (*ACCEPT), this function is not
|
||||
used. However, leave the code in place, just in case. */
|
||||
|
||||
case OP_ACCEPT:
|
||||
case OP_ASSERT_ACCEPT:
|
||||
return -1;
|
||||
|
||||
/* Reached end of a branch; if it's a ket it is the end of a nested
|
||||
call. If it's ALT it is an alternation in a nested call. If it is END it's
|
||||
the end of the outer call. All can be handled by the same code. If the
|
||||
length of any branch is zero, there is no need to scan any subsequent
|
||||
branches. */
|
||||
|
||||
case OP_ALT:
|
||||
case OP_KET:
|
||||
case OP_KETRMAX:
|
||||
case OP_KETRMIN:
|
||||
case OP_KETRPOS:
|
||||
case OP_END:
|
||||
/* The first completed branch always sets the minimum. After that, a branch
lowers it only if it is shorter and did not contain a recursion. */
if (length < 0 || (!had_recurse && branchlength < length))
|
||||
length = branchlength;
|
||||
if (op != OP_ALT || length == 0) return length;
|
||||
nextbranch = cc + GET(cc, 1);
|
||||
cc += 1 + LINK_SIZE;
|
||||
branchlength = 0;
|
||||
had_recurse = FALSE;
|
||||
break;
|
||||
|
||||
/* Skip over assertive subpatterns */
|
||||
|
||||
case OP_ASSERT:
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
case OP_ASSERT_NA:
|
||||
case OP_ASSERT_SCS:
|
||||
case OP_ASSERTBACK_NA:
|
||||
do cc += GET(cc, 1); while (*cc == OP_ALT);
|
||||
/* Fall through */
|
||||
|
||||
/* Skip over things that don't match chars */
|
||||
|
||||
case OP_REVERSE:
|
||||
case OP_VREVERSE:
|
||||
case OP_CREF:
|
||||
case OP_DNCREF:
|
||||
case OP_RREF:
|
||||
case OP_DNRREF:
|
||||
case OP_FALSE:
|
||||
case OP_TRUE:
|
||||
case OP_CALLOUT:
|
||||
case OP_SOD:
|
||||
case OP_SOM:
|
||||
case OP_EOD:
|
||||
case OP_EODN:
|
||||
case OP_CIRC:
|
||||
case OP_CIRCM:
|
||||
case OP_DOLL:
|
||||
case OP_DOLLM:
|
||||
case OP_NOT_WORD_BOUNDARY:
|
||||
case OP_WORD_BOUNDARY:
|
||||
case OP_NOT_UCP_WORD_BOUNDARY:
|
||||
case OP_UCP_WORD_BOUNDARY:
|
||||
cc += PRIV(OP_lengths)[*cc];
|
||||
break;
|
||||
|
||||
/* The amount to skip for this item is read from within the item itself
rather than from the opcode lengths table. */
case OP_CALLOUT_STR:
|
||||
cc += GET(cc, 1 + 2*LINK_SIZE);
|
||||
break;
|
||||
|
||||
/* Skip over a subpattern that has a {0} or {0,x} quantifier */
|
||||
|
||||
case OP_BRAZERO:
|
||||
case OP_BRAMINZERO:
|
||||
case OP_BRAPOSZERO:
|
||||
case OP_SKIPZERO:
|
||||
cc += PRIV(OP_lengths)[*cc];
|
||||
do cc += GET(cc, 1); while (*cc == OP_ALT);
|
||||
cc += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Handle literal characters and + repetitions */
|
||||
|
||||
case OP_CHAR:
|
||||
case OP_CHARI:
|
||||
case OP_NOT:
|
||||
case OP_NOTI:
|
||||
case OP_PLUS:
|
||||
case OP_PLUSI:
|
||||
case OP_MINPLUS:
|
||||
case OP_MINPLUSI:
|
||||
case OP_POSPLUS:
|
||||
case OP_POSPLUSI:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTPLUSI:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTMINPLUSI:
|
||||
case OP_NOTPOSPLUS:
|
||||
case OP_NOTPOSPLUSI:
|
||||
branchlength++;
|
||||
cc += 2;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
|
||||
#endif
|
||||
break;
|
||||
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEPOSPLUS:
|
||||
branchlength++;
|
||||
cc += (cc[1] == OP_PROP || cc[1] == OP_NOTPROP)? 4 : 2;
|
||||
break;
|
||||
|
||||
/* Handle exact repetitions. The count is already in characters, but we
|
||||
may need to skip over a multibyte character in UTF mode. */
|
||||
|
||||
case OP_EXACT:
|
||||
case OP_EXACTI:
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTEXACTI:
|
||||
branchlength += GET2(cc,1);
|
||||
cc += 2 + IMM2_SIZE;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
|
||||
#endif
|
||||
break;
|
||||
|
||||
case OP_TYPEEXACT:
|
||||
branchlength += GET2(cc,1);
|
||||
cc += 2 + IMM2_SIZE + ((cc[1 + IMM2_SIZE] == OP_PROP
|
||||
|| cc[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0);
|
||||
break;
|
||||
|
||||
/* Handle single-char non-literal matchers */
|
||||
|
||||
case OP_PROP:
|
||||
case OP_NOTPROP:
|
||||
cc += 2;
|
||||
/* Fall through */
|
||||
|
||||
case OP_NOT_DIGIT:
|
||||
case OP_DIGIT:
|
||||
case OP_NOT_WHITESPACE:
|
||||
case OP_WHITESPACE:
|
||||
case OP_NOT_WORDCHAR:
|
||||
case OP_WORDCHAR:
|
||||
case OP_ANY:
|
||||
case OP_ALLANY:
|
||||
case OP_EXTUNI:
|
||||
case OP_HSPACE:
|
||||
case OP_NOT_HSPACE:
|
||||
case OP_VSPACE:
|
||||
case OP_NOT_VSPACE:
|
||||
branchlength++;
|
||||
cc++;
|
||||
break;
|
||||
|
||||
/* "Any newline" might match two characters, but it also might match just
|
||||
one. */
|
||||
|
||||
case OP_ANYNL:
|
||||
branchlength += 1;
|
||||
cc++;
|
||||
break;
|
||||
|
||||
/* The single-byte matcher means we can't proceed in UTF mode. (In
|
||||
non-UTF mode \C will actually be turned into OP_ALLANY, so won't ever
|
||||
appear, but leave the code, just in case.) */
|
||||
|
||||
case OP_ANYBYTE:
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) return -1;
|
||||
#endif
|
||||
branchlength++;
|
||||
cc++;
|
||||
break;
|
||||
|
||||
/* For repeated character types, we have to test for \p and \P, which have
|
||||
an extra two bytes of parameters. */
|
||||
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEPOSQUERY:
|
||||
if (cc[1] == OP_PROP || cc[1] == OP_NOTPROP) cc += 2;
|
||||
cc += PRIV(OP_lengths)[op];
|
||||
break;
|
||||
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEPOSUPTO:
|
||||
if (cc[1 + IMM2_SIZE] == OP_PROP
|
||||
|| cc[1 + IMM2_SIZE] == OP_NOTPROP) cc += 2;
|
||||
cc += PRIV(OP_lengths)[op];
|
||||
break;
|
||||
|
||||
/* Check a class for variable quantification */
|
||||
|
||||
case OP_CLASS:
|
||||
case OP_NCLASS:
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
case OP_XCLASS:
|
||||
case OP_ECLASS:
|
||||
/* The original code caused an unsigned overflow in 64 bit systems,
|
||||
so now we use a conditional statement. */
|
||||
if (op == OP_XCLASS || op == OP_ECLASS)
|
||||
cc += GET(cc, 1);
|
||||
else
|
||||
#endif
|
||||
cc += PRIV(OP_lengths)[OP_CLASS];
|
||||
|
||||
/* cc now points at whatever follows the class. A quantifier opcode there
determines the contribution; without one the class counts as one character. */
switch (*cc)
|
||||
{
|
||||
case OP_CRPLUS:
|
||||
case OP_CRMINPLUS:
|
||||
case OP_CRPOSPLUS:
|
||||
branchlength++;
|
||||
/* Fall through */
|
||||
|
||||
case OP_CRSTAR:
|
||||
case OP_CRMINSTAR:
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
case OP_CRPOSSTAR:
|
||||
case OP_CRPOSQUERY:
|
||||
cc++;
|
||||
break;
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
case OP_CRPOSRANGE:
|
||||
branchlength += GET2(cc,1);
|
||||
cc += 1 + 2 * IMM2_SIZE;
|
||||
break;
|
||||
|
||||
default:
|
||||
branchlength++;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
/* Backreferences and subroutine calls (OP_RECURSE) are treated in the same
|
||||
way: we find the minimum length for the subpattern. A recursion
|
||||
(backreference or subroutine) causes a flag to be set that causes the
|
||||
length of this branch to be ignored. The logic is that a recursion can only
|
||||
make sense if there is another alternative that stops the recursing. That
|
||||
will provide the minimum length (when no recursion happens).
|
||||
|
||||
If PCRE2_MATCH_UNSET_BACKREF is set, a backreference to an unset bracket
|
||||
matches an empty string (by default it causes a matching failure), so in
|
||||
that case we must set the minimum length to zero.
|
||||
|
||||
For backreferences, if duplicate numbers are present in the pattern we check
|
||||
for a reference to a duplicate. If it is, we don't know which version will
|
||||
be referenced, so we have to set the minimum length to zero. */
|
||||
|
||||
/* Duplicate named pattern back reference. */
|
||||
|
||||
case OP_DNREF:
|
||||
case OP_DNREFI:
|
||||
if (!dupcapused && (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
|
||||
{
|
||||
int count = GET2(cc, 1+IMM2_SIZE);
|
||||
PCRE2_SPTR slot =
|
||||
(PCRE2_SPTR)((const uint8_t *)re + sizeof(pcre2_real_code)) +
|
||||
GET2(cc, 1) * re->name_entry_size;
|
||||
|
||||
d = INT_MAX;
|
||||
|
||||
/* Scan all groups with the same name; find the shortest. */
|
||||
|
||||
while (count-- > 0)
|
||||
{
|
||||
int dd, i;
|
||||
recno = GET2(slot, 0);
|
||||
|
||||
if (recno <= backref_cache[0] && backref_cache[recno] >= 0)
|
||||
dd = backref_cache[recno];
|
||||
else
|
||||
{
|
||||
ce = cs = PRIV(find_bracket)(startcode, utf, recno);
|
||||
if (cs == NULL) return -2;
|
||||
do ce += GET(ce, 1); while (*ce == OP_ALT);
|
||||
|
||||
dd = 0;
|
||||
if (!dupcapused || PRIV(find_bracket)(ce, utf, recno) == NULL)
|
||||
{
|
||||
if (cc > cs && cc < ce) /* Simple recursion */
|
||||
{
|
||||
had_recurse = TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
recurse_check *r = recurses;
|
||||
for (r = recurses; r != NULL; r = r->prev)
|
||||
if (r->group == cs) break;
|
||||
if (r != NULL) /* Mutual recursion */
|
||||
{
|
||||
had_recurse = TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
this_recurse.prev = recurses; /* No recursion */
|
||||
this_recurse.group = cs;
|
||||
dd = find_minlength(re, cs, startcode, utf, &this_recurse,
|
||||
countptr, backref_cache);
|
||||
if (dd < 0) return dd;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Remember the result. Slots between the previous highest set entry and
this one are marked as unset (-1) before the highest-set marker is moved. */
backref_cache[recno] = dd;
|
||||
for (i = backref_cache[0] + 1; i < recno; i++) backref_cache[i] = -1;
|
||||
backref_cache[0] = recno;
|
||||
}
|
||||
|
||||
if (dd < d) d = dd;
|
||||
if (d <= 0) break; /* No point looking at any more */
|
||||
slot += re->name_entry_size;
|
||||
}
|
||||
}
|
||||
else d = 0;
|
||||
cc += PRIV(OP_lengths)[*cc];
|
||||
goto REPEAT_BACK_REFERENCE;
|
||||
|
||||
/* Single back reference by number. References by name are converted to by
|
||||
number when there is no duplication. */
|
||||
|
||||
case OP_REF:
|
||||
case OP_REFI:
|
||||
recno = GET2(cc, 1);
|
||||
if (recno <= backref_cache[0] && backref_cache[recno] >= 0)
|
||||
d = backref_cache[recno];
|
||||
else
|
||||
{
|
||||
int i;
|
||||
d = 0;
|
||||
|
||||
if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
|
||||
{
|
||||
ce = cs = PRIV(find_bracket)(startcode, utf, recno);
|
||||
if (cs == NULL) return -2;
|
||||
do ce += GET(ce, 1); while (*ce == OP_ALT);
|
||||
|
||||
if (!dupcapused || PRIV(find_bracket)(ce, utf, recno) == NULL)
|
||||
{
|
||||
if (cc > cs && cc < ce) /* Simple recursion */
|
||||
{
|
||||
had_recurse = TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
recurse_check *r = recurses;
|
||||
for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
|
||||
if (r != NULL) /* Mutual recursion */
|
||||
{
|
||||
had_recurse = TRUE;
|
||||
}
|
||||
else /* No recursion */
|
||||
{
|
||||
this_recurse.prev = recurses;
|
||||
this_recurse.group = cs;
|
||||
d = find_minlength(re, cs, startcode, utf, &this_recurse, countptr,
|
||||
backref_cache);
|
||||
if (d < 0) return d;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Remember the result. Slots between the previous highest set entry and
this one are marked as unset (-1) before the highest-set marker is moved. */
backref_cache[recno] = d;
|
||||
for (i = backref_cache[0] + 1; i < recno; i++) backref_cache[i] = -1;
|
||||
backref_cache[0] = recno;
|
||||
}
|
||||
|
||||
cc += PRIV(OP_lengths)[*cc];
|
||||
|
||||
/* Handle repeated back references */
|
||||
|
||||
REPEAT_BACK_REFERENCE:
|
||||
switch (*cc)
|
||||
{
|
||||
case OP_CRSTAR:
|
||||
case OP_CRMINSTAR:
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
case OP_CRPOSSTAR:
|
||||
case OP_CRPOSQUERY:
|
||||
min = 0;
|
||||
cc++;
|
||||
break;
|
||||
|
||||
case OP_CRPLUS:
|
||||
case OP_CRMINPLUS:
|
||||
case OP_CRPOSPLUS:
|
||||
min = 1;
|
||||
cc++;
|
||||
break;
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
case OP_CRPOSRANGE:
|
||||
min = GET2(cc, 1);
|
||||
cc += 1 + 2 * IMM2_SIZE;
|
||||
break;
|
||||
|
||||
default:
|
||||
min = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Take care not to overflow: (1) min and d are ints, so check that their
|
||||
product is not greater than INT_MAX. (2) branchlength is limited to
|
||||
UINT16_MAX (checked at the top of the loop). */
|
||||
|
||||
if ((d > 0 && (INT_MAX/d) < min) || UINT16_MAX - branchlength < min*d)
|
||||
branchlength = UINT16_MAX;
|
||||
else branchlength += min * d;
|
||||
break;
|
||||
|
||||
/* Recursion always refers to the first occurrence of a subpattern with a
|
||||
given number. Therefore, we can always make use of caching, even when the
|
||||
pattern contains multiple subpatterns with the same number. */
|
||||
|
||||
case OP_RECURSE:
|
||||
cs = ce = startcode + GET(cc, 1);
|
||||
recno = GET2(cs, 1+LINK_SIZE);
|
||||
if (recno == prev_recurse_recno)
|
||||
{
|
||||
branchlength += prev_recurse_d;
|
||||
}
|
||||
else
|
||||
{
|
||||
do ce += GET(ce, 1); while (*ce == OP_ALT);
|
||||
if (cc > cs && cc < ce) /* Simple recursion */
|
||||
had_recurse = TRUE;
|
||||
else
|
||||
{
|
||||
recurse_check *r = recurses;
|
||||
for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
|
||||
if (r != NULL) /* Mutual recursion */
|
||||
had_recurse = TRUE;
|
||||
else
|
||||
{
|
||||
this_recurse.prev = recurses;
|
||||
this_recurse.group = cs;
|
||||
prev_recurse_d = find_minlength(re, cs, startcode, utf, &this_recurse,
|
||||
countptr, backref_cache);
|
||||
if (prev_recurse_d < 0) return prev_recurse_d;
|
||||
prev_recurse_recno = recno;
|
||||
branchlength += prev_recurse_d;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* once_fudge is non-zero only when OP_BRA set it for a wrapped recursion; it
makes this step also pass the OP_KET that follows, and is then cleared. */
cc += 1 + LINK_SIZE + once_fudge;
|
||||
once_fudge = 0;
|
||||
break;
|
||||
|
||||
/* Anything else does not or need not match a character. We can get the
|
||||
item's length from the table, but for those that can match zero occurrences
|
||||
of a character, we must take special action for UTF-8 characters. As it
|
||||
happens, the "NOT" versions of these opcodes are used at present only for
|
||||
ASCII characters, so they could be omitted from this list. However, in
|
||||
future that may change, so we include them here so as not to leave a
|
||||
gotcha for a future maintainer. */
|
||||
|
||||
case OP_UPTO:
|
||||
case OP_UPTOI:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTUPTOI:
|
||||
case OP_MINUPTO:
|
||||
case OP_MINUPTOI:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_NOTMINUPTOI:
|
||||
case OP_POSUPTO:
|
||||
case OP_POSUPTOI:
|
||||
case OP_NOTPOSUPTO:
|
||||
case OP_NOTPOSUPTOI:
|
||||
|
||||
case OP_STAR:
|
||||
case OP_STARI:
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTSTARI:
|
||||
case OP_MINSTAR:
|
||||
case OP_MINSTARI:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTMINSTARI:
|
||||
case OP_POSSTAR:
|
||||
case OP_POSSTARI:
|
||||
case OP_NOTPOSSTAR:
|
||||
case OP_NOTPOSSTARI:
|
||||
|
||||
case OP_QUERY:
|
||||
case OP_QUERYI:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTQUERYI:
|
||||
case OP_MINQUERY:
|
||||
case OP_MINQUERYI:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_NOTMINQUERYI:
|
||||
case OP_POSQUERY:
|
||||
case OP_POSQUERYI:
|
||||
case OP_NOTPOSQUERY:
|
||||
case OP_NOTPOSQUERYI:
|
||||
|
||||
cc += PRIV(OP_lengths)[op];
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
|
||||
#endif
|
||||
break;
|
||||
|
||||
/* Skip these, but we need to add in the name length. */
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
cc += PRIV(OP_lengths)[op] + cc[1];
|
||||
break;
|
||||
|
||||
/* The remaining opcodes are just skipped over. */
|
||||
|
||||
case OP_CLOSE:
|
||||
case OP_COMMIT:
|
||||
case OP_FAIL:
|
||||
case OP_PRUNE:
|
||||
case OP_SET_SOM:
|
||||
case OP_SKIP:
|
||||
case OP_THEN:
|
||||
cc += PRIV(OP_lengths)[op];
|
||||
break;
|
||||
|
||||
/* This should not occur: we list all opcodes explicitly so that when
|
||||
new ones get added they are properly considered. */
|
||||
|
||||
default:
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
return -3;
|
||||
}
|
||||
}
|
||||
|
||||
PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
|
||||
return -3; /* Avoid compiler warnings */
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Set a bit and maybe its alternate case *
|
||||
*************************************************/
|
||||
|
||||
/* Given a character, set its first code unit's bit in the table, and also the
|
||||
corresponding bit for the other version of a letter if we are caseless.
|
||||
|
||||
Arguments:
|
||||
re points to the regex block
|
||||
p points to the first code unit of the character
|
||||
caseless TRUE if caseless
|
||||
utf TRUE for UTF mode
|
||||
ucp TRUE for UCP mode
|
||||
|
||||
Returns: pointer after the character
|
||||
*/
|
||||
|
||||
static PCRE2_SPTR
|
||||
set_table_bit(pcre2_real_code *re, PCRE2_SPTR p, BOOL caseless, BOOL utf,
|
||||
BOOL ucp)
|
||||
{
|
||||
uint32_t c = *p++; /* First code unit */
|
||||
|
||||
(void)utf; /* Stop compiler warnings when UTF not supported */
|
||||
(void)ucp;
|
||||
|
||||
/* In 16-bit and 32-bit modes, code units greater than 0xff set the bit for
|
||||
0xff. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (c > 0xff) SET_BIT(0xff); else
|
||||
#endif
|
||||
|
||||
SET_BIT(c);
|
||||
|
||||
/* In UTF-8 or UTF-16 mode, pick up the remaining code units in order to find
|
||||
the end of the character, even when caseless. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (c >= 0xc0) GETUTF8INC(c, p);
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
if ((c & 0xfc00) == 0xd800) GETUTF16INC(c, p);
|
||||
#endif
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* If caseless, handle the other case of the character. */
|
||||
|
||||
if (caseless)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf || ucp)
|
||||
{
|
||||
c = UCD_OTHERCASE(c);
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (utf)
|
||||
{
|
||||
PCRE2_UCHAR buff[6];
|
||||
(void)PRIV(ord2utf)(c, buff);
|
||||
SET_BIT(buff[0]);
|
||||
}
|
||||
else if (c < 256) SET_BIT(c);
|
||||
#else /* 16-bit or 32-bit mode */
|
||||
if (c > 0xff) SET_BIT(0xff); else SET_BIT(c);
|
||||
#endif
|
||||
}
|
||||
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Not UTF or UCP */
|
||||
|
||||
if (MAX_255(c)) SET_BIT(re->tables[fcc_offset + c]);
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Set bits for a positive character type *
|
||||
*************************************************/
|
||||
|
||||
/* This function sets starting bits for a character type. In UTF-8 mode, we can
|
||||
only do a direct setting for bytes less than 128, as otherwise there can be
|
||||
confusion with bytes in the middle of UTF-8 characters. In a "traditional"
|
||||
environment, the tables will only recognize ASCII characters anyway, but in at
|
||||
least one Windows environment, some higher bytes bits were set in the tables.
|
||||
So we deal with that case by considering the UTF-8 encoding.
|
||||
|
||||
Arguments:
|
||||
re the regex block
|
||||
cbit_type the type of character wanted
|
||||
table_limit 32 for non-UTF-8; 16 for UTF-8
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
set_type_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit)
|
||||
{
|
||||
uint32_t c;
|
||||
for (c = 0; c < table_limit; c++)
|
||||
re->start_bitmap[c] |= re->tables[c+cbits_offset+cbit_type];
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (table_limit == 32) return;
|
||||
for (c = 128; c < 256; c++)
|
||||
{
|
||||
if ((re->tables[cbits_offset + c/8] & (1u << (c&7))) != 0)
|
||||
{
|
||||
PCRE2_UCHAR buff[6];
|
||||
(void)PRIV(ord2utf)(c, buff);
|
||||
SET_BIT(buff[0]);
|
||||
}
|
||||
}
|
||||
#endif /* UTF-8 */
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Set bits for a negative character type *
|
||||
*************************************************/
|
||||
|
||||
/* This function sets starting bits for a negative character type such as \D.
|
||||
In UTF-8 mode, we can only do a direct setting for bytes less than 128, as
|
||||
otherwise there can be confusion with bytes in the middle of UTF-8 characters.
|
||||
Unlike in the positive case, where we can set appropriate starting bits for
|
||||
specific high-valued UTF-8 characters, in this case we have to set the bits for
|
||||
all high-valued characters. The lowest is 0xc2, but we overkill by starting at
|
||||
0xc0 (192) for simplicity.
|
||||
|
||||
Arguments:
|
||||
re the regex block
|
||||
cbit_type the type of character wanted
|
||||
table_limit 32 for non-UTF-8; 16 for UTF-8
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
set_nottype_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit)
|
||||
{
|
||||
uint32_t c;
|
||||
for (c = 0; c < table_limit; c++)
|
||||
re->start_bitmap[c] |= (uint8_t)(~(re->tables[c+cbits_offset+cbit_type]));
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
/*************************************************
|
||||
* Set starting bits for a character list. *
|
||||
*************************************************/
|
||||
|
||||
/* This function sets starting bits for a character list. It enumerates
|
||||
all characters and character ranges in the character list, and sets
|
||||
the starting bits accordingly.
|
||||
|
||||
Arguments:
|
||||
code pointer to the code
|
||||
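start_bitmap pointer to the starting bitmap
|
||||
char_lists_end pointer to the end of the character lists; the list for this
item is located at an offset back from this pointer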
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
static void
|
||||
study_char_list(PCRE2_SPTR code, uint8_t *start_bitmap,
|
||||
const uint8_t *char_lists_end)
|
||||
{
|
||||
uint32_t type, list_ind;
|
||||
uint32_t char_list_add = XCL_CHAR_LIST_LOW_16_ADD;
|
||||
uint32_t range_start = ~(uint32_t)0, range_end = 0;
|
||||
const uint8_t *next_char;
|
||||
PCRE2_UCHAR start_buffer[6], end_buffer[6];
|
||||
PCRE2_UCHAR start, end;
|
||||
|
||||
/* Only needed in 8-bit mode at the moment. */
|
||||
type = (uint32_t)(code[0] << 8) | code[1];
|
||||
code += 2;
|
||||
|
||||
/* Align characters. */
|
||||
next_char = char_lists_end - (GET(code, 0) << 1);
|
||||
type &= XCL_TYPE_MASK;
|
||||
list_ind = 0;
|
||||
|
||||
if ((type & XCL_BEGIN_WITH_RANGE) != 0)
|
||||
range_start = XCL_CHAR_LIST_LOW_16_START;
|
||||
|
||||
while (type > 0)
|
||||
{
|
||||
uint32_t item_count = type & XCL_ITEM_COUNT_MASK;
|
||||
|
||||
if (item_count == XCL_ITEM_COUNT_MASK)
|
||||
{
|
||||
if (list_ind <= 1)
|
||||
{
|
||||
item_count = *(const uint16_t*)next_char;
|
||||
next_char += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
item_count = *(const uint32_t*)next_char;
|
||||
next_char += 4;
|
||||
}
|
||||
}
|
||||
|
||||
while (item_count > 0)
|
||||
{
|
||||
if (list_ind <= 1)
|
||||
{
|
||||
range_end = *(const uint16_t*)next_char;
|
||||
next_char += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
range_end = *(const uint32_t*)next_char;
|
||||
next_char += 4;
|
||||
}
|
||||
|
||||
if ((range_end & XCL_CHAR_END) != 0)
|
||||
{
|
||||
range_end = char_list_add + (range_end >> XCL_CHAR_SHIFT);
|
||||
|
||||
PRIV(ord2utf)(range_end, end_buffer);
|
||||
end = end_buffer[0];
|
||||
|
||||
if (range_start < range_end)
|
||||
{
|
||||
PRIV(ord2utf)(range_start, start_buffer);
|
||||
for (start = start_buffer[0]; start <= end; start++)
|
||||
start_bitmap[start / 8] |= (1u << (start & 7));
|
||||
}
|
||||
else
|
||||
start_bitmap[end / 8] |= (1u << (end & 7));
|
||||
|
||||
range_start = ~(uint32_t)0;
|
||||
}
|
||||
else
|
||||
range_start = char_list_add + (range_end >> XCL_CHAR_SHIFT);
|
||||
|
||||
item_count--;
|
||||
}
|
||||
|
||||
list_ind++;
|
||||
type >>= XCL_TYPE_BIT_LEN;
|
||||
|
||||
if (range_start == ~(uint32_t)0)
|
||||
{
|
||||
if ((type & XCL_BEGIN_WITH_RANGE) != 0)
|
||||
{
|
||||
/* In 8 bit mode XCL_CHAR_LIST_HIGH_32_START is not possible. */
|
||||
if (list_ind == 1) range_start = XCL_CHAR_LIST_HIGH_16_START;
|
||||
else range_start = XCL_CHAR_LIST_LOW_32_START;
|
||||
}
|
||||
}
|
||||
else if ((type & XCL_BEGIN_WITH_RANGE) == 0)
|
||||
{
|
||||
PRIV(ord2utf)(range_start, start_buffer);
|
||||
|
||||
/* In 8 bit mode XCL_CHAR_LIST_LOW_32_END and
|
||||
XCL_CHAR_LIST_HIGH_32_END are not possible. */
|
||||
if (list_ind == 1) range_end = XCL_CHAR_LIST_LOW_16_END;
|
||||
else range_end = XCL_CHAR_LIST_HIGH_16_END;
|
||||
|
||||
PRIV(ord2utf)(range_end, end_buffer);
|
||||
end = end_buffer[0];
|
||||
|
||||
for (start = start_buffer[0]; start <= end; start++)
|
||||
start_bitmap[start / 8] |= (1u << (start & 7));
|
||||
|
||||
range_start = ~(uint32_t)0;
|
||||
}
|
||||
|
||||
/* In 8 bit mode XCL_CHAR_LIST_HIGH_32_ADD is not possible. */
|
||||
if (list_ind == 1) char_list_add = XCL_CHAR_LIST_HIGH_16_ADD;
|
||||
else char_list_add = XCL_CHAR_LIST_LOW_32_ADD;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create bitmap of starting code units *
|
||||
*************************************************/
|
||||
|
||||
/* This function scans a compiled unanchored expression recursively and
|
||||
attempts to build a bitmap of the set of possible starting code units whose
|
||||
values are less than 256. In 16-bit and 32-bit mode, values above 255 all cause
|
||||
the 255 bit to be set. When calling set[_not]_type_bits() in UTF-8 (sic) mode
|
||||
we pass a value of 16 rather than 32 as the final argument. (See comments in
|
||||
those functions for the reason.)
|
||||
|
||||
The SSB_CONTINUE return is useful for parenthesized groups in patterns such as
|
||||
(a*)b where the group provides some optional starting code units but scanning
|
||||
must continue at the outer level to find at least one mandatory code unit. At
|
||||
the outermost level, this function fails unless the result is SSB_DONE.
|
||||
|
||||
We restrict recursion (for nested groups) to 1000 to avoid stack overflow
|
||||
issues.
|
||||
|
||||
Arguments:
|
||||
re points to the compiled regex block
|
||||
code points to an expression
|
||||
utf TRUE if in UTF mode
|
||||
ucp TRUE if in UCP mode
|
||||
depthptr pointer to recurse depth
|
||||
|
||||
Returns: SSB_FAIL => Failed to find any starting code units
|
||||
SSB_DONE => Found mandatory starting code units
|
||||
SSB_CONTINUE => Found optional starting code units
|
||||
SSB_UNKNOWN => Hit an unrecognized opcode
|
||||
SSB_TOODEEP => Recursion is too deep
|
||||
*/
|
||||
|
||||
static int
|
||||
set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf, BOOL ucp,
|
||||
int *depthptr)
|
||||
{
|
||||
uint32_t c;
|
||||
int yield = SSB_DONE;
|
||||
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
int table_limit = utf? 16:32;
|
||||
#else
|
||||
int table_limit = 32;
|
||||
#endif
|
||||
|
||||
*depthptr += 1;
|
||||
if (*depthptr > 1000) return SSB_TOODEEP;
|
||||
|
||||
do
|
||||
{
|
||||
BOOL try_next = TRUE;
|
||||
PCRE2_SPTR tcode = code + 1 + LINK_SIZE;
|
||||
|
||||
if (*code == OP_CBRA || *code == OP_SCBRA ||
|
||||
*code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += IMM2_SIZE;
|
||||
|
||||
while (try_next) /* Loop for items in this branch */
|
||||
{
|
||||
int rc;
|
||||
PCRE2_SPTR ncode;
|
||||
const uint8_t *classmap = NULL;
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
PCRE2_UCHAR xclassflags;
|
||||
#endif
|
||||
|
||||
switch(*tcode)
|
||||
{
|
||||
/* If we reach something we don't understand, it means a new opcode has
|
||||
been created that hasn't been added to this function. Hopefully this
|
||||
problem will be discovered during testing. */
|
||||
|
||||
default:
|
||||
return SSB_UNKNOWN;
|
||||
|
||||
/* Fail for a valid opcode that implies no starting bits. */
|
||||
|
||||
case OP_ACCEPT:
|
||||
case OP_ASSERT_ACCEPT:
|
||||
case OP_ALLANY:
|
||||
case OP_ANY:
|
||||
case OP_ANYBYTE:
|
||||
case OP_CIRCM:
|
||||
case OP_CLOSE:
|
||||
case OP_COMMIT:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_COND:
|
||||
case OP_CREF:
|
||||
case OP_FALSE:
|
||||
case OP_TRUE:
|
||||
case OP_DNCREF:
|
||||
case OP_DNREF:
|
||||
case OP_DNREFI:
|
||||
case OP_DNRREF:
|
||||
case OP_DOLL:
|
||||
case OP_DOLLM:
|
||||
case OP_END:
|
||||
case OP_EOD:
|
||||
case OP_EODN:
|
||||
case OP_EXTUNI:
|
||||
case OP_FAIL:
|
||||
case OP_MARK:
|
||||
case OP_NOT:
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTEXACTI:
|
||||
case OP_NOTI:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTMINPLUSI:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_NOTMINQUERYI:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTMINSTARI:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_NOTMINUPTOI:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTPLUSI:
|
||||
case OP_NOTPOSPLUS:
|
||||
case OP_NOTPOSPLUSI:
|
||||
case OP_NOTPOSQUERY:
|
||||
case OP_NOTPOSQUERYI:
|
||||
case OP_NOTPOSSTAR:
|
||||
case OP_NOTPOSSTARI:
|
||||
case OP_NOTPOSUPTO:
|
||||
case OP_NOTPOSUPTOI:
|
||||
case OP_NOTPROP:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTQUERYI:
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTSTARI:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTUPTOI:
|
||||
case OP_NOT_HSPACE:
|
||||
case OP_NOT_VSPACE:
|
||||
case OP_PRUNE:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_RECURSE:
|
||||
case OP_REF:
|
||||
case OP_REFI:
|
||||
case OP_REVERSE:
|
||||
case OP_VREVERSE:
|
||||
case OP_RREF:
|
||||
case OP_SCOND:
|
||||
case OP_SET_SOM:
|
||||
case OP_SKIP:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_SOD:
|
||||
case OP_SOM:
|
||||
case OP_THEN:
|
||||
case OP_THEN_ARG:
|
||||
return SSB_FAIL;
|
||||
|
||||
/* OP_CIRC happens only at the start of an anchored branch (multiline ^
|
||||
uses OP_CIRCM). Skip over it. */
|
||||
|
||||
case OP_CIRC:
|
||||
tcode += PRIV(OP_lengths)[OP_CIRC];
|
||||
break;
|
||||
|
||||
/* A "real" property test implies no starting bits, but the fake property
|
||||
PT_CLIST identifies a list of characters. These lists are short, as they
|
||||
are used for characters with more than one "other case", so there is no
|
||||
point in recognizing them for OP_NOTPROP. */
|
||||
|
||||
case OP_PROP:
|
||||
if (tcode[1] != PT_CLIST) return SSB_FAIL;
|
||||
{
|
||||
const uint32_t *p = PRIV(ucd_caseless_sets) + tcode[2];
|
||||
while ((c = *p++) < NOTACHAR)
|
||||
{
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (utf)
|
||||
{
|
||||
PCRE2_UCHAR buff[6];
|
||||
(void)PRIV(ord2utf)(c, buff);
|
||||
c = buff[0];
|
||||
}
|
||||
#endif
|
||||
if (c > 0xff) SET_BIT(0xff); else SET_BIT(c);
|
||||
}
|
||||
}
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* We can ignore word boundary tests. */
|
||||
|
||||
case OP_WORD_BOUNDARY:
|
||||
case OP_NOT_WORD_BOUNDARY:
|
||||
case OP_UCP_WORD_BOUNDARY:
|
||||
case OP_NOT_UCP_WORD_BOUNDARY:
|
||||
tcode++;
|
||||
break;
|
||||
|
||||
/* For a positive lookahead assertion, inspect what immediately follows,
|
||||
ignoring intermediate assertions and callouts. If the next item is one
|
||||
that sets a mandatory character, skip this assertion. Otherwise, treat it
|
||||
the same as other bracket groups. */
|
||||
|
||||
case OP_ASSERT:
|
||||
case OP_ASSERT_NA:
|
||||
ncode = tcode + GET(tcode, 1);
|
||||
while (*ncode == OP_ALT) ncode += GET(ncode, 1);
|
||||
ncode += 1 + LINK_SIZE;
|
||||
|
||||
/* Skip irrelevant items */
|
||||
|
||||
for (BOOL done = FALSE; !done;)
|
||||
{
|
||||
switch (*ncode)
|
||||
{
|
||||
case OP_ASSERT:
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
case OP_ASSERT_NA:
|
||||
case OP_ASSERTBACK_NA:
|
||||
case OP_ASSERT_SCS:
|
||||
ncode += GET(ncode, 1);
|
||||
while (*ncode == OP_ALT) ncode += GET(ncode, 1);
|
||||
ncode += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
case OP_WORD_BOUNDARY:
|
||||
case OP_NOT_WORD_BOUNDARY:
|
||||
case OP_UCP_WORD_BOUNDARY:
|
||||
case OP_NOT_UCP_WORD_BOUNDARY:
|
||||
ncode++;
|
||||
break;
|
||||
|
||||
case OP_CALLOUT:
|
||||
ncode += PRIV(OP_lengths)[OP_CALLOUT];
|
||||
break;
|
||||
|
||||
case OP_CALLOUT_STR:
|
||||
ncode += GET(ncode, 1 + 2*LINK_SIZE);
|
||||
break;
|
||||
|
||||
default:
|
||||
done = TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Now check the next significant item. */
|
||||
|
||||
switch(*ncode)
|
||||
{
|
||||
default:
|
||||
break;
|
||||
|
||||
case OP_PROP:
|
||||
if (ncode[1] != PT_CLIST) break;
|
||||
/* Fall through */
|
||||
case OP_ANYNL:
|
||||
case OP_CHAR:
|
||||
case OP_CHARI:
|
||||
case OP_EXACT:
|
||||
case OP_EXACTI:
|
||||
case OP_HSPACE:
|
||||
case OP_MINPLUS:
|
||||
case OP_MINPLUSI:
|
||||
case OP_PLUS:
|
||||
case OP_PLUSI:
|
||||
case OP_POSPLUS:
|
||||
case OP_POSPLUSI:
|
||||
case OP_VSPACE:
|
||||
/* Note that these types will only be present in non-UCP mode. */
|
||||
case OP_DIGIT:
|
||||
case OP_NOT_DIGIT:
|
||||
case OP_WORDCHAR:
|
||||
case OP_NOT_WORDCHAR:
|
||||
case OP_WHITESPACE:
|
||||
case OP_NOT_WHITESPACE:
|
||||
tcode = ncode;
|
||||
continue; /* With the following significant opcode */
|
||||
}
|
||||
/* Fall through */
|
||||
|
||||
/* For a group bracket or a positive assertion without an immediately
|
||||
following mandatory setting, recurse to set bits from within the
|
||||
subpattern. If it can't find anything, we have to give up. If it finds
|
||||
some mandatory character(s), we are done for this branch. Otherwise,
|
||||
carry on scanning after the subpattern. */
|
||||
|
||||
case OP_BRA:
|
||||
case OP_SBRA:
|
||||
case OP_CBRA:
|
||||
case OP_SCBRA:
|
||||
case OP_BRAPOS:
|
||||
case OP_SBRAPOS:
|
||||
case OP_CBRAPOS:
|
||||
case OP_SCBRAPOS:
|
||||
case OP_ONCE:
|
||||
case OP_SCRIPT_RUN:
|
||||
rc = set_start_bits(re, tcode, utf, ucp, depthptr);
|
||||
if (rc == SSB_DONE)
|
||||
{
|
||||
try_next = FALSE;
|
||||
}
|
||||
else if (rc == SSB_CONTINUE)
|
||||
{
|
||||
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
|
||||
tcode += 1 + LINK_SIZE;
|
||||
}
|
||||
else return rc; /* FAIL, UNKNOWN, or TOODEEP */
|
||||
break;
|
||||
|
||||
/* If we hit ALT or KET, it means we haven't found anything mandatory in
|
||||
this branch, though we might have found something optional. For ALT, we
|
||||
continue with the next alternative, but we have to arrange that the final
|
||||
result from subpattern is SSB_CONTINUE rather than SSB_DONE. For KET,
|
||||
return SSB_CONTINUE: if this is the top level, that indicates failure,
|
||||
but after a nested subpattern, it causes scanning to continue. */
|
||||
|
||||
case OP_ALT:
|
||||
yield = SSB_CONTINUE;
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_KET:
|
||||
case OP_KETRMAX:
|
||||
case OP_KETRMIN:
|
||||
case OP_KETRPOS:
|
||||
return SSB_CONTINUE;
|
||||
|
||||
/* Skip over callout */
|
||||
|
||||
case OP_CALLOUT:
|
||||
tcode += PRIV(OP_lengths)[OP_CALLOUT];
|
||||
break;
|
||||
|
||||
case OP_CALLOUT_STR:
|
||||
tcode += GET(tcode, 1 + 2*LINK_SIZE);
|
||||
break;
|
||||
|
||||
/* Skip over lookbehind, negative lookahead, and scan substring
|
||||
assertions */
|
||||
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
case OP_ASSERTBACK_NA:
|
||||
case OP_ASSERT_SCS:
|
||||
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
|
||||
tcode += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* BRAZERO does the bracket, but carries on. */
|
||||
|
||||
case OP_BRAZERO:
|
||||
case OP_BRAMINZERO:
|
||||
case OP_BRAPOSZERO:
|
||||
rc = set_start_bits(re, ++tcode, utf, ucp, depthptr);
|
||||
if (rc == SSB_FAIL || rc == SSB_UNKNOWN || rc == SSB_TOODEEP) return rc;
|
||||
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
|
||||
tcode += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* SKIPZERO skips the bracket. */
|
||||
|
||||
case OP_SKIPZERO:
|
||||
tcode++;
|
||||
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
|
||||
tcode += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Single-char * or ? sets the bit and tries the next item */
|
||||
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_POSSTAR:
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_POSQUERY:
|
||||
tcode = set_table_bit(re, tcode + 1, FALSE, utf, ucp);
|
||||
break;
|
||||
|
||||
case OP_STARI:
|
||||
case OP_MINSTARI:
|
||||
case OP_POSSTARI:
|
||||
case OP_QUERYI:
|
||||
case OP_MINQUERYI:
|
||||
case OP_POSQUERYI:
|
||||
tcode = set_table_bit(re, tcode + 1, TRUE, utf, ucp);
|
||||
break;
|
||||
|
||||
/* Single-char upto sets the bit and tries the next */
|
||||
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
case OP_POSUPTO:
|
||||
tcode = set_table_bit(re, tcode + 1 + IMM2_SIZE, FALSE, utf, ucp);
|
||||
break;
|
||||
|
||||
case OP_UPTOI:
|
||||
case OP_MINUPTOI:
|
||||
case OP_POSUPTOI:
|
||||
tcode = set_table_bit(re, tcode + 1 + IMM2_SIZE, TRUE, utf, ucp);
|
||||
break;
|
||||
|
||||
/* At least one single char sets the bit and stops */
|
||||
|
||||
case OP_EXACT:
|
||||
tcode += IMM2_SIZE;
|
||||
/* Fall through */
|
||||
case OP_CHAR:
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_POSPLUS:
|
||||
(void)set_table_bit(re, tcode + 1, FALSE, utf, ucp);
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_EXACTI:
|
||||
tcode += IMM2_SIZE;
|
||||
/* Fall through */
|
||||
case OP_CHARI:
|
||||
case OP_PLUSI:
|
||||
case OP_MINPLUSI:
|
||||
case OP_POSPLUSI:
|
||||
(void)set_table_bit(re, tcode + 1, TRUE, utf, ucp);
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* Special spacing and line-terminating items. These recognize specific
|
||||
lists of characters. The difference between VSPACE and ANYNL is that the
|
||||
latter can match the two-character CRLF sequence, but that is not
|
||||
relevant for finding the first character, so their code here is
|
||||
identical. */
|
||||
|
||||
case OP_HSPACE:
|
||||
SET_BIT(CHAR_HT);
|
||||
SET_BIT(CHAR_SPACE);
|
||||
|
||||
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
|
||||
the bits for 0xA0 and for code units >= 255, independently of UTF. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
SET_BIT(0xA0);
|
||||
SET_BIT(0xFF);
|
||||
#else
|
||||
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
||||
units of horizontal space characters. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
SET_BIT(0xC2); /* For U+00A0 */
|
||||
SET_BIT(0xE1); /* For U+1680, U+180E */
|
||||
SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */
|
||||
SET_BIT(0xE3); /* For U+3000 */
|
||||
}
|
||||
else
|
||||
#endif
|
||||
/* For the 8-bit library not in UTF-8 mode, set the bit for 0xA0, unless
|
||||
the code is EBCDIC. */
|
||||
{
|
||||
#ifndef EBCDIC
|
||||
SET_BIT(0xA0);
|
||||
#endif /* Not EBCDIC */
|
||||
}
|
||||
#endif /* 8-bit support */
|
||||
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_ANYNL:
|
||||
case OP_VSPACE:
|
||||
SET_BIT(CHAR_LF);
|
||||
SET_BIT(CHAR_VT);
|
||||
SET_BIT(CHAR_FF);
|
||||
SET_BIT(CHAR_CR);
|
||||
|
||||
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
|
||||
the bits for NEL and for code units >= 255, independently of UTF. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
SET_BIT(CHAR_NEL);
|
||||
SET_BIT(0xFF);
|
||||
#else
|
||||
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
||||
units of vertical space characters. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
SET_BIT(0xC2); /* For U+0085 (NEL) */
|
||||
SET_BIT(0xE2); /* For U+2028, U+2029 */
|
||||
}
|
||||
else
|
||||
#endif
|
||||
/* For the 8-bit library not in UTF-8 mode, set the bit for NEL. */
|
||||
{
|
||||
SET_BIT(CHAR_NEL);
|
||||
}
|
||||
#endif /* 8-bit support */
|
||||
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* Single character types set the bits and stop. Note that if PCRE2_UCP
|
||||
is set, we do not see these opcodes because \d etc are converted to
|
||||
properties. Therefore, these apply in the case when only characters less
|
||||
than 256 are recognized to match the types. */
|
||||
|
||||
case OP_NOT_DIGIT:
|
||||
set_nottype_bits(re, cbit_digit, table_limit);
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_DIGIT:
|
||||
set_type_bits(re, cbit_digit, table_limit);
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
set_nottype_bits(re, cbit_space, table_limit);
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_WHITESPACE:
|
||||
set_type_bits(re, cbit_space, table_limit);
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_NOT_WORDCHAR:
|
||||
set_nottype_bits(re, cbit_word, table_limit);
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_WORDCHAR:
|
||||
set_type_bits(re, cbit_word, table_limit);
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* One or more character type fudges the pointer and restarts, knowing
|
||||
it will hit a single character type and stop there. */
|
||||
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEPOSPLUS:
|
||||
tcode++;
|
||||
break;
|
||||
|
||||
case OP_TYPEEXACT:
|
||||
tcode += 1 + IMM2_SIZE;
|
||||
break;
|
||||
|
||||
/* Zero or more repeats of character types set the bits and then
|
||||
try again. */
|
||||
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEPOSUPTO:
|
||||
tcode += IMM2_SIZE; /* Fall through */
|
||||
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSQUERY:
|
||||
switch(tcode[1])
|
||||
{
|
||||
default:
|
||||
case OP_ANY:
|
||||
case OP_ALLANY:
|
||||
return SSB_FAIL;
|
||||
|
||||
case OP_HSPACE:
|
||||
SET_BIT(CHAR_HT);
|
||||
SET_BIT(CHAR_SPACE);
|
||||
|
||||
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
|
||||
the bits for 0xA0 and for code units >= 255, independently of UTF. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
SET_BIT(0xA0);
|
||||
SET_BIT(0xFF);
|
||||
#else
|
||||
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
||||
units of horizontal space characters. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
SET_BIT(0xC2); /* For U+00A0 */
|
||||
SET_BIT(0xE1); /* For U+1680, U+180E */
|
||||
SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */
|
||||
SET_BIT(0xE3); /* For U+3000 */
|
||||
}
|
||||
else
|
||||
#endif
|
||||
/* For the 8-bit library not in UTF-8 mode, set the bit for 0xA0, unless
|
||||
the code is EBCDIC. */
|
||||
{
|
||||
#ifndef EBCDIC
|
||||
SET_BIT(0xA0);
|
||||
#endif /* Not EBCDIC */
|
||||
}
|
||||
#endif /* 8-bit support */
|
||||
break;
|
||||
|
||||
case OP_ANYNL:
|
||||
case OP_VSPACE:
|
||||
SET_BIT(CHAR_LF);
|
||||
SET_BIT(CHAR_VT);
|
||||
SET_BIT(CHAR_FF);
|
||||
SET_BIT(CHAR_CR);
|
||||
|
||||
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
|
||||
the bits for NEL and for code units >= 255, independently of UTF. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
SET_BIT(CHAR_NEL);
|
||||
SET_BIT(0xFF);
|
||||
#else
|
||||
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
||||
units of vertical space characters. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
SET_BIT(0xC2); /* For U+0085 (NEL) */
|
||||
SET_BIT(0xE2); /* For U+2028, U+2029 */
|
||||
}
|
||||
else
|
||||
#endif
|
||||
/* For the 8-bit library not in UTF-8 mode, set the bit for NEL. */
|
||||
{
|
||||
SET_BIT(CHAR_NEL);
|
||||
}
|
||||
#endif /* 8-bit support */
|
||||
break;
|
||||
|
||||
case OP_NOT_DIGIT:
|
||||
set_nottype_bits(re, cbit_digit, table_limit);
|
||||
break;
|
||||
|
||||
case OP_DIGIT:
|
||||
set_type_bits(re, cbit_digit, table_limit);
|
||||
break;
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
set_nottype_bits(re, cbit_space, table_limit);
|
||||
break;
|
||||
|
||||
case OP_WHITESPACE:
|
||||
set_type_bits(re, cbit_space, table_limit);
|
||||
break;
|
||||
|
||||
case OP_NOT_WORDCHAR:
|
||||
set_nottype_bits(re, cbit_word, table_limit);
|
||||
break;
|
||||
|
||||
case OP_WORDCHAR:
|
||||
set_type_bits(re, cbit_word, table_limit);
|
||||
break;
|
||||
}
|
||||
|
||||
tcode += 2;
|
||||
break;
|
||||
|
||||
/* Set-based ECLASS: treat it the same as a "complex" XCLASS; give up. */
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
case OP_ECLASS:
|
||||
return SSB_FAIL;
|
||||
#endif
|
||||
|
||||
/* Extended class: if there are any property checks, or if this is a
|
||||
negative XCLASS without a map, give up. If there are no property checks,
|
||||
there must be wide characters on the XCLASS list, because otherwise an
|
||||
XCLASS would not have been created. This means that code points >= 255
|
||||
are potential starters. In the UTF-8 case we can scan them and set bits
|
||||
for the relevant leading bytes. */
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
case OP_XCLASS:
|
||||
xclassflags = tcode[1 + LINK_SIZE];
|
||||
if ((xclassflags & XCL_HASPROP) != 0 ||
|
||||
(xclassflags & (XCL_MAP|XCL_NOT)) == XCL_NOT)
|
||||
return SSB_FAIL;
|
||||
|
||||
/* We have a positive XCLASS or a negative one without a map. Set up the
|
||||
map pointer if there is one, and fall through. */
|
||||
|
||||
classmap = ((xclassflags & XCL_MAP) == 0)? NULL :
|
||||
(const uint8_t *)(tcode + 1 + LINK_SIZE + 1);
|
||||
|
||||
/* In UTF-8 mode, scan the character list and set bits for leading bytes,
|
||||
then jump to handle the map. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (utf && (xclassflags & XCL_NOT) == 0)
|
||||
{
|
||||
PCRE2_UCHAR b, e;
|
||||
PCRE2_SPTR p = tcode + 1 + LINK_SIZE + 1 + ((classmap == NULL)? 0:32);
|
||||
tcode += GET(tcode, 1);
|
||||
|
||||
if (*p >= XCL_LIST)
|
||||
{
|
||||
study_char_list(p, re->start_bitmap,
|
||||
((const uint8_t *)re + re->code_start));
|
||||
goto HANDLE_CLASSMAP;
|
||||
}
|
||||
|
||||
for (;;) switch (*p++)
|
||||
{
|
||||
case XCL_SINGLE:
|
||||
b = *p++;
|
||||
while ((*p & 0xc0) == 0x80) p++;
|
||||
re->start_bitmap[b/8] |= (1u << (b&7));
|
||||
break;
|
||||
|
||||
case XCL_RANGE:
|
||||
b = *p++;
|
||||
while ((*p & 0xc0) == 0x80) p++;
|
||||
e = *p++;
|
||||
while ((*p & 0xc0) == 0x80) p++;
|
||||
for (; b <= e; b++)
|
||||
re->start_bitmap[b/8] |= (1u << (b&7));
|
||||
break;
|
||||
|
||||
case XCL_END:
|
||||
goto HANDLE_CLASSMAP;
|
||||
|
||||
default:
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
return SSB_UNKNOWN; /* Internal error, should not occur */
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||
#endif /* SUPPORT_WIDE_CHARS */
|
||||
|
||||
/* It seems that the fall through comment must be outside the #ifdef if
|
||||
it is to avoid the gcc compiler warning. */
|
||||
|
||||
/* Fall through */
|
||||
|
||||
/* Enter here for a negative non-XCLASS. In the 8-bit library, if we are
|
||||
in UTF mode, any byte with a value >= 0xc4 is a potentially valid starter
|
||||
because it starts a character with a value > 255. In 8-bit non-UTF mode,
|
||||
there is no difference between CLASS and NCLASS. In all other wide
|
||||
character modes, set the 0xFF bit to indicate code units >= 255. */
|
||||
|
||||
case OP_NCLASS:
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (utf)
|
||||
{
|
||||
re->start_bitmap[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */
|
||||
memset(re->start_bitmap+25, 0xff, 7); /* Bits for 0xc9 - 0xff */
|
||||
}
|
||||
#elif PCRE2_CODE_UNIT_WIDTH != 8
|
||||
SET_BIT(0xFF); /* For characters >= 255 */
|
||||
#endif
|
||||
/* Fall through */
|
||||
|
||||
/* Enter here for a positive non-XCLASS. If we have fallen through from
|
||||
an XCLASS, classmap will already be set; just advance the code pointer.
|
||||
Otherwise, set up classmap for a a non-XCLASS and advance past it. */
|
||||
|
||||
case OP_CLASS:
|
||||
if (*tcode == OP_XCLASS) tcode += GET(tcode, 1); else
|
||||
{
|
||||
classmap = (const uint8_t *)(++tcode);
|
||||
tcode += 32 / sizeof(PCRE2_UCHAR);
|
||||
}
|
||||
|
||||
/* When wide characters are supported, classmap may be NULL. In UTF-8
|
||||
(sic) mode, the bits in a class bit map correspond to character values,
|
||||
not to byte values. However, the bit map we are constructing is for byte
|
||||
values. So we have to do a conversion for characters whose code point is
|
||||
greater than 127. In fact, there are only two possible starting bytes for
|
||||
characters in the range 128 - 255. */
|
||||
|
||||
#if defined SUPPORT_WIDE_CHARS && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
HANDLE_CLASSMAP:
|
||||
#endif
|
||||
if (classmap != NULL)
|
||||
{
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (utf)
|
||||
{
|
||||
for (c = 0; c < 16; c++) re->start_bitmap[c] |= classmap[c];
|
||||
for (c = 128; c < 256; c++)
|
||||
{
|
||||
if ((classmap[c/8] & (1u << (c&7))) != 0)
|
||||
{
|
||||
int d = (c >> 6) | 0xc0; /* Set bit for this starter */
|
||||
re->start_bitmap[d/8] |= (1u << (d&7)); /* and then skip on to the */
|
||||
c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
/* In all modes except UTF-8, the two bit maps are compatible. */
|
||||
|
||||
{
|
||||
for (c = 0; c < 32; c++) re->start_bitmap[c] |= classmap[c];
|
||||
}
|
||||
}
|
||||
|
||||
/* Act on what follows the class. For a zero minimum repeat, continue;
|
||||
otherwise stop processing. */
|
||||
|
||||
switch (*tcode)
|
||||
{
|
||||
case OP_CRSTAR:
|
||||
case OP_CRMINSTAR:
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
case OP_CRPOSSTAR:
|
||||
case OP_CRPOSQUERY:
|
||||
tcode++;
|
||||
break;
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
case OP_CRPOSRANGE:
|
||||
if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE;
|
||||
else try_next = FALSE;
|
||||
break;
|
||||
|
||||
default:
|
||||
try_next = FALSE;
|
||||
break;
|
||||
}
|
||||
break; /* End of class handling case */
|
||||
} /* End of switch for opcodes */
|
||||
} /* End of try_next loop */
|
||||
|
||||
code += GET(code, 1); /* Advance to next branch */
|
||||
}
|
||||
while (*code == OP_ALT);
|
||||
|
||||
return yield;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Study a compiled expression *
|
||||
*************************************************/
|
||||
|
||||
/* This function is handed a compiled expression that it must study to produce
|
||||
information that will speed up the matching.
|
||||
|
||||
Argument:
|
||||
re points to the compiled expression
|
||||
|
||||
Returns: 0 normally; non-zero should never normally occur
|
||||
1 unknown opcode in set_start_bits
|
||||
2 missing capturing bracket
|
||||
3 unknown opcode in find_minlength
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(study)(pcre2_real_code *re)
|
||||
{
|
||||
int count = 0;
|
||||
PCRE2_UCHAR *code;
|
||||
BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
|
||||
BOOL ucp = (re->overall_options & PCRE2_UCP) != 0;
|
||||
|
||||
/* Find start of compiled code */
|
||||
|
||||
code = (PCRE2_UCHAR *)((uint8_t *)re + re->code_start);
|
||||
|
||||
/* For a pattern that has a first code unit, or a multiline pattern that
|
||||
matches only at "line start", there is no point in seeking a list of starting
|
||||
code units. */
|
||||
|
||||
if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
|
||||
{
|
||||
int depth = 0;
|
||||
int rc = set_start_bits(re, code, utf, ucp, &depth);
|
||||
if (rc == SSB_UNKNOWN)
|
||||
{
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* If a list of starting code units was set up, scan the list to see if only
|
||||
one or two were listed. Having only one listed is rare because usually a
|
||||
single starting code unit will have been recognized and PCRE2_FIRSTSET set.
|
||||
If two are listed, see if they are caseless versions of the same character;
|
||||
if so we can replace the list with a caseless first code unit. This gives
|
||||
better performance and is plausibly worth doing for patterns such as [Ww]ord
|
||||
or (word|WORD). */
|
||||
|
||||
if (rc == SSB_DONE)
|
||||
{
|
||||
int i;
|
||||
int a = -1;
|
||||
int b = -1;
|
||||
uint8_t *p = re->start_bitmap;
|
||||
uint32_t flags = PCRE2_FIRSTMAPSET;
|
||||
|
||||
for (i = 0; i < 256; p++, i += 8)
|
||||
{
|
||||
uint8_t x = *p;
|
||||
if (x != 0)
|
||||
{
|
||||
int c;
|
||||
uint8_t y = x & (~x + 1); /* Least significant bit */
|
||||
if (y != x) goto DONE; /* More than one bit set */
|
||||
|
||||
/* In the 16-bit and 32-bit libraries, the bit for 0xff means "0xff and
|
||||
all wide characters", so we cannot use it here. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (i == 248 && x == 0x80) goto DONE;
|
||||
#endif
|
||||
|
||||
/* Compute the character value */
|
||||
|
||||
c = i;
|
||||
switch (x)
|
||||
{
|
||||
case 1: break;
|
||||
case 2: c += 1; break; case 4: c += 2; break;
|
||||
case 8: c += 3; break; case 16: c += 4; break;
|
||||
case 32: c += 5; break; case 64: c += 6; break;
|
||||
case 128: c += 7; break;
|
||||
}
|
||||
|
||||
/* c contains the code unit value, in the range 0-255. In 8-bit UTF
|
||||
mode, only values < 128 can be used. In all the other cases, c is a
|
||||
character value. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (utf && c > 127) goto DONE;
|
||||
#endif
|
||||
if (a < 0) a = c; /* First one found, save in a */
|
||||
else if (b < 0) /* Second one found */
|
||||
{
|
||||
int d = TABLE_GET((unsigned int)c, re->tables + fcc_offset, c);
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf || ucp)
|
||||
{
|
||||
if (UCD_CASESET(c) != 0) goto DONE; /* Multiple case set */
|
||||
if (c > 127) d = UCD_OTHERCASE(c);
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
if (d != a) goto DONE; /* Not the other case of a */
|
||||
b = c; /* Save second in b */
|
||||
}
|
||||
else goto DONE; /* More than two characters found */
|
||||
}
|
||||
}
|
||||
|
||||
/* Replace the start code unit bits with a first code unit. If it is the
|
||||
same as a required later code unit, then clear the required later code
|
||||
unit. This is because a search for a required code unit starts after an
|
||||
explicit first code unit, but at a code unit found from the bitmap.
|
||||
Patterns such as /a*a/ don't work if both the start unit and required
|
||||
unit are the same. */
|
||||
|
||||
if (a >= 0) {
|
||||
if ((re->flags & PCRE2_LASTSET) && (re->last_codeunit == (uint32_t)a || (b >= 0 && re->last_codeunit == (uint32_t)b))) {
|
||||
re->flags &= ~(PCRE2_LASTSET | PCRE2_LASTCASELESS);
|
||||
re->last_codeunit = 0;
|
||||
}
|
||||
re->first_codeunit = a;
|
||||
flags = PCRE2_FIRSTSET;
|
||||
if (b >= 0) flags |= PCRE2_FIRSTCASELESS;
|
||||
}
|
||||
|
||||
DONE:
|
||||
re->flags |= flags;
|
||||
}
|
||||
}
|
||||
|
||||
/* Find the minimum length of subject string. If the pattern can match an empty
|
||||
string, the minimum length is already known. If the pattern contains (*ACCEPT)
|
||||
all bets are off, and we don't even try to find a minimum length. If there are
|
||||
more back references than the size of the vector we are going to cache them in,
|
||||
do nothing. A pattern that complicated will probably take a long time to
|
||||
analyze and may in any case turn out to be too complicated. Note that back
|
||||
reference minima are held as 16-bit numbers. */
|
||||
|
||||
if ((re->flags & (PCRE2_MATCH_EMPTY|PCRE2_HASACCEPT)) == 0 &&
|
||||
re->top_backref <= MAX_CACHE_BACKREF)
|
||||
{
|
||||
int min;
|
||||
int backref_cache[MAX_CACHE_BACKREF+1];
|
||||
backref_cache[0] = 0; /* Highest one that is set */
|
||||
min = find_minlength(re, code, code, utf, NULL, &count, backref_cache);
|
||||
switch(min)
|
||||
{
|
||||
case -1: /* \C in UTF mode or over-complex regex */
|
||||
break; /* Leave minlength unchanged (will be zero) */
|
||||
|
||||
case -2:
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
return 2; /* missing capturing bracket */
|
||||
|
||||
case -3:
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
return 3; /* unrecognized opcode */
|
||||
|
||||
default:
|
||||
re->minlength = (min > UINT16_MAX)? UINT16_MAX : min;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre2_study.c */
|
||||
1707
3rd/pcre2/src/pcre2_substitute.c
Normal file
1707
3rd/pcre2/src/pcre2_substitute.c
Normal file
@@ -0,0 +1,1707 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
#define PTR_STACK_SIZE 20
|
||||
|
||||
#define SUBSTITUTE_OPTIONS \
|
||||
(PCRE2_SUBSTITUTE_EXTENDED|PCRE2_SUBSTITUTE_GLOBAL| \
|
||||
PCRE2_SUBSTITUTE_LITERAL|PCRE2_SUBSTITUTE_MATCHED| \
|
||||
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_REPLACEMENT_ONLY| \
|
||||
PCRE2_SUBSTITUTE_UNKNOWN_UNSET|PCRE2_SUBSTITUTE_UNSET_EMPTY)
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find end of substitute text *
|
||||
*************************************************/
|
||||
|
||||
/* In extended mode, we recognize ${name:+set text:unset text} and similar
|
||||
constructions. This requires the identification of unescaped : and }
|
||||
characters. This function scans for such. It must deal with nested ${
|
||||
constructions. The pointer to the text is updated, either to the required end
|
||||
character, or to where an error was detected.
|
||||
|
||||
Arguments:
|
||||
code points to the compiled expression (for options)
|
||||
ptrptr points to the pointer to the start of the text (updated)
|
||||
ptrend end of the whole string
|
||||
last TRUE if the last expected string (only } recognized)
|
||||
|
||||
Returns: 0 on success
|
||||
negative error code on failure
|
||||
*/
|
||||
|
||||
static int
|
||||
find_text_end(const pcre2_code *code, PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend,
|
||||
BOOL last)
|
||||
{
|
||||
int rc = 0;
|
||||
uint32_t nestlevel = 0;
|
||||
BOOL literal = FALSE;
|
||||
PCRE2_SPTR ptr = *ptrptr;
|
||||
|
||||
for (; ptr < ptrend; ptr++)
|
||||
{
|
||||
if (literal)
|
||||
{
|
||||
if (ptr[0] == CHAR_BACKSLASH && ptr < ptrend - 1 && ptr[1] == CHAR_E)
|
||||
{
|
||||
literal = FALSE;
|
||||
ptr += 1;
|
||||
}
|
||||
}
|
||||
|
||||
else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
|
||||
{
|
||||
if (nestlevel == 0) goto EXIT;
|
||||
nestlevel--;
|
||||
}
|
||||
|
||||
else if (*ptr == CHAR_COLON && !last && nestlevel == 0) goto EXIT;
|
||||
|
||||
else if (*ptr == CHAR_DOLLAR_SIGN)
|
||||
{
|
||||
if (ptr < ptrend - 1 && ptr[1] == CHAR_LEFT_CURLY_BRACKET)
|
||||
{
|
||||
nestlevel++;
|
||||
ptr += 1;
|
||||
}
|
||||
}
|
||||
|
||||
else if (*ptr == CHAR_BACKSLASH)
|
||||
{
|
||||
int erc;
|
||||
int errorcode;
|
||||
uint32_t ch;
|
||||
|
||||
if (ptr < ptrend - 1) switch (ptr[1])
|
||||
{
|
||||
case CHAR_L:
|
||||
case CHAR_l:
|
||||
case CHAR_U:
|
||||
case CHAR_u:
|
||||
ptr += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
ptr += 1; /* Must point after \ */
|
||||
erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
|
||||
code->overall_options, code->extra_options, code->top_bracket, FALSE, NULL);
|
||||
ptr -= 1; /* Back to last code unit of escape */
|
||||
if (errorcode != 0)
|
||||
{
|
||||
/* errorcode from check_escape is positive, so must not be returned by
|
||||
pcre2_substitute(). */
|
||||
rc = PCRE2_ERROR_BADREPESCAPE;
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
switch(erc)
|
||||
{
|
||||
case 0: /* Data character */
|
||||
case ESC_b: /* Data character */
|
||||
case ESC_v: /* Data character */
|
||||
case ESC_E: /* Isolated \E is ignored */
|
||||
break;
|
||||
|
||||
case ESC_Q:
|
||||
literal = TRUE;
|
||||
break;
|
||||
|
||||
case ESC_g:
|
||||
/* The \g<name> form (\g<number> already handled by check_escape)
|
||||
|
||||
Don't worry about finding the matching ">". We are super, super lenient
|
||||
about validating ${} replacements inside find_text_end(), so we certainly
|
||||
don't need to worry about other syntax. Importantly, a \g<..> or $<...>
|
||||
sequence can't contain a '}' character. */
|
||||
break;
|
||||
|
||||
default:
|
||||
if (erc < 0)
|
||||
break; /* capture group reference */
|
||||
rc = PCRE2_ERROR_BADREPESCAPE;
|
||||
goto EXIT;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rc = PCRE2_ERROR_REPMISSINGBRACE; /* Terminator not found */
|
||||
|
||||
EXIT:
|
||||
*ptrptr = ptr;
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Validate group name *
|
||||
*************************************************/
|
||||
|
||||
/* This function scans for a capture group name, validating it
|
||||
consists of legal characters, is not empty, and does not exceed
|
||||
MAX_NAME_SIZE.
|
||||
|
||||
Arguments:
|
||||
ptrptr points to the pointer to the start of the text (updated)
|
||||
ptrend end of the whole string
|
||||
utf true if the input is UTF-encoded
|
||||
ctypes pointer to the character types table
|
||||
|
||||
Returns: TRUE if a name was read
|
||||
FALSE otherwise
|
||||
*/
|
||||
|
||||
static BOOL
|
||||
read_name_subst(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, BOOL utf,
|
||||
const uint8_t* ctypes)
|
||||
{
|
||||
PCRE2_SPTR ptr = *ptrptr;
|
||||
PCRE2_SPTR nameptr = ptr;
|
||||
|
||||
if (ptr >= ptrend) /* No characters in name */
|
||||
goto FAILED;
|
||||
|
||||
/* We do not need to check whether the name starts with a non-digit.
|
||||
We are simply referencing names here, not defining them. */
|
||||
|
||||
/* See read_name in the pcre2_compile.c for the corresponding logic
|
||||
restricting group names inside the pattern itself. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
uint32_t c, type;
|
||||
|
||||
while (ptr < ptrend)
|
||||
{
|
||||
GETCHAR(c, ptr);
|
||||
type = UCD_CHARTYPE(c);
|
||||
if (type != ucp_Nd && PRIV(ucp_gentype)[type] != ucp_L &&
|
||||
c != CHAR_UNDERSCORE) break;
|
||||
ptr++;
|
||||
FORWARDCHARTEST(ptr, ptrend);
|
||||
}
|
||||
}
|
||||
else
|
||||
#else
|
||||
(void)utf; /* Avoid compiler warning */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Handle group names in non-UTF modes. */
|
||||
|
||||
{
|
||||
while (ptr < ptrend && MAX_255(*ptr) && (ctypes[*ptr] & ctype_word) != 0)
|
||||
{
|
||||
ptr++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Check name length */
|
||||
|
||||
if (ptr - nameptr > MAX_NAME_SIZE)
|
||||
goto FAILED;
|
||||
|
||||
/* Subpattern names must not be empty */
|
||||
if (ptr == nameptr)
|
||||
goto FAILED;
|
||||
|
||||
*ptrptr = ptr;
|
||||
return TRUE;
|
||||
|
||||
FAILED:
|
||||
*ptrptr = ptr;
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Case transformations *
|
||||
*************************************************/
|
||||
|
||||
#define PCRE2_SUBSTITUTE_CASE_NONE 0
|
||||
// 1, 2, 3 are PCRE2_SUBSTITUTE_CASE_LOWER, UPPER, TITLE_FIRST.
|
||||
#define PCRE2_SUBSTITUTE_CASE_REVERSE_TITLE_FIRST 4
|
||||
|
||||
typedef struct {
|
||||
int to_case; /* One of PCRE2_SUBSTITUTE_CASE_xyz */
|
||||
BOOL single_char;
|
||||
} case_state;
|
||||
|
||||
/* Helper to guess how much a string is likely to increase in size when
|
||||
case-transformed. Usually, strings don't change size at all, but some rare
|
||||
characters do grow. Estimate +10%, plus another few characters.
|
||||
|
||||
Performing this estimation is unfortunate, but inevitable, since we can't call
|
||||
the callout if we ran out of buffer space to prepare its input.
|
||||
|
||||
Because this estimate is inexact (and in pathological cases, underestimates the
|
||||
required buffer size) we must document that when you have a
|
||||
substitute_case_callout, and you are using PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, you
|
||||
may need more than two calls to determine the final buffer size. */
|
||||
|
||||
static PCRE2_SIZE
|
||||
pessimistic_case_inflation(PCRE2_SIZE len)
|
||||
{
|
||||
return (len >> 3u) + 10;
|
||||
}
|
||||
|
||||
/* Case transformation behaviour if no callout is passed. */
|
||||
|
||||
static PCRE2_SIZE
|
||||
default_substitute_case_callout(
|
||||
PCRE2_SPTR input, PCRE2_SIZE input_len,
|
||||
PCRE2_UCHAR *output, PCRE2_SIZE output_cap,
|
||||
case_state *state, const pcre2_code *code)
|
||||
{
|
||||
PCRE2_SPTR input_end = input + input_len;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
BOOL utf;
|
||||
BOOL ucp;
|
||||
#endif
|
||||
PCRE2_UCHAR temp[6];
|
||||
BOOL next_to_upper;
|
||||
BOOL rest_to_upper;
|
||||
BOOL single_char;
|
||||
BOOL overflow = FALSE;
|
||||
PCRE2_SIZE written = 0;
|
||||
|
||||
/* Helpful simplifying invariant: input and output are disjoint buffers.
|
||||
I believe that this code is technically undefined behaviour, because the two
|
||||
pointers input/output are "unrelated" pointers and hence not comparable. Casting
|
||||
via char* bypasses some but not all of those technical rules. It is not included
|
||||
in release builds, in any case. */
|
||||
PCRE2_ASSERT((char *)(input + input_len) <= (char *)output ||
|
||||
(char *)(output + output_cap) <= (char *)input);
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
utf = (code->overall_options & PCRE2_UTF) != 0;
|
||||
ucp = (code->overall_options & PCRE2_UCP) != 0;
|
||||
#endif
|
||||
|
||||
if (input_len == 0) return 0;
|
||||
|
||||
switch (state->to_case)
|
||||
{
|
||||
default:
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
return 0;
|
||||
|
||||
case PCRE2_SUBSTITUTE_CASE_LOWER: // Can be single_char TRUE or FALSE
|
||||
case PCRE2_SUBSTITUTE_CASE_UPPER: // Can only be single_char FALSE
|
||||
next_to_upper = rest_to_upper = (state->to_case == PCRE2_SUBSTITUTE_CASE_UPPER);
|
||||
break;
|
||||
|
||||
case PCRE2_SUBSTITUTE_CASE_TITLE_FIRST: // Can be single_char TRUE or FALSE
|
||||
next_to_upper = TRUE;
|
||||
rest_to_upper = FALSE;
|
||||
state->to_case = PCRE2_SUBSTITUTE_CASE_LOWER;
|
||||
break;
|
||||
|
||||
case PCRE2_SUBSTITUTE_CASE_REVERSE_TITLE_FIRST: // Can only be single_char FALSE
|
||||
next_to_upper = FALSE;
|
||||
rest_to_upper = TRUE;
|
||||
state->to_case = PCRE2_SUBSTITUTE_CASE_UPPER;
|
||||
break;
|
||||
}
|
||||
|
||||
single_char = state->single_char;
|
||||
if (single_char)
|
||||
state->to_case = PCRE2_SUBSTITUTE_CASE_NONE;
|
||||
|
||||
while (input < input_end)
|
||||
{
|
||||
uint32_t ch;
|
||||
unsigned int chlen;
|
||||
|
||||
GETCHARINCTEST(ch, input);
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if ((utf || ucp) && ch >= 128)
|
||||
{
|
||||
uint32_t type = UCD_CHARTYPE(ch);
|
||||
if (PRIV(ucp_gentype)[type] == ucp_L &&
|
||||
type != (next_to_upper? ucp_Lu : ucp_Ll))
|
||||
ch = UCD_OTHERCASE(ch);
|
||||
|
||||
/* TODO This is far from correct... it doesn't support the SpecialCasing.txt
|
||||
mappings, but worse, it's not even correct for all the ordinary case
|
||||
mappings. We should add support for those (at least), and then add the
|
||||
SpecialCasing.txt mappings for Esszet and ligatures, and finally use the
|
||||
Turkish casing flag on the match context. */
|
||||
}
|
||||
else
|
||||
#endif
|
||||
if (MAX_255(ch))
|
||||
{
|
||||
if (((code->tables + cbits_offset +
|
||||
(next_to_upper? cbit_upper:cbit_lower)
|
||||
)[ch/8] & (1u << (ch%8))) == 0)
|
||||
ch = (code->tables + fcc_offset)[ch];
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) chlen = PRIV(ord2utf)(ch, temp); else
|
||||
#endif
|
||||
{
|
||||
temp[0] = ch;
|
||||
chlen = 1;
|
||||
}
|
||||
|
||||
if (!overflow && chlen <= output_cap)
|
||||
{
|
||||
memcpy(output, temp, CU2BYTES(chlen));
|
||||
output += chlen;
|
||||
output_cap -= chlen;
|
||||
}
|
||||
else
|
||||
{
|
||||
overflow = TRUE;
|
||||
}
|
||||
|
||||
if (chlen > ~(PCRE2_SIZE)0 - written) /* Integer overflow */
|
||||
return ~(PCRE2_SIZE)0;
|
||||
written += chlen;
|
||||
|
||||
next_to_upper = rest_to_upper;
|
||||
|
||||
/* memcpy the remainder, if only transforming a single character. */
|
||||
|
||||
if (single_char)
|
||||
{
|
||||
PCRE2_SIZE rest_len = input_end - input;
|
||||
|
||||
if (!overflow && rest_len <= output_cap)
|
||||
memcpy(output, input, CU2BYTES(rest_len));
|
||||
|
||||
if (rest_len > ~(PCRE2_SIZE)0 - written) /* Integer overflow */
|
||||
return ~(PCRE2_SIZE)0;
|
||||
written += rest_len;
|
||||
|
||||
return written;
|
||||
}
|
||||
}
|
||||
|
||||
return written;
|
||||
}
|
||||
|
||||
/* Helper to perform the call to the substitute_case_callout. We wrap the
|
||||
user-provided callout because our internal arguments are slightly extended. We
|
||||
don't want the user callout to handle the case of "\l" (first character only to
|
||||
lowercase) or "\l\U" (first character to lowercase, rest to uppercase) because
|
||||
those are not operations defined by Unicode. Instead the user callout simply
|
||||
needs to provide the three Unicode primitives: lower, upper, titlecase. */
|
||||
|
||||
static PCRE2_SIZE
|
||||
do_case_copy(
|
||||
PCRE2_UCHAR *input_output, PCRE2_SIZE input_len, PCRE2_SIZE output_cap,
|
||||
case_state *state, BOOL utf,
|
||||
PCRE2_SIZE (*substitute_case_callout)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *,
|
||||
PCRE2_SIZE, int, void *),
|
||||
void *substitute_case_callout_data)
|
||||
{
|
||||
PCRE2_SPTR input = input_output;
|
||||
PCRE2_UCHAR *output = input_output;
|
||||
PCRE2_SIZE rc;
|
||||
PCRE2_SIZE rc2;
|
||||
int ch1_to_case;
|
||||
int rest_to_case;
|
||||
PCRE2_UCHAR ch1[6];
|
||||
PCRE2_SIZE ch1_len;
|
||||
PCRE2_SPTR rest;
|
||||
PCRE2_SIZE rest_len;
|
||||
BOOL ch1_overflow = FALSE;
|
||||
BOOL rest_overflow = FALSE;
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32 || !defined(SUPPORT_UNICODE)
|
||||
(void)utf; /* Avoid compiler warning. */
|
||||
#endif
|
||||
|
||||
PCRE2_ASSERT(input_len != 0);
|
||||
|
||||
switch (state->to_case)
|
||||
{
|
||||
default:
|
||||
PCRE2_DEBUG_UNREACHABLE();
|
||||
return 0;
|
||||
|
||||
case PCRE2_SUBSTITUTE_CASE_LOWER: // Can be single_char TRUE or FALSE
|
||||
case PCRE2_SUBSTITUTE_CASE_UPPER: // Can only be single_char FALSE
|
||||
case PCRE2_SUBSTITUTE_CASE_TITLE_FIRST: // Can be single_char TRUE or FALSE
|
||||
|
||||
/* The easy case, where our internal casing operations align with those of
|
||||
the callout. */
|
||||
|
||||
if (state->single_char == FALSE)
|
||||
{
|
||||
rc = substitute_case_callout(input, input_len, output, output_cap,
|
||||
state->to_case, substitute_case_callout_data);
|
||||
|
||||
if (state->to_case == PCRE2_SUBSTITUTE_CASE_TITLE_FIRST)
|
||||
state->to_case = PCRE2_SUBSTITUTE_CASE_LOWER;
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
ch1_to_case = state->to_case;
|
||||
rest_to_case = PCRE2_SUBSTITUTE_CASE_NONE;
|
||||
break;
|
||||
|
||||
case PCRE2_SUBSTITUTE_CASE_REVERSE_TITLE_FIRST: // Can only be single_char FALSE
|
||||
ch1_to_case = PCRE2_SUBSTITUTE_CASE_LOWER;
|
||||
rest_to_case = PCRE2_SUBSTITUTE_CASE_UPPER;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Identify the leading character. Take copy, because its storage overlaps with
|
||||
`output`, and hence may be scrambled by the callout. */
|
||||
|
||||
{
|
||||
PCRE2_SPTR ch_end = input;
|
||||
uint32_t ch;
|
||||
|
||||
GETCHARINCTEST(ch, ch_end);
|
||||
(void) ch;
|
||||
PCRE2_ASSERT(ch_end <= input + input_len && ch_end - input <= 6);
|
||||
ch1_len = ch_end - input;
|
||||
memcpy(ch1, input, CU2BYTES(ch1_len));
|
||||
}
|
||||
|
||||
rest = input + ch1_len;
|
||||
rest_len = input_len - ch1_len;
|
||||
|
||||
/* Transform just ch1. The buffers are always in-place (input == output). With a
|
||||
custom callout, we need a loop to discover its required buffer size. The loop
|
||||
wouldn't be required if the callout were well-behaved, but it might be naughty
|
||||
and return "5" the first time, then "10" the next time we call it using the
|
||||
exact same input! */
|
||||
|
||||
{
|
||||
PCRE2_SIZE ch1_cap;
|
||||
PCRE2_SIZE max_ch1_cap;
|
||||
|
||||
ch1_cap = ch1_len; /* First attempt uses the space vacated by ch1. */
|
||||
PCRE2_ASSERT(output_cap >= input_len && input_len >= rest_len);
|
||||
max_ch1_cap = output_cap - rest_len;
|
||||
|
||||
while (TRUE)
|
||||
{
|
||||
rc = substitute_case_callout(ch1, ch1_len, output, ch1_cap, ch1_to_case,
|
||||
substitute_case_callout_data);
|
||||
if (rc == ~(PCRE2_SIZE)0) return rc;
|
||||
|
||||
if (rc <= ch1_cap) break;
|
||||
|
||||
if (rc > max_ch1_cap)
|
||||
{
|
||||
ch1_overflow = TRUE;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Move the rest to the right, to make room for expanding ch1. */
|
||||
|
||||
memmove(input_output + rc, rest, CU2BYTES(rest_len));
|
||||
rest = input + rc;
|
||||
|
||||
ch1_cap = rc;
|
||||
|
||||
/* Proof of loop termination: `ch1_cap` is growing on each iteration, but
|
||||
the loop ends if `rc` reaches the (unchanging) upper bound of output_cap. */
|
||||
}
|
||||
}
|
||||
|
||||
if (rest_to_case == PCRE2_SUBSTITUTE_CASE_NONE)
|
||||
{
|
||||
if (!ch1_overflow)
|
||||
{
|
||||
PCRE2_ASSERT(rest_len <= output_cap - rc);
|
||||
memmove(output + rc, rest, CU2BYTES(rest_len));
|
||||
}
|
||||
rc2 = rest_len;
|
||||
|
||||
state->to_case = PCRE2_SUBSTITUTE_CASE_NONE;
|
||||
}
|
||||
else
|
||||
{
|
||||
PCRE2_UCHAR dummy[1];
|
||||
|
||||
rc2 = substitute_case_callout(rest, rest_len,
|
||||
ch1_overflow? dummy : output + rc,
|
||||
ch1_overflow? 0u : output_cap - rc,
|
||||
rest_to_case, substitute_case_callout_data);
|
||||
if (rc2 == ~(PCRE2_SIZE)0) return rc2;
|
||||
|
||||
if (!ch1_overflow && rc2 > output_cap - rc) rest_overflow = TRUE;
|
||||
|
||||
/* If ch1 grows so that `xform(ch1)+rest` can't fit in the buffer, but then
|
||||
`rest` shrinks, it's actually possible for the total calculated length of
|
||||
`xform(ch1)+xform(rest)` to come out at less than output_cap. But we can't
|
||||
report that, because it would make it seem that the operation succeeded.
|
||||
If either of xform(ch1) or xform(rest) won't fit in the buffer, our final
|
||||
result must be > output_cap. */
|
||||
if (ch1_overflow && rc2 < rest_len)
|
||||
rc2 = rest_len;
|
||||
|
||||
state->to_case = PCRE2_SUBSTITUTE_CASE_UPPER;
|
||||
}
|
||||
|
||||
if (rc2 > ~(PCRE2_SIZE)0 - rc) /* Integer overflow */
|
||||
return ~(PCRE2_SIZE)0;
|
||||
|
||||
PCRE2_ASSERT(!(ch1_overflow || rest_overflow) || rc + rc2 > output_cap);
|
||||
(void)rest_overflow;
|
||||
|
||||
return rc + rc2;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Match and substitute *
|
||||
*************************************************/
|
||||
|
||||
/* This function applies a compiled re to a subject string and creates a new
|
||||
string with substitutions. The first 7 arguments are the same as for
|
||||
pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED.
|
||||
|
||||
Arguments:
|
||||
code points to the compiled expression
|
||||
subject points to the subject string
|
||||
length length of subject string (may contain binary zeros)
|
||||
start_offset where to start in the subject string
|
||||
options option bits
|
||||
match_data points to a match_data block, or is NULL
|
||||
mcontext points to a PCRE2 match context
|
||||
replacement points to the replacement string
|
||||
rlength length of replacement string
|
||||
buffer where to put the substituted string
|
||||
blength points to length of buffer; updated to length of string
|
||||
|
||||
Returns: >= 0 number of substitutions made
|
||||
< 0 an error code
|
||||
PCRE2_ERROR_BADREPLACEMENT means invalid use of $
|
||||
*/
|
||||
|
||||
/* Copy "length_" code units from "from" to the end of the output, provided
there is room. When there is not, either fail at once (goto NOROOM) or, if
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set, stop copying and from then on just
add up the extra space that would have been needed. The macro uses the
caller's buffer, buff_offset, lengthleft, overflowed, extra_needed and
suboptions variables. */

#define CHECKMEMCPY(from, length_) \
  do \
    { \
    PCRE2_SIZE chkmc_length = length_; \
    if (!overflowed && lengthleft >= chkmc_length) \
      { \
      /* It fits: copy and advance */ \
      memcpy(buffer + buff_offset, from, CU2BYTES(chkmc_length)); \
      buff_offset += chkmc_length; \
      lengthleft -= chkmc_length; \
      } \
    else if (!overflowed) \
      { \
      /* First item that does not fit */ \
      if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \
      overflowed = TRUE; \
      extra_needed = chkmc_length - lengthleft; \
      } \
    else \
      { \
      /* Already overflowed: only keep count */ \
      if (chkmc_length > ~(PCRE2_SIZE)0 - extra_needed) /* Integer overflow */ \
        goto TOOLARGEREPLACE; \
      extra_needed += chkmc_length; \
      } \
    } \
  while (0)
|
||||
|
||||
/* Common frame for copying text with case forcing applied. "do_call" is a
block of statements that must set chkcc_rc to the length of the transformed
text; it may use chkcc_length, the length of the untransformed text. Running
out of space is handled as in CHECKMEMCPY().

When substitute_case_callout is NULL, the source and destination buffers must
not overlap, because the default handler does not support that. */

#define CHECKCASECPY_BASE(length_, do_call) \
  do \
    { \
    PCRE2_SIZE chkcc_length = (PCRE2_SIZE)(length_); \
    PCRE2_SIZE chkcc_rc; \
    do_call \
    if (lengthleft >= chkcc_rc) \
      { \
      /* The transformed text is in place: step past it */ \
      buff_offset += chkcc_rc; \
      lengthleft -= chkcc_rc; \
      } \
    else \
      { \
      if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \
      overflowed = TRUE; \
      extra_needed = chkcc_rc - lengthleft; \
      } \
    } \
  while (0)
|
||||
|
||||
/* Case-forced copy from "from" using the built-in handler. After an earlier
overflow the handler is given no output space at all, so it only measures. Its
result is then added to extra_needed and the "break" leaves the enclosing
CHECKCASECPY_BASE() loop, skipping the rest of that macro. */

#define CHECKCASECPY_DEFAULT(from, length_) \
  CHECKCASECPY_BASE(length_, \
    { \
    chkcc_rc = default_substitute_case_callout(from, chkcc_length, \
                                               buffer + buff_offset, \
                                               !overflowed? lengthleft : 0, \
                                               &forcecase, code); \
    if (overflowed) \
      { \
      if (chkcc_rc > ~(PCRE2_SIZE)0 - extra_needed) /* Integer overflow */ \
        goto TOOLARGEREPLACE; \
      extra_needed += chkcc_rc; \
      break; \
      } \
    })
|
||||
|
||||
/* In-place case forcing, via the user's callout, of the "length_" code units
that start at the current output position. do_case_copy() returning
~(PCRE2_SIZE)0 is treated as an error (goto CASEERROR). */

#define CHECKCASECPY_CALLOUT(length_) \
  CHECKCASECPY_BASE(length_, \
    { \
    chkcc_rc = do_case_copy(buffer + buff_offset, chkcc_length, \
                            lengthleft, &forcecase, utf, \
                            substitute_case_callout, \
                            substitute_case_callout_data); \
    if (chkcc_rc == ~(PCRE2_SIZE)0) goto CASEERROR; \
    })
|
||||
|
||||
/* Delayed case transformation, for use when there is a case-forcing callout.
It covers everything that has been produced since the position recorded in
casestart_offset and casestart_extra_needed. If the output has not overflowed,
the buffer is rewound to that position and the text is transformed in place.
Otherwise the text is not available, so an estimate of its growth is added to
extra_needed instead. */

#define DELAYEDFORCECASE() \
  do \
    { \
    PCRE2_SIZE chars_outstanding = (buff_offset - casestart_offset) + \
                                   (extra_needed - casestart_extra_needed); \
    if (chars_outstanding != 0) \
      { \
      if (!overflowed) \
        { \
        /* Rewind the buffer */ \
        lengthleft += (buff_offset - casestart_offset); \
        buff_offset = casestart_offset; \
        /* Care! In-place case transformation */ \
        CHECKCASECPY_CALLOUT(chars_outstanding); \
        } \
      else \
        { \
        PCRE2_SIZE guess = pessimistic_case_inflation(chars_outstanding); \
        if (guess > ~(PCRE2_SIZE)0 - extra_needed) /* Integer overflow */ \
          goto TOOLARGEREPLACE; \
        extra_needed += guess; \
        } \
      } \
    } \
  while (0)
|
||||
|
||||
|
||||
/* Here's the function */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
|
||||
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
|
||||
pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength,
|
||||
PCRE2_UCHAR *buffer, PCRE2_SIZE *blength)
|
||||
{
|
||||
int rc;
|
||||
int subs;
|
||||
uint32_t ovector_count;
|
||||
uint32_t goptions = 0;
|
||||
uint32_t suboptions;
|
||||
pcre2_match_data *internal_match_data = NULL;
|
||||
BOOL escaped_literal = FALSE;
|
||||
BOOL overflowed = FALSE;
|
||||
BOOL use_existing_match;
|
||||
BOOL replacement_only;
|
||||
BOOL utf = (code->overall_options & PCRE2_UTF) != 0;
|
||||
PCRE2_UCHAR temp[6];
|
||||
PCRE2_SPTR ptr;
|
||||
PCRE2_SPTR repend = NULL;
|
||||
PCRE2_SIZE extra_needed = 0;
|
||||
PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
|
||||
PCRE2_SIZE *ovector;
|
||||
PCRE2_SIZE ovecsave[3];
|
||||
pcre2_substitute_callout_block scb;
|
||||
PCRE2_SIZE sub_start_extra_needed;
|
||||
PCRE2_SIZE (*substitute_case_callout)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *,
|
||||
PCRE2_SIZE, int, void *) = NULL;
|
||||
void *substitute_case_callout_data = NULL;
|
||||
|
||||
/* General initialization */
|
||||
|
||||
buff_offset = 0;
|
||||
lengthleft = buff_length = *blength;
|
||||
*blength = PCRE2_UNSET;
|
||||
ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
|
||||
|
||||
if (mcontext != NULL)
|
||||
{
|
||||
substitute_case_callout = mcontext->substitute_case_callout;
|
||||
substitute_case_callout_data = mcontext->substitute_case_callout_data;
|
||||
}
|
||||
|
||||
/* Partial matching is not valid. This must come after setting *blength to
|
||||
PCRE2_UNSET, so as not to imply an offset in the replacement. */
|
||||
|
||||
if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
|
||||
/* Validate length and find the end of the replacement. A NULL replacement of
|
||||
zero length is interpreted as an empty string. */
|
||||
|
||||
if (replacement == NULL)
|
||||
{
|
||||
if (rlength != 0) return PCRE2_ERROR_NULL;
|
||||
replacement = (PCRE2_SPTR)"";
|
||||
}
|
||||
|
||||
if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
|
||||
repend = replacement + rlength;
|
||||
|
||||
/* Check for using a match that has already happened. Note that the subject
|
||||
pointer in the match data may be NULL after a no-match. */
|
||||
|
||||
use_existing_match = ((options & PCRE2_SUBSTITUTE_MATCHED) != 0);
|
||||
replacement_only = ((options & PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) != 0);
|
||||
|
||||
/* If starting from an existing match, there must be an externally provided
|
||||
match data block. We create an internal match_data block in two cases: (a) an
|
||||
external one is not supplied (and we are not starting from an existing match);
|
||||
(b) an existing match is to be used for the first substitution. In the latter
|
||||
case, we copy the existing match into the internal block, except for any cached
|
||||
heap frame size and pointer. This ensures that no changes are made to the
|
||||
external match data block. */
|
||||
|
||||
/* WARNING: In both cases below a general context is constructed "by hand"
|
||||
because calling pcre2_general_context_create() involves a memory allocation. If
|
||||
the contents of a general context control block are ever changed there will
|
||||
have to be changes below. */
|
||||
|
||||
if (match_data == NULL)
|
||||
{
|
||||
pcre2_general_context gcontext;
|
||||
if (use_existing_match) return PCRE2_ERROR_NULL;
|
||||
gcontext.memctl = (mcontext == NULL)?
|
||||
((const pcre2_real_code *)code)->memctl :
|
||||
((pcre2_real_match_context *)mcontext)->memctl;
|
||||
match_data = internal_match_data =
|
||||
pcre2_match_data_create_from_pattern(code, &gcontext);
|
||||
if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
}
|
||||
|
||||
else if (use_existing_match)
|
||||
{
|
||||
int pairs;
|
||||
pcre2_general_context gcontext;
|
||||
gcontext.memctl = (mcontext == NULL)?
|
||||
((const pcre2_real_code *)code)->memctl :
|
||||
((pcre2_real_match_context *)mcontext)->memctl;
|
||||
pairs = (code->top_bracket + 1 < match_data->oveccount)?
|
||||
code->top_bracket + 1 : match_data->oveccount;
|
||||
internal_match_data = pcre2_match_data_create(match_data->oveccount,
|
||||
&gcontext);
|
||||
if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
memcpy(internal_match_data, match_data, offsetof(pcre2_match_data, ovector)
|
||||
+ 2*pairs*sizeof(PCRE2_SIZE));
|
||||
internal_match_data->heapframes = NULL;
|
||||
internal_match_data->heapframes_size = 0;
|
||||
match_data = internal_match_data;
|
||||
}
|
||||
|
||||
/* Remember ovector details */
|
||||
|
||||
ovector = pcre2_get_ovector_pointer(match_data);
|
||||
ovector_count = pcre2_get_ovector_count(match_data);
|
||||
|
||||
/* Fixed things in the callout block */
|
||||
|
||||
scb.version = 0;
|
||||
scb.input = subject;
|
||||
scb.output = (PCRE2_SPTR)buffer;
|
||||
scb.ovector = ovector;
|
||||
|
||||
/* A NULL subject of zero length is treated as an empty string. */
|
||||
|
||||
if (subject == NULL)
|
||||
{
|
||||
if (length != 0) return PCRE2_ERROR_NULL;
|
||||
subject = (PCRE2_SPTR)"";
|
||||
}
|
||||
|
||||
/* Find length of zero-terminated subject */
|
||||
|
||||
if (length == PCRE2_ZERO_TERMINATED)
|
||||
length = subject? PRIV(strlen)(subject) : 0;
|
||||
|
||||
/* Check UTF replacement string if necessary. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
||||
{
|
||||
rc = PRIV(valid_utf)(replacement, rlength, &(match_data->startchar));
|
||||
if (rc != 0)
|
||||
{
|
||||
match_data->leftchar = 0;
|
||||
goto EXIT;
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Save the substitute options and remove them from the match options. */
|
||||
|
||||
suboptions = options & SUBSTITUTE_OPTIONS;
|
||||
options &= ~SUBSTITUTE_OPTIONS;
|
||||
|
||||
/* Error if the start match offset is greater than the length of the subject. */
|
||||
|
||||
if (start_offset > length)
|
||||
{
|
||||
match_data->leftchar = 0;
|
||||
rc = PCRE2_ERROR_BADOFFSET;
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* Copy up to the start offset, unless only the replacement is required. */
|
||||
|
||||
if (!replacement_only) CHECKMEMCPY(subject, start_offset);
|
||||
|
||||
/* Loop for global substituting. If PCRE2_SUBSTITUTE_MATCHED is set, the first
|
||||
match is taken from the match_data that was passed in. */
|
||||
|
||||
subs = 0;
|
||||
do
|
||||
{
|
||||
PCRE2_SPTR ptrstack[PTR_STACK_SIZE];
|
||||
uint32_t ptrstackptr = 0;
|
||||
case_state forcecase = { PCRE2_SUBSTITUTE_CASE_NONE, FALSE };
|
||||
PCRE2_SIZE casestart_offset = 0;
|
||||
PCRE2_SIZE casestart_extra_needed = 0;
|
||||
|
||||
if (use_existing_match)
|
||||
{
|
||||
rc = match_data->rc;
|
||||
use_existing_match = FALSE;
|
||||
}
|
||||
else rc = pcre2_match(code, subject, length, start_offset, options|goptions,
|
||||
match_data, mcontext);
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) options |= PCRE2_NO_UTF_CHECK; /* Only need to check once */
|
||||
#endif
|
||||
|
||||
/* Any error other than no match returns the error code. No match when not
|
||||
doing the special after-empty-match global rematch, or when at the end of the
|
||||
subject, breaks the global loop. Otherwise, advance the starting point by one
|
||||
character, copying it to the output, and try again. */
|
||||
|
||||
if (rc < 0)
|
||||
{
|
||||
PCRE2_SIZE save_start;
|
||||
|
||||
if (rc != PCRE2_ERROR_NOMATCH) goto EXIT;
|
||||
if (goptions == 0 || start_offset >= length) break;
|
||||
|
||||
/* Advance by one code point. Then, if CRLF is a valid newline sequence and
|
||||
we have advanced into the middle of it, advance one more code point. In
|
||||
other words, do not start in the middle of CRLF, even if CR and LF on their
|
||||
own are valid newlines. */
|
||||
|
||||
save_start = start_offset++;
|
||||
if (subject[start_offset-1] == CHAR_CR &&
|
||||
(code->newline_convention == PCRE2_NEWLINE_CRLF ||
|
||||
code->newline_convention == PCRE2_NEWLINE_ANY ||
|
||||
code->newline_convention == PCRE2_NEWLINE_ANYCRLF) &&
|
||||
start_offset < length &&
|
||||
subject[start_offset] == CHAR_LF)
|
||||
start_offset++;
|
||||
|
||||
/* Otherwise, in UTF mode, advance past any secondary code points. */
|
||||
|
||||
else if ((code->overall_options & PCRE2_UTF) != 0)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
while (start_offset < length && (subject[start_offset] & 0xc0) == 0x80)
|
||||
start_offset++;
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
while (start_offset < length &&
|
||||
(subject[start_offset] & 0xfc00) == 0xdc00)
|
||||
start_offset++;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Copy what we have advanced past (unless not required), reset the special
|
||||
global options, and continue to the next match. */
|
||||
|
||||
fraglength = start_offset - save_start;
|
||||
if (!replacement_only) CHECKMEMCPY(subject + save_start, fraglength);
|
||||
goptions = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Handle a successful match. Matches that use \K to end before they start
|
||||
or start before the current point in the subject are not supported. */
|
||||
|
||||
if (ovector[1] < ovector[0] || ovector[0] < start_offset)
|
||||
{
|
||||
rc = PCRE2_ERROR_BADSUBSPATTERN;
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* Check for the same match as previous. This is legitimate after matching an
|
||||
empty string that starts after the initial match offset. We have tried again
|
||||
at the match point in case the pattern is one like /(?<=\G.)/ which can never
|
||||
match at its starting point, so running the match achieves the bumpalong. If
|
||||
we do get the same (null) match at the original match point, it isn't such a
|
||||
pattern, so we now do the empty string magic. In all other cases, a repeat
|
||||
match should never occur. */
|
||||
|
||||
if (ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
|
||||
{
|
||||
if (ovector[0] == ovector[1] && ovecsave[2] != start_offset)
|
||||
{
|
||||
goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
|
||||
ovecsave[2] = start_offset;
|
||||
continue; /* Back to the top of the loop */
|
||||
}
|
||||
rc = PCRE2_ERROR_INTERNAL_DUPMATCH;
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* Count substitutions with a paranoid check for integer overflow; surely no
|
||||
real call to this function would ever hit this! */
|
||||
|
||||
if (subs == INT_MAX)
|
||||
{
|
||||
rc = PCRE2_ERROR_TOOMANYREPLACE;
|
||||
goto EXIT;
|
||||
}
|
||||
subs++;
|
||||
|
||||
/* Copy the text leading up to the match (unless not required); remember
|
||||
where the insert begins and how many ovector pairs are set; and remember how
|
||||
much space we have requested in extra_needed. */
|
||||
|
||||
if (rc == 0) rc = ovector_count;
|
||||
fraglength = ovector[0] - start_offset;
|
||||
if (!replacement_only) CHECKMEMCPY(subject + start_offset, fraglength);
|
||||
scb.output_offsets[0] = buff_offset;
|
||||
scb.oveccount = rc;
|
||||
sub_start_extra_needed = extra_needed;
|
||||
|
||||
/* Process the replacement string. If the entire replacement is literal, just
|
||||
copy it with length check. */
|
||||
|
||||
ptr = replacement;
|
||||
if ((suboptions & PCRE2_SUBSTITUTE_LITERAL) != 0)
|
||||
{
|
||||
CHECKMEMCPY(ptr, rlength);
|
||||
}
|
||||
|
||||
/* Within a non-literal replacement, which must be scanned character by
|
||||
character, local literal mode can be set by \Q, but only in extended mode
|
||||
when backslashes are being interpreted. In extended mode we must handle
|
||||
nested substrings that are to be reprocessed. */
|
||||
|
||||
else for (;;)
|
||||
{
|
||||
uint32_t ch;
|
||||
unsigned int chlen;
|
||||
int group;
|
||||
uint32_t special;
|
||||
PCRE2_SPTR text1_start = NULL;
|
||||
PCRE2_SPTR text1_end = NULL;
|
||||
PCRE2_SPTR text2_start = NULL;
|
||||
PCRE2_SPTR text2_end = NULL;
|
||||
PCRE2_UCHAR name[MAX_NAME_SIZE + 1];
|
||||
|
||||
/* If at the end of a nested substring, pop the stack. */
|
||||
|
||||
if (ptr >= repend)
|
||||
{
|
||||
if (ptrstackptr == 0) break; /* End of replacement string */
|
||||
repend = ptrstack[--ptrstackptr];
|
||||
ptr = ptrstack[--ptrstackptr];
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Handle the next character */
|
||||
|
||||
if (escaped_literal)
|
||||
{
|
||||
if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E)
|
||||
{
|
||||
escaped_literal = FALSE;
|
||||
ptr += 2;
|
||||
continue;
|
||||
}
|
||||
goto LOADLITERAL;
|
||||
}
|
||||
|
||||
/* Not in literal mode. */
|
||||
|
||||
if (*ptr == CHAR_DOLLAR_SIGN)
|
||||
{
|
||||
BOOL inparens;
|
||||
BOOL inangle;
|
||||
BOOL star;
|
||||
PCRE2_SIZE sublength;
|
||||
PCRE2_UCHAR next;
|
||||
PCRE2_SPTR subptr, subptrend;
|
||||
|
||||
if (++ptr >= repend) goto BAD;
|
||||
if ((next = *ptr) == CHAR_DOLLAR_SIGN) goto LOADLITERAL;
|
||||
|
||||
special = 0;
|
||||
text1_start = NULL;
|
||||
text1_end = NULL;
|
||||
text2_start = NULL;
|
||||
text2_end = NULL;
|
||||
group = -1;
|
||||
inparens = FALSE;
|
||||
inangle = FALSE;
|
||||
star = FALSE;
|
||||
subptr = NULL;
|
||||
subptrend = NULL;
|
||||
|
||||
/* Special $ sequences, as supported by Perl, JavaScript, .NET and others. */
|
||||
if (next == CHAR_AMPERSAND)
|
||||
{
|
||||
++ptr;
|
||||
group = 0;
|
||||
goto GROUP_SUBSTITUTE;
|
||||
}
|
||||
if (next == CHAR_GRAVE_ACCENT || next == CHAR_APOSTROPHE)
|
||||
{
|
||||
++ptr;
|
||||
rc = pcre2_substring_length_bynumber(match_data, 0, &sublength);
|
||||
if (rc < 0) goto PTREXIT; /* (Sanity-check ovector before reading from it.) */
|
||||
|
||||
if (next == CHAR_GRAVE_ACCENT)
|
||||
{
|
||||
subptr = subject;
|
||||
subptrend = subject + ovector[0];
|
||||
}
|
||||
else
|
||||
{
|
||||
subptr = subject + ovector[1];
|
||||
subptrend = subject + length;
|
||||
}
|
||||
|
||||
goto SUBPTR_SUBSTITUTE;
|
||||
}
|
||||
if (next == CHAR_UNDERSCORE)
|
||||
{
|
||||
/* Java, .NET support $_ for "entire input string". */
|
||||
++ptr;
|
||||
subptr = subject;
|
||||
subptrend = subject + length;
|
||||
goto SUBPTR_SUBSTITUTE;
|
||||
}
|
||||
|
||||
if (next == CHAR_LEFT_CURLY_BRACKET)
|
||||
{
|
||||
if (++ptr >= repend) goto BAD;
|
||||
next = *ptr;
|
||||
inparens = TRUE;
|
||||
}
|
||||
else if (next == CHAR_LESS_THAN_SIGN)
|
||||
{
|
||||
/* JavaScript compatibility syntax, $<name>. Processes only named
|
||||
groups (not numbered) and does not support extensions such as star
|
||||
(you can do ${name} and ${*name}, but not $<*name>). */
|
||||
if (++ptr >= repend) goto BAD;
|
||||
next = *ptr;
|
||||
inangle = TRUE;
|
||||
}
|
||||
|
||||
if (!inangle && next == CHAR_ASTERISK)
|
||||
{
|
||||
if (++ptr >= repend) goto BAD;
|
||||
next = *ptr;
|
||||
star = TRUE;
|
||||
}
|
||||
|
||||
if (!star && !inangle && next >= CHAR_0 && next <= CHAR_9)
|
||||
{
|
||||
group = next - CHAR_0;
|
||||
while (++ptr < repend)
|
||||
{
|
||||
next = *ptr;
|
||||
if (next < CHAR_0 || next > CHAR_9) break;
|
||||
group = group * 10 + (next - CHAR_0);
|
||||
|
||||
/* A check for a number greater than the highest captured group
|
||||
is sufficient here; no need for a separate overflow check. If unknown
|
||||
groups are to be treated as unset, just skip over any remaining
|
||||
digits and carry on. */
|
||||
|
||||
if (group > code->top_bracket)
|
||||
{
|
||||
if ((suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
|
||||
{
|
||||
while (++ptr < repend && *ptr >= CHAR_0 && *ptr <= CHAR_9);
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
rc = PCRE2_ERROR_NOSUBSTRING;
|
||||
goto PTREXIT;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
PCRE2_SIZE name_len;
|
||||
PCRE2_SPTR name_start = ptr;
|
||||
if (!read_name_subst(&ptr, repend, utf, code->tables + ctypes_offset))
|
||||
goto BAD;
|
||||
name_len = ptr - name_start;
|
||||
memcpy(name, name_start, CU2BYTES(name_len));
|
||||
name[name_len] = 0;
|
||||
}
|
||||
|
||||
next = 0; /* not used or updated after this point */
|
||||
(void)next;
|
||||
|
||||
/* In extended mode we recognize ${name:+set text:unset text} and
|
||||
${name:-default text}. */
|
||||
|
||||
if (inparens)
|
||||
{
|
||||
if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
|
||||
!star && ptr < repend - 2 && *ptr == CHAR_COLON)
|
||||
{
|
||||
special = *(++ptr);
|
||||
if (special != CHAR_PLUS && special != CHAR_MINUS)
|
||||
{
|
||||
rc = PCRE2_ERROR_BADSUBSTITUTION;
|
||||
goto PTREXIT;
|
||||
}
|
||||
|
||||
text1_start = ++ptr;
|
||||
rc = find_text_end(code, &ptr, repend, special == CHAR_MINUS);
|
||||
if (rc != 0) goto PTREXIT;
|
||||
text1_end = ptr;
|
||||
|
||||
if (special == CHAR_PLUS && *ptr == CHAR_COLON)
|
||||
{
|
||||
text2_start = ++ptr;
|
||||
rc = find_text_end(code, &ptr, repend, TRUE);
|
||||
if (rc != 0) goto PTREXIT;
|
||||
text2_end = ptr;
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
if (ptr >= repend || *ptr != CHAR_RIGHT_CURLY_BRACKET)
|
||||
{
|
||||
rc = PCRE2_ERROR_REPMISSINGBRACE;
|
||||
goto PTREXIT;
|
||||
}
|
||||
}
|
||||
|
||||
ptr++;
|
||||
}
|
||||
|
||||
if (inangle)
|
||||
{
|
||||
if (ptr >= repend || *ptr != CHAR_GREATER_THAN_SIGN)
|
||||
goto BAD;
|
||||
ptr++;
|
||||
}
|
||||
|
||||
/* Have found a syntactically correct group number or name, or *name.
|
||||
Only *MARK is currently recognized. */
|
||||
|
||||
if (star)
|
||||
{
|
||||
if (PRIV(strcmp_c8)(name, STRING_MARK) == 0)
|
||||
{
|
||||
PCRE2_SPTR mark = pcre2_get_mark(match_data);
|
||||
if (mark != NULL)
|
||||
{
|
||||
/* Peek backwards one code unit to obtain the length of the mark.
|
||||
It can (theoretically) contain an embedded NUL. */
|
||||
fraglength = mark[-1];
|
||||
if (forcecase.to_case != PCRE2_SUBSTITUTE_CASE_NONE &&
|
||||
substitute_case_callout == NULL)
|
||||
CHECKCASECPY_DEFAULT(mark, fraglength);
|
||||
else
|
||||
CHECKMEMCPY(mark, fraglength);
|
||||
}
|
||||
}
|
||||
else goto BAD;
|
||||
}
|
||||
|
||||
/* Substitute the contents of a group. We don't use substring_copy
|
||||
functions any more, in order to support case forcing. */
|
||||
|
||||
else
|
||||
{
|
||||
GROUP_SUBSTITUTE:
|
||||
/* Find a number for a named group. In case there are duplicate names,
|
||||
search for the first one that is set. If the name is not found when
|
||||
PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set, set the group number to a
|
||||
non-existent group. */
|
||||
|
||||
if (group < 0)
|
||||
{
|
||||
PCRE2_SPTR first, last, entry;
|
||||
rc = pcre2_substring_nametable_scan(code, name, &first, &last);
|
||||
if (rc == PCRE2_ERROR_NOSUBSTRING &&
|
||||
(suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
|
||||
{
|
||||
group = code->top_bracket + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (rc < 0) goto PTREXIT;
|
||||
for (entry = first; entry <= last; entry += rc)
|
||||
{
|
||||
uint32_t ng = GET2(entry, 0);
|
||||
if (ng < ovector_count)
|
||||
{
|
||||
if (group < 0) group = ng; /* First in ovector */
|
||||
if (ovector[ng*2] != PCRE2_UNSET)
|
||||
{
|
||||
group = ng; /* First that is set */
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* If group is still negative, it means we did not find a group
|
||||
that is in the ovector. Just set the first group. */
|
||||
|
||||
if (group < 0) group = GET2(first, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/* We now have a group that is identified by number. Find the length of
|
||||
the captured string. If a group in a non-special substitution is unset
|
||||
when PCRE2_SUBSTITUTE_UNSET_EMPTY is set, substitute nothing. */
|
||||
|
||||
rc = pcre2_substring_length_bynumber(match_data, group, &sublength);
|
||||
if (rc < 0)
|
||||
{
|
||||
if (rc == PCRE2_ERROR_NOSUBSTRING &&
|
||||
(suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
|
||||
{
|
||||
rc = PCRE2_ERROR_UNSET;
|
||||
}
|
||||
if (rc != PCRE2_ERROR_UNSET) goto PTREXIT; /* Non-unset errors */
|
||||
if (special == 0) /* Plain substitution */
|
||||
{
|
||||
if ((suboptions & PCRE2_SUBSTITUTE_UNSET_EMPTY) != 0) continue;
|
||||
goto PTREXIT; /* Else error */
|
||||
}
|
||||
}
|
||||
|
||||
/* If special is '+' we have a 'set' and possibly an 'unset' text,
|
||||
both of which are reprocessed when used. If special is '-' we have a
|
||||
default text for when the group is unset; it must be reprocessed. */
|
||||
|
||||
if (special != 0)
|
||||
{
|
||||
if (special == CHAR_MINUS)
|
||||
{
|
||||
if (rc == 0) goto LITERAL_SUBSTITUTE;
|
||||
text2_start = text1_start;
|
||||
text2_end = text1_end;
|
||||
}
|
||||
|
||||
if (ptrstackptr >= PTR_STACK_SIZE) goto BAD;
|
||||
ptrstack[ptrstackptr++] = ptr;
|
||||
ptrstack[ptrstackptr++] = repend;
|
||||
|
||||
if (rc == 0)
|
||||
{
|
||||
ptr = text1_start;
|
||||
repend = text1_end;
|
||||
}
|
||||
else
|
||||
{
|
||||
ptr = text2_start;
|
||||
repend = text2_end;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Otherwise we have a literal substitution of a group's contents. */
|
||||
|
||||
LITERAL_SUBSTITUTE:
|
||||
subptr = subject + ovector[group*2];
|
||||
subptrend = subject + ovector[group*2 + 1];
|
||||
|
||||
/* Substitute a literal string, possibly forcing alphabetic case. */
|
||||
|
||||
SUBPTR_SUBSTITUTE:
|
||||
if (forcecase.to_case != PCRE2_SUBSTITUTE_CASE_NONE &&
|
||||
substitute_case_callout == NULL)
|
||||
CHECKCASECPY_DEFAULT(subptr, subptrend - subptr);
|
||||
else
|
||||
CHECKMEMCPY(subptr, subptrend - subptr);
|
||||
}
|
||||
} /* End of $ processing */
|
||||
|
||||
/* Handle an escape sequence in extended mode. We can use check_escape()
|
||||
to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but
|
||||
the case-forcing escapes are not supported in pcre2_compile() so must be
|
||||
recognized here. */
|
||||
|
||||
else if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
|
||||
*ptr == CHAR_BACKSLASH)
|
||||
{
|
||||
int errorcode;
|
||||
case_state new_forcecase = { PCRE2_SUBSTITUTE_CASE_NONE, FALSE };
|
||||
|
||||
if (ptr < repend - 1) switch (ptr[1])
|
||||
{
|
||||
case CHAR_L:
|
||||
new_forcecase.to_case = PCRE2_SUBSTITUTE_CASE_LOWER;
|
||||
new_forcecase.single_char = FALSE;
|
||||
ptr += 2;
|
||||
break;
|
||||
|
||||
case CHAR_l:
|
||||
new_forcecase.to_case = PCRE2_SUBSTITUTE_CASE_LOWER;
|
||||
new_forcecase.single_char = TRUE;
|
||||
ptr += 2;
|
||||
if (ptr + 2 < repend && ptr[0] == CHAR_BACKSLASH && ptr[1] == CHAR_U)
|
||||
{
|
||||
/* Perl reverse-title-casing feature for \l\U */
|
||||
new_forcecase.to_case = PCRE2_SUBSTITUTE_CASE_REVERSE_TITLE_FIRST;
|
||||
new_forcecase.single_char = FALSE;
|
||||
ptr += 2;
|
||||
}
|
||||
break;
|
||||
|
||||
case CHAR_U:
|
||||
new_forcecase.to_case = PCRE2_SUBSTITUTE_CASE_UPPER;
|
||||
new_forcecase.single_char = FALSE;
|
||||
ptr += 2;
|
||||
break;
|
||||
|
||||
case CHAR_u:
|
||||
new_forcecase.to_case = PCRE2_SUBSTITUTE_CASE_TITLE_FIRST;
|
||||
new_forcecase.single_char = TRUE;
|
||||
ptr += 2;
|
||||
if (ptr + 2 < repend && ptr[0] == CHAR_BACKSLASH && ptr[1] == CHAR_L)
|
||||
{
|
||||
/* Perl title-casing feature for \u\L */
|
||||
new_forcecase.to_case = PCRE2_SUBSTITUTE_CASE_TITLE_FIRST;
|
||||
new_forcecase.single_char = FALSE;
|
||||
ptr += 2;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (new_forcecase.to_case != PCRE2_SUBSTITUTE_CASE_NONE)
|
||||
{
|
||||
SETFORCECASE:
|
||||
|
||||
/* If the substitute_case_callout is unset, our case-forcing is done
|
||||
immediately. If there is a callout however, then its action is delayed
|
||||
until all the characters have been collected.
|
||||
|
||||
Apply the callout now, before we set the new casing mode. */
|
||||
|
||||
if (substitute_case_callout != NULL &&
|
||||
forcecase.to_case != PCRE2_SUBSTITUTE_CASE_NONE)
|
||||
DELAYEDFORCECASE();
|
||||
|
||||
forcecase = new_forcecase;
|
||||
casestart_offset = buff_offset;
|
||||
casestart_extra_needed = extra_needed;
|
||||
continue;
|
||||
}
|
||||
|
||||
ptr++; /* Point after \ */
|
||||
rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
|
||||
code->overall_options, code->extra_options, code->top_bracket, FALSE, NULL);
|
||||
if (errorcode != 0) goto BADESCAPE;
|
||||
|
||||
switch(rc)
|
||||
{
|
||||
case ESC_E:
|
||||
goto SETFORCECASE;
|
||||
|
||||
case ESC_Q:
|
||||
escaped_literal = TRUE;
|
||||
continue;
|
||||
|
||||
case 0: /* Data character */
|
||||
case ESC_b: /* \b is backspace in a substitution */
|
||||
case ESC_v: /* \v is vertical tab in a substitution */
|
||||
|
||||
if (rc == ESC_b) ch = CHAR_BS;
|
||||
if (rc == ESC_v) ch = CHAR_VT;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) chlen = PRIV(ord2utf)(ch, temp); else
|
||||
#endif
|
||||
{
|
||||
temp[0] = ch;
|
||||
chlen = 1;
|
||||
}
|
||||
|
||||
if (forcecase.to_case != PCRE2_SUBSTITUTE_CASE_NONE &&
|
||||
substitute_case_callout == NULL)
|
||||
CHECKCASECPY_DEFAULT(temp, chlen);
|
||||
else
|
||||
CHECKMEMCPY(temp, chlen);
|
||||
continue;
|
||||
|
||||
case ESC_g:
|
||||
{
|
||||
PCRE2_SIZE name_len;
|
||||
PCRE2_SPTR name_start;
|
||||
|
||||
/* Parse the \g<name> form (\g<number> already handled by check_escape) */
|
||||
if (ptr >= repend || *ptr != CHAR_LESS_THAN_SIGN)
|
||||
goto BADESCAPE;
|
||||
++ptr;
|
||||
|
||||
name_start = ptr;
|
||||
if (!read_name_subst(&ptr, repend, utf, code->tables + ctypes_offset))
|
||||
goto BADESCAPE;
|
||||
name_len = ptr - name_start;
|
||||
|
||||
if (ptr >= repend || *ptr != CHAR_GREATER_THAN_SIGN)
|
||||
goto BADESCAPE;
|
||||
++ptr;
|
||||
|
||||
special = 0;
|
||||
group = -1;
|
||||
memcpy(name, name_start, CU2BYTES(name_len));
|
||||
name[name_len] = 0;
|
||||
goto GROUP_SUBSTITUTE;
|
||||
}
|
||||
|
||||
default:
|
||||
if (rc < 0)
|
||||
{
|
||||
special = 0;
|
||||
group = -rc - 1;
|
||||
goto GROUP_SUBSTITUTE;
|
||||
}
|
||||
goto BADESCAPE;
|
||||
}
|
||||
} /* End of backslash processing */
|
||||
|
||||
/* Handle a literal code unit */
|
||||
|
||||
else
|
||||
{
|
||||
PCRE2_SPTR ch_start;
|
||||
|
||||
LOADLITERAL:
|
||||
ch_start = ptr;
|
||||
GETCHARINCTEST(ch, ptr); /* Get character value, increment pointer */
|
||||
(void) ch;
|
||||
|
||||
if (forcecase.to_case != PCRE2_SUBSTITUTE_CASE_NONE &&
|
||||
substitute_case_callout == NULL)
|
||||
CHECKCASECPY_DEFAULT(ch_start, ptr - ch_start);
|
||||
else
|
||||
CHECKMEMCPY(ch_start, ptr - ch_start);
|
||||
} /* End handling a literal code unit */
|
||||
} /* End of loop for scanning the replacement. */
|
||||
|
||||
/* If the substitute_case_callout is unset, our case-forcing is done
|
||||
immediately. If there is a callout however, then its action is delayed
|
||||
until all the characters have been collected.
|
||||
|
||||
We now clean up any trailing section of the replacement for which we deferred
|
||||
the case-forcing. */
|
||||
|
||||
if (substitute_case_callout != NULL &&
|
||||
forcecase.to_case != PCRE2_SUBSTITUTE_CASE_NONE)
|
||||
DELAYEDFORCECASE();
|
||||
|
||||
/* The replacement has been copied to the output, or its size has been
|
||||
remembered. Handle the callout if there is one. */
|
||||
|
||||
if (mcontext != NULL && mcontext->substitute_callout != NULL)
|
||||
{
|
||||
/* If we have an actual (non-simulated) replacement, do the callout. */
|
||||
|
||||
if (!overflowed)
|
||||
{
|
||||
scb.subscount = subs;
|
||||
scb.output_offsets[1] = buff_offset;
|
||||
rc = mcontext->substitute_callout(&scb,
|
||||
mcontext->substitute_callout_data);
|
||||
|
||||
/* A non-zero return means cancel this substitution. Instead, copy the
|
||||
matched string fragment. */
|
||||
|
||||
if (rc != 0)
|
||||
{
|
||||
PCRE2_SIZE newlength = scb.output_offsets[1] - scb.output_offsets[0];
|
||||
PCRE2_SIZE oldlength = ovector[1] - ovector[0];
|
||||
|
||||
buff_offset -= newlength;
|
||||
lengthleft += newlength;
|
||||
if (!replacement_only) CHECKMEMCPY(subject + ovector[0], oldlength);
|
||||
|
||||
/* A negative return means do not do any more. */
|
||||
|
||||
if (rc < 0) suboptions &= (~PCRE2_SUBSTITUTE_GLOBAL);
|
||||
}
|
||||
}
|
||||
|
||||
/* In this interesting case, we cannot do the callout, so it's hard to
|
||||
estimate the required buffer size. What callers want is to be able to make
|
||||
two calls to pcre2_substitute(), once with PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
to discover the buffer size, and then a second and final call. Older
|
||||
versions of PCRE2 violated this assumption, by proceeding as if the callout
|
||||
had returned zero - but on the second call to pcre2_substitute() it could
|
||||
return non-zero and then overflow the buffer again. Callers probably don't
|
||||
want to keep on looping to incrementally discover the buffer size. */
|
||||
|
||||
else
|
||||
{
|
||||
PCRE2_SIZE newlength_buf = buff_offset - scb.output_offsets[0];
|
||||
PCRE2_SIZE newlength_extra = extra_needed - sub_start_extra_needed;
|
||||
PCRE2_SIZE newlength =
|
||||
(newlength_extra > ~(PCRE2_SIZE)0 - newlength_buf)? /* Integer overflow */
|
||||
~(PCRE2_SIZE)0 : newlength_buf + newlength_extra; /* Cap the addition */
|
||||
PCRE2_SIZE oldlength = ovector[1] - ovector[0];
|
||||
|
||||
/* Be pessimistic: request whichever buffer size is larger out of
|
||||
accepting or rejecting the substitution. */
|
||||
|
||||
if (oldlength > newlength)
|
||||
{
|
||||
PCRE2_SIZE additional = oldlength - newlength;
|
||||
if (additional > ~(PCRE2_SIZE)0 - extra_needed) /* Integer overflow */
|
||||
goto TOOLARGEREPLACE;
|
||||
extra_needed += additional;
|
||||
}
|
||||
|
||||
/* Proceed as if the callout did not return a negative. A negative
|
||||
effectively rejects all future substitutions, but we want to examine them
|
||||
pessimistically. */
|
||||
}
|
||||
}
|
||||
|
||||
/* Save the details of this match. See above for how this data is used. If we
|
||||
matched an empty string, do the magic for global matches. Update the start
|
||||
offset to point to the rest of the subject string. If we re-used an existing
|
||||
match for the first match, switch to the internal match data block. */
|
||||
|
||||
ovecsave[0] = ovector[0];
|
||||
ovecsave[1] = ovector[1];
|
||||
ovecsave[2] = start_offset;
|
||||
|
||||
goptions = (ovector[0] != ovector[1] || ovector[0] > start_offset)? 0 :
|
||||
PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
|
||||
start_offset = ovector[1];
|
||||
} while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0); /* Repeat "do" loop */
|
||||
|
||||
/* Copy the rest of the subject unless not required, and terminate the output
|
||||
with a binary zero. */
|
||||
|
||||
if (!replacement_only)
|
||||
{
|
||||
fraglength = length - start_offset;
|
||||
CHECKMEMCPY(subject + start_offset, fraglength);
|
||||
}
|
||||
|
||||
temp[0] = 0;
|
||||
CHECKMEMCPY(temp, 1);
|
||||
|
||||
/* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set,
|
||||
and matching has carried on after a full buffer, in order to compute the length
|
||||
needed. Otherwise, an overflow generates an immediate error return. */
|
||||
|
||||
if (overflowed)
|
||||
{
|
||||
rc = PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
if (extra_needed > ~(PCRE2_SIZE)0 - buff_length) /* Integer overflow */
|
||||
goto TOOLARGEREPLACE;
|
||||
*blength = buff_length + extra_needed;
|
||||
}
|
||||
|
||||
/* After a successful execution, return the number of substitutions and set the
|
||||
length of buffer used, excluding the trailing zero. */
|
||||
|
||||
else
|
||||
{
|
||||
rc = subs;
|
||||
*blength = buff_offset - 1;
|
||||
}
|
||||
|
||||
EXIT:
|
||||
if (internal_match_data != NULL) pcre2_match_data_free(internal_match_data);
|
||||
else match_data->rc = rc;
|
||||
return rc;
|
||||
|
||||
NOROOM:
|
||||
rc = PCRE2_ERROR_NOMEMORY;
|
||||
goto EXIT;
|
||||
|
||||
CASEERROR:
|
||||
rc = PCRE2_ERROR_REPLACECASE;
|
||||
goto EXIT;
|
||||
|
||||
TOOLARGEREPLACE:
|
||||
rc = PCRE2_ERROR_TOOLARGEREPLACE;
|
||||
goto EXIT;
|
||||
|
||||
BAD:
|
||||
rc = PCRE2_ERROR_BADREPLACEMENT;
|
||||
goto PTREXIT;
|
||||
|
||||
BADESCAPE:
|
||||
rc = PCRE2_ERROR_BADREPESCAPE;
|
||||
|
||||
PTREXIT:
|
||||
*blength = (PCRE2_SIZE)(ptr - replacement);
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* End of pcre2_substitute.c */
|
||||
550
3rd/pcre2/src/pcre2_substring.c
Normal file
550
3rd/pcre2/src/pcre2_substring.c
Normal file
@@ -0,0 +1,550 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy named captured string to given buffer *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a given buffer,
|
||||
identifying it by name. If the regex permits duplicate names, the first
|
||||
substring that is set is chosen.
|
||||
|
||||
Arguments:
|
||||
match_data points to the match data
|
||||
stringname the name of the required substring
|
||||
buffer where to put the substring
|
||||
sizeptr the size of the buffer, updated to the size of the substring
|
||||
|
||||
Returns: if successful: zero
|
||||
if not successful, a negative error code:
|
||||
(1) an error from nametable_scan()
|
||||
(2) an error from copy_bynumber()
|
||||
(3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
|
||||
(4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_copy_byname(pcre2_match_data *match_data, PCRE2_SPTR stringname,
|
||||
PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
PCRE2_SPTR first, last, entry;
|
||||
int failrc, entrysize;
|
||||
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
|
||||
return PCRE2_ERROR_DFA_UFUNC;
|
||||
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
|
||||
&first, &last);
|
||||
if (entrysize < 0) return entrysize;
|
||||
failrc = PCRE2_ERROR_UNAVAILABLE;
|
||||
for (entry = first; entry <= last; entry += entrysize)
|
||||
{
|
||||
uint32_t n = GET2(entry, 0);
|
||||
if (n < match_data->oveccount)
|
||||
{
|
||||
if (match_data->ovector[n*2] != PCRE2_UNSET)
|
||||
return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr);
|
||||
failrc = PCRE2_ERROR_UNSET;
|
||||
}
|
||||
}
|
||||
return failrc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy numbered captured string to given buffer *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a given buffer,
|
||||
identifying it by number.
|
||||
|
||||
Arguments:
|
||||
match_data points to the match data
|
||||
stringnumber the number of the required substring
|
||||
buffer where to put the substring
|
||||
sizeptr the size of the buffer, updated to the size of the substring
|
||||
|
||||
Returns: if successful: 0
|
||||
if not successful, a negative error code:
|
||||
PCRE2_ERROR_NOMEMORY: buffer too small
|
||||
PCRE2_ERROR_NOSUBSTRING: no such substring
|
||||
PCRE2_ERROR_UNAVAILABLE: ovector too small
|
||||
PCRE2_ERROR_UNSET: substring is not set
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
|
||||
uint32_t stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
int rc;
|
||||
PCRE2_SIZE size;
|
||||
rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
|
||||
if (rc < 0) return rc;
|
||||
if (size + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY;
|
||||
memcpy(buffer, match_data->subject + match_data->ovector[stringnumber*2],
|
||||
CU2BYTES(size));
|
||||
buffer[size] = 0;
|
||||
*sizeptr = size;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Extract named captured string *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring, identified by name, into
|
||||
new memory. If the regex permits duplicate names, the first substring that is
|
||||
set is chosen.
|
||||
|
||||
Arguments:
|
||||
match_data pointer to match_data
|
||||
stringname the name of the required substring
|
||||
stringptr where to put the pointer to the new memory
|
||||
sizeptr where to put the length of the substring
|
||||
|
||||
Returns: if successful: zero
|
||||
if not successful, a negative value:
|
||||
(1) an error from nametable_scan()
|
||||
(2) an error from get_bynumber()
|
||||
(3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
|
||||
(4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_get_byname(pcre2_match_data *match_data,
|
||||
PCRE2_SPTR stringname, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
PCRE2_SPTR first, last, entry;
|
||||
int failrc, entrysize;
|
||||
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
|
||||
return PCRE2_ERROR_DFA_UFUNC;
|
||||
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
|
||||
&first, &last);
|
||||
if (entrysize < 0) return entrysize;
|
||||
failrc = PCRE2_ERROR_UNAVAILABLE;
|
||||
for (entry = first; entry <= last; entry += entrysize)
|
||||
{
|
||||
uint32_t n = GET2(entry, 0);
|
||||
if (n < match_data->oveccount)
|
||||
{
|
||||
if (match_data->ovector[n*2] != PCRE2_UNSET)
|
||||
return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr);
|
||||
failrc = PCRE2_ERROR_UNSET;
|
||||
}
|
||||
}
|
||||
return failrc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Extract captured string to new memory *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a piece of new
|
||||
memory.
|
||||
|
||||
Arguments:
|
||||
match_data points to match data
|
||||
stringnumber the number of the required substring
|
||||
stringptr where to put a pointer to the new memory
|
||||
sizeptr where to put the size of the substring
|
||||
|
||||
Returns: if successful: 0
|
||||
if not successful, a negative error code:
|
||||
PCRE2_ERROR_NOMEMORY: failed to get memory
|
||||
PCRE2_ERROR_NOSUBSTRING: no such substring
|
||||
PCRE2_ERROR_UNAVAILABLE: ovector too small
|
||||
PCRE2_ERROR_UNSET: substring is not set
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_get_bynumber(pcre2_match_data *match_data,
|
||||
uint32_t stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
int rc;
|
||||
PCRE2_SIZE size;
|
||||
PCRE2_UCHAR *yield;
|
||||
rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
|
||||
if (rc < 0) return rc;
|
||||
yield = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
|
||||
(size + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data);
|
||||
if (yield == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
yield = (PCRE2_UCHAR *)(((char *)yield) + sizeof(pcre2_memctl));
|
||||
memcpy(yield, match_data->subject + match_data->ovector[stringnumber*2],
|
||||
CU2BYTES(size));
|
||||
yield[size] = 0;
|
||||
*stringptr = yield;
|
||||
*sizeptr = size;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free memory obtained by get_substring *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Argument: the result of a previous pcre2_substring_get_byxxx()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_free(PCRE2_UCHAR *string)
|
||||
{
|
||||
if (string != NULL)
|
||||
{
|
||||
pcre2_memctl *memctl = (pcre2_memctl *)((char *)string - sizeof(pcre2_memctl));
|
||||
memctl->free(memctl, memctl->memory_data);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get length of a named substring *
|
||||
*************************************************/
|
||||
|
||||
/* This function returns the length of a named captured substring. If the regex
|
||||
permits duplicate names, the first substring that is set is chosen.
|
||||
|
||||
Arguments:
|
||||
match_data pointer to match data
|
||||
stringname the name of the required substring
|
||||
sizeptr where to put the length
|
||||
|
||||
Returns: 0 if successful, else a negative error number
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_length_byname(pcre2_match_data *match_data,
|
||||
PCRE2_SPTR stringname, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
PCRE2_SPTR first, last, entry;
|
||||
int failrc, entrysize;
|
||||
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
|
||||
return PCRE2_ERROR_DFA_UFUNC;
|
||||
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
|
||||
&first, &last);
|
||||
if (entrysize < 0) return entrysize;
|
||||
failrc = PCRE2_ERROR_UNAVAILABLE;
|
||||
for (entry = first; entry <= last; entry += entrysize)
|
||||
{
|
||||
uint32_t n = GET2(entry, 0);
|
||||
if (n < match_data->oveccount)
|
||||
{
|
||||
if (match_data->ovector[n*2] != PCRE2_UNSET)
|
||||
return pcre2_substring_length_bynumber(match_data, n, sizeptr);
|
||||
failrc = PCRE2_ERROR_UNSET;
|
||||
}
|
||||
}
|
||||
return failrc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get length of a numbered substring *
|
||||
*************************************************/
|
||||
|
||||
/* This function returns the length of a captured substring. If the start is
|
||||
beyond the end (which can happen when \K is used in an assertion), it sets the
|
||||
length to zero.
|
||||
|
||||
Arguments:
|
||||
match_data pointer to match data
|
||||
stringnumber the number of the required substring
|
||||
sizeptr where to put the length, if not NULL
|
||||
|
||||
Returns: if successful: 0
|
||||
if not successful, a negative error code:
|
||||
PCRE2_ERROR_NOSUBSTRING: no such substring
|
||||
PCRE2_ERROR_UNAVAILABLE: ovector is too small
|
||||
PCRE2_ERROR_UNSET: substring is not set
|
||||
PCRE2_ERROR_INVALIDOFFSET: internal error, should not occur
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_length_bynumber(pcre2_match_data *match_data,
|
||||
uint32_t stringnumber, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
PCRE2_SIZE left, right;
|
||||
int count = match_data->rc;
|
||||
if (count == PCRE2_ERROR_PARTIAL)
|
||||
{
|
||||
if (stringnumber > 0) return PCRE2_ERROR_PARTIAL;
|
||||
count = 0;
|
||||
}
|
||||
else if (count < 0) return count; /* Match failed */
|
||||
|
||||
if (match_data->matchedby != PCRE2_MATCHEDBY_DFA_INTERPRETER)
|
||||
{
|
||||
if (stringnumber > match_data->code->top_bracket)
|
||||
return PCRE2_ERROR_NOSUBSTRING;
|
||||
if (stringnumber >= match_data->oveccount)
|
||||
return PCRE2_ERROR_UNAVAILABLE;
|
||||
if (match_data->ovector[stringnumber*2] == PCRE2_UNSET)
|
||||
return PCRE2_ERROR_UNSET;
|
||||
}
|
||||
else /* Matched using pcre2_dfa_match() */
|
||||
{
|
||||
if (stringnumber >= match_data->oveccount) return PCRE2_ERROR_UNAVAILABLE;
|
||||
if (count != 0 && stringnumber >= (uint32_t)count) return PCRE2_ERROR_UNSET;
|
||||
}
|
||||
|
||||
left = match_data->ovector[stringnumber*2];
|
||||
right = match_data->ovector[stringnumber*2+1];
|
||||
if (left > match_data->subject_length || right > match_data->subject_length)
|
||||
return PCRE2_ERROR_INVALIDOFFSET;
|
||||
if (sizeptr != NULL) *sizeptr = (left > right)? 0 : right - left;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Extract all captured strings to new memory *
|
||||
*************************************************/
|
||||
|
||||
/* This function gets one chunk of memory and builds a list of pointers and all
|
||||
the captured substrings in it. A NULL pointer is put on the end of the list.
|
||||
The substrings are zero-terminated, but also, if the final argument is
|
||||
non-NULL, a list of lengths is also returned. This allows binary data to be
|
||||
handled.
|
||||
|
||||
Arguments:
|
||||
match_data points to the match data
|
||||
listptr set to point to the list of pointers
|
||||
lengthsptr set to point to the list of lengths (may be NULL)
|
||||
|
||||
Returns: if successful: 0
|
||||
if not successful, a negative error code:
|
||||
PCRE2_ERROR_NOMEMORY: failed to get memory,
|
||||
or a match failure code
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr,
|
||||
PCRE2_SIZE **lengthsptr)
|
||||
{
|
||||
int i, count, count2;
|
||||
PCRE2_SIZE size;
|
||||
PCRE2_SIZE *lensp;
|
||||
pcre2_memctl *memp;
|
||||
PCRE2_UCHAR **listp;
|
||||
PCRE2_UCHAR *sp;
|
||||
PCRE2_SIZE *ovector;
|
||||
|
||||
if ((count = match_data->rc) < 0) return count; /* Match failed */
|
||||
if (count == 0) count = match_data->oveccount; /* Ovector too small */
|
||||
|
||||
count2 = 2*count;
|
||||
ovector = match_data->ovector;
|
||||
size = sizeof(pcre2_memctl) + sizeof(PCRE2_UCHAR *); /* For final NULL */
|
||||
if (lengthsptr != NULL) size += sizeof(PCRE2_SIZE) * count; /* For lengths */
|
||||
|
||||
for (i = 0; i < count2; i += 2)
|
||||
{
|
||||
size += sizeof(PCRE2_UCHAR *) + CU2BYTES(1);
|
||||
if (ovector[i+1] > ovector[i]) size += CU2BYTES(ovector[i+1] - ovector[i]);
|
||||
}
|
||||
|
||||
memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data);
|
||||
if (memp == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
*listptr = listp = (PCRE2_UCHAR **)((char *)memp + sizeof(pcre2_memctl));
|
||||
lensp = (PCRE2_SIZE *)((char *)listp + sizeof(PCRE2_UCHAR *) * (count + 1));
|
||||
|
||||
if (lengthsptr == NULL)
|
||||
{
|
||||
sp = (PCRE2_UCHAR *)lensp;
|
||||
lensp = NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
*lengthsptr = lensp;
|
||||
sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count);
|
||||
}
|
||||
|
||||
for (i = 0; i < count2; i += 2)
|
||||
{
|
||||
size = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
|
||||
|
||||
/* Size == 0 includes the case when the capture is unset. Avoid adding
|
||||
PCRE2_UNSET to match_data->subject because it overflows, even though with
|
||||
zero size calling memcpy() is harmless. */
|
||||
|
||||
if (size != 0) memcpy(sp, match_data->subject + ovector[i], CU2BYTES(size));
|
||||
*listp++ = sp;
|
||||
if (lensp != NULL) *lensp++ = size;
|
||||
sp += size;
|
||||
*sp++ = 0;
|
||||
}
|
||||
|
||||
*listp = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free memory obtained by substring_list_get *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Argument: the result of a previous pcre2_substring_list_get()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_list_free(PCRE2_UCHAR **list)
|
||||
{
|
||||
if (list != NULL)
|
||||
{
|
||||
pcre2_memctl *memctl = (pcre2_memctl *)((char *)list - sizeof(pcre2_memctl));
|
||||
memctl->free(memctl, memctl->memory_data);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find (multiple) entries for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This function scans the nametable for a given name, using binary chop. It
|
||||
returns either two pointers to the entries in the table, or, if no pointers are
|
||||
given, the number of a unique group with the given name. If duplicate names are
|
||||
permitted, and the name is not unique, an error is generated.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name whose entries are required
|
||||
firstptr where to put the pointer to the first entry
|
||||
lastptr where to put the pointer to the last entry
|
||||
|
||||
Returns: PCRE2_ERROR_NOSUBSTRING if the name is not found
|
||||
otherwise, if firstptr and lastptr are NULL:
|
||||
a group number for a unique substring
|
||||
else PCRE2_ERROR_NOUNIQUESUBSTRING
|
||||
otherwise:
|
||||
the length of each entry, having set firstptr and lastptr
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_nametable_scan(const pcre2_code *code, PCRE2_SPTR stringname,
|
||||
PCRE2_SPTR *firstptr, PCRE2_SPTR *lastptr)
|
||||
{
|
||||
uint16_t bot = 0;
|
||||
uint16_t top = code->name_count;
|
||||
uint16_t entrysize = code->name_entry_size;
|
||||
PCRE2_SPTR nametable = (PCRE2_SPTR)((const char *)code + sizeof(pcre2_real_code));
|
||||
|
||||
while (top > bot)
|
||||
{
|
||||
uint16_t mid = (top + bot) / 2;
|
||||
PCRE2_SPTR entry = nametable + entrysize*mid;
|
||||
int c = PRIV(strcmp)(stringname, entry + IMM2_SIZE);
|
||||
if (c == 0)
|
||||
{
|
||||
PCRE2_SPTR first;
|
||||
PCRE2_SPTR last;
|
||||
PCRE2_SPTR lastentry;
|
||||
lastentry = nametable + entrysize * (code->name_count - 1);
|
||||
first = last = entry;
|
||||
while (first > nametable)
|
||||
{
|
||||
if (PRIV(strcmp)(stringname, (first - entrysize + IMM2_SIZE)) != 0) break;
|
||||
first -= entrysize;
|
||||
}
|
||||
while (last < lastentry)
|
||||
{
|
||||
if (PRIV(strcmp)(stringname, (last + entrysize + IMM2_SIZE)) != 0) break;
|
||||
last += entrysize;
|
||||
}
|
||||
if (firstptr == NULL) return (first == last)?
|
||||
(int)GET2(entry, 0) : PCRE2_ERROR_NOUNIQUESUBSTRING;
|
||||
*firstptr = first;
|
||||
*lastptr = last;
|
||||
return entrysize;
|
||||
}
|
||||
if (c > 0) bot = mid + 1; else top = mid;
|
||||
}
|
||||
|
||||
return PCRE2_ERROR_NOSUBSTRING;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find number for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This function is a convenience wrapper for pcre2_substring_nametable_scan()
|
||||
when it is known that names are unique. If the name is not unique, the error
|
||||
PCRE2_ERROR_NOUNIQUESUBSTRING is returned instead of a group number.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name whose number is required
|
||||
|
||||
Returns: the number of the named parenthesis, or a negative number
|
||||
PCRE2_ERROR_NOSUBSTRING if not found
|
||||
PCRE2_ERROR_NOUNIQUESUBSTRING if not unique
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_number_from_name(const pcre2_code *code,
|
||||
PCRE2_SPTR stringname)
|
||||
{
|
||||
return pcre2_substring_nametable_scan(code, stringname, NULL, NULL);
|
||||
}
|
||||
|
||||
/* End of pcre2_substring.c */
|
||||
234
3rd/pcre2/src/pcre2_tables.c
Normal file
234
3rd/pcre2/src/pcre2_tables.c
Normal file
@@ -0,0 +1,234 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This module contains some fixed tables that are used by more than one of the
|
||||
PCRE2 code modules. The tables are also #included by the pcre2test program,
|
||||
which uses macros to change their names from _pcre2_xxx to xxxx, thereby
|
||||
avoiding name clashes with the library. In this case, PCRE2_PCRE2TEST is
|
||||
defined. */
|
||||
|
||||
#ifndef PCRE2_PCRE2TEST /* We're compiling the library */
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
#include "pcre2_internal.h"
|
||||
#endif /* PCRE2_PCRE2TEST */
|
||||
|
||||
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
|
||||
the definition is next to the definition of the opcodes in pcre2_internal.h.
|
||||
This is mode-dependent, so it is skipped when this file is included by
|
||||
pcre2test. */
|
||||
|
||||
#ifndef PCRE2_PCRE2TEST
|
||||
const uint8_t PRIV(OP_lengths)[] = { OP_LENGTHS };
|
||||
#endif
|
||||
|
||||
/* Tables of horizontal and vertical whitespace characters, suitable for
|
||||
adding to classes. */
|
||||
|
||||
const uint32_t PRIV(hspace_list)[] = { HSPACE_LIST };
|
||||
const uint32_t PRIV(vspace_list)[] = { VSPACE_LIST };
|
||||
|
||||
/* These tables are the pairs of delimiters that are valid for callout string
|
||||
arguments. For each starting delimiter there must be a matching ending
|
||||
delimiter, which in fact is different only for bracket-like delimiters. */
|
||||
|
||||
const uint32_t PRIV(callout_start_delims)[] = {
|
||||
CHAR_GRAVE_ACCENT, CHAR_APOSTROPHE, CHAR_QUOTATION_MARK,
|
||||
CHAR_CIRCUMFLEX_ACCENT, CHAR_PERCENT_SIGN, CHAR_NUMBER_SIGN,
|
||||
CHAR_DOLLAR_SIGN, CHAR_LEFT_CURLY_BRACKET, 0 };
|
||||
|
||||
const uint32_t PRIV(callout_end_delims[]) = {
|
||||
CHAR_GRAVE_ACCENT, CHAR_APOSTROPHE, CHAR_QUOTATION_MARK,
|
||||
CHAR_CIRCUMFLEX_ACCENT, CHAR_PERCENT_SIGN, CHAR_NUMBER_SIGN,
|
||||
CHAR_DOLLAR_SIGN, CHAR_RIGHT_CURLY_BRACKET, 0 };
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Tables for UTF-8 support *
|
||||
*************************************************/
|
||||
|
||||
/* These tables are required by pcre2test in 16- or 32-bit mode, as well
|
||||
as for the library in 8-bit mode, because pcre2test uses UTF-8 internally for
|
||||
handling wide characters. */
|
||||
|
||||
#if defined PCRE2_PCRE2TEST || \
|
||||
(defined SUPPORT_UNICODE && \
|
||||
defined PCRE2_CODE_UNIT_WIDTH && \
|
||||
PCRE2_CODE_UNIT_WIDTH == 8)
|
||||
|
||||
/* These are the breakpoints for different numbers of bytes in a UTF-8
|
||||
character. */
|
||||
|
||||
const int PRIV(utf8_table1)[] =
|
||||
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
|
||||
|
||||
const int PRIV(utf8_table1_size) = sizeof(PRIV(utf8_table1)) / sizeof(int);
|
||||
|
||||
/* These are the indicator bits and the mask for the data bits to set in the
|
||||
first byte of a character, indexed by the number of additional bytes. */
|
||||
|
||||
const int PRIV(utf8_table2)[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
|
||||
const int PRIV(utf8_table3)[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
|
||||
|
||||
/* Table of the number of extra bytes, indexed by the first byte masked with
|
||||
0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */
|
||||
|
||||
const uint8_t PRIV(utf8_table4)[] = {
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
||||
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
|
||||
|
||||
#endif /* UTF-8 support needed */
|
||||
|
||||
/* Tables concerned with Unicode properties are relevant only when Unicode
|
||||
support is enabled. See also the pcre2_ucptables.c file, which is generated by
|
||||
a Python script from Unicode data files. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
|
||||
/* Table to translate from particular type value to the general value. */
|
||||
|
||||
const uint32_t PRIV(ucp_gentype)[] = {
|
||||
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
|
||||
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lm, Lo, Lt, Lu */
|
||||
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
|
||||
ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
|
||||
ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
|
||||
ucp_P, ucp_P, /* Po, Ps */
|
||||
ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
|
||||
ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
|
||||
};
|
||||
|
||||
/* This table encodes the rules for finding the end of an extended grapheme
|
||||
cluster. Every code point has a grapheme break property which is one of the
|
||||
ucp_gbXX values defined in pcre2_ucp.h. These changed between Unicode versions
|
||||
10 and 11. The 2-dimensional table is indexed by the properties of two adjacent
|
||||
code points. The left property selects a word from the table, and the right
|
||||
property selects a bit from that word like this:
|
||||
|
||||
PRIV(ucp_gbtable)[left-property] & (1u << right-property)
|
||||
|
||||
The value is non-zero if a grapheme break is NOT permitted between the relevant
|
||||
two code points. The breaking rules are as follows:
|
||||
|
||||
1. Break at the start and end of text (pretty obviously).
|
||||
|
||||
2. Do not break between a CR and LF; otherwise, break before and after
|
||||
controls.
|
||||
|
||||
3. Do not break Hangul syllable sequences, the rules for which are:
|
||||
|
||||
L may be followed by L, V, LV or LVT
|
||||
LV or V may be followed by V or T
|
||||
LVT or T may be followed by T
|
||||
|
||||
4. Do not break before extending characters or zero-width-joiner (ZWJ).
|
||||
|
||||
The following rules are only for extended grapheme clusters (but that's what we
|
||||
are implementing).
|
||||
|
||||
5. Do not break before SpacingMarks.
|
||||
|
||||
6. Do not break after Prepend characters.
|
||||
|
||||
7. Do not break within emoji modifier sequences or emoji zwj sequences. That
|
||||
is, do not break between characters with the Extended_Pictographic property
|
||||
if a ZWJ intervenes. Extend characters are allowed between the characters;
|
||||
this cannot be represented in this table, the code has to deal with it.
|
||||
|
||||
8. Do not break within emoji flag sequences. That is, do not break between
|
||||
regional indicator (RI) symbols if there are an odd number of RI characters
|
||||
before the break point. This table encodes "join RI characters"; the code
|
||||
has to deal with checking for previous adjoining RIs.
|
||||
|
||||
9. Otherwise, break everywhere.
|
||||
*/
|
||||
|
||||
#define ESZ (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbZWJ)
|
||||
|
||||
const uint32_t PRIV(ucp_gbtable)[] = {
|
||||
(1u<<ucp_gbLF), /* 0 CR */
|
||||
0, /* 1 LF */
|
||||
0, /* 2 Control */
|
||||
ESZ, /* 3 Extend */
|
||||
ESZ|(1u<<ucp_gbPrepend)| /* 4 Prepend */
|
||||
(1u<<ucp_gbL)|(1u<<ucp_gbV)|(1u<<ucp_gbT)|
|
||||
(1u<<ucp_gbLV)|(1u<<ucp_gbLVT)|(1u<<ucp_gbOther)|
|
||||
(1u<<ucp_gbRegional_Indicator),
|
||||
ESZ, /* 5 SpacingMark */
|
||||
ESZ|(1u<<ucp_gbL)|(1u<<ucp_gbV)|(1u<<ucp_gbLV)| /* 6 L */
|
||||
(1u<<ucp_gbLVT),
|
||||
ESZ|(1u<<ucp_gbV)|(1u<<ucp_gbT), /* 7 V */
|
||||
ESZ|(1u<<ucp_gbT), /* 8 T */
|
||||
ESZ|(1u<<ucp_gbV)|(1u<<ucp_gbT), /* 9 LV */
|
||||
ESZ|(1u<<ucp_gbT), /* 10 LVT */
|
||||
(1u<<ucp_gbRegional_Indicator), /* 11 Regional Indicator */
|
||||
ESZ, /* 12 Other */
|
||||
ESZ|(1u<<ucp_gbExtended_Pictographic), /* 13 ZWJ */
|
||||
ESZ /* 14 Extended Pictographic */
|
||||
};
|
||||
|
||||
#undef ESZ
|
||||
|
||||
#ifdef SUPPORT_JIT
|
||||
/* This table reverses PRIV(ucp_gentype). We can save the cost
|
||||
of a memory load. */
|
||||
|
||||
const int PRIV(ucp_typerange)[] = {
|
||||
ucp_Cc, ucp_Cs,
|
||||
ucp_Ll, ucp_Lu,
|
||||
ucp_Mc, ucp_Mn,
|
||||
ucp_Nd, ucp_No,
|
||||
ucp_Pc, ucp_Ps,
|
||||
ucp_Sc, ucp_So,
|
||||
ucp_Zl, ucp_Zs,
|
||||
};
|
||||
#endif /* SUPPORT_JIT */
|
||||
|
||||
/* Finally, include the tables that are auto-generated from the Unicode data
|
||||
files. */
|
||||
|
||||
#include "pcre2_ucptables.c"
|
||||
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* End of pcre2_tables.c */
|
||||
5804
3rd/pcre2/src/pcre2_ucd.c
Normal file
5804
3rd/pcre2/src/pcre2_ucd.c
Normal file
File diff suppressed because it is too large
Load Diff
408
3rd/pcre2/src/pcre2_ucp.h
Normal file
408
3rd/pcre2/src/pcre2_ucp.h
Normal file
@@ -0,0 +1,408 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2022 University of Cambridge
|
||||
|
||||
This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY!
|
||||
Instead, modify the maint/GenerateUcpHeader.py script and run it to generate
|
||||
a new version of this code.
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD
|
||||
#define PCRE2_UCP_H_IDEMPOTENT_GUARD
|
||||
|
||||
/* This file contains definitions of the Unicode property values that are
|
||||
returned by the UCD access macros and used throughout PCRE2.
|
||||
|
||||
IMPORTANT: The specific values of the first two enums (general and particular
|
||||
character categories) are assumed by the table called catposstab in the file
|
||||
pcre2_auto_possess.c. They are unlikely to change, but should be checked after
|
||||
an update. */
|
||||
|
||||
/* These are the general character categories. */
|
||||
|
||||
enum {
|
||||
ucp_C,
|
||||
ucp_L,
|
||||
ucp_M,
|
||||
ucp_N,
|
||||
ucp_P,
|
||||
ucp_S,
|
||||
ucp_Z,
|
||||
};
|
||||
|
||||
/* These are the particular character categories. */
|
||||
|
||||
enum {
|
||||
ucp_Cc, /* Control */
|
||||
ucp_Cf, /* Format */
|
||||
ucp_Cn, /* Unassigned */
|
||||
ucp_Co, /* Private use */
|
||||
ucp_Cs, /* Surrogate */
|
||||
ucp_Ll, /* Lower case letter */
|
||||
ucp_Lm, /* Modifier letter */
|
||||
ucp_Lo, /* Other letter */
|
||||
ucp_Lt, /* Title case letter */
|
||||
ucp_Lu, /* Upper case letter */
|
||||
ucp_Mc, /* Spacing mark */
|
||||
ucp_Me, /* Enclosing mark */
|
||||
ucp_Mn, /* Non-spacing mark */
|
||||
ucp_Nd, /* Decimal number */
|
||||
ucp_Nl, /* Letter number */
|
||||
ucp_No, /* Other number */
|
||||
ucp_Pc, /* Connector punctuation */
|
||||
ucp_Pd, /* Dash punctuation */
|
||||
ucp_Pe, /* Close punctuation */
|
||||
ucp_Pf, /* Final punctuation */
|
||||
ucp_Pi, /* Initial punctuation */
|
||||
ucp_Po, /* Other punctuation */
|
||||
ucp_Ps, /* Open punctuation */
|
||||
ucp_Sc, /* Currency symbol */
|
||||
ucp_Sk, /* Modifier symbol */
|
||||
ucp_Sm, /* Mathematical symbol */
|
||||
ucp_So, /* Other symbol */
|
||||
ucp_Zl, /* Line separator */
|
||||
ucp_Zp, /* Paragraph separator */
|
||||
ucp_Zs, /* Space separator */
|
||||
};
|
||||
|
||||
/* These are Boolean properties. The enumerators are numbered consecutively
from zero, so ucp_Bprop_Count, which must stay last, equals the number of
properties listed before it. */

enum {
  ucp_ASCII,
  ucp_ASCII_Hex_Digit,
  ucp_Alphabetic,
  ucp_Bidi_Control,
  ucp_Bidi_Mirrored,
  ucp_Case_Ignorable,
  ucp_Cased,
  ucp_Changes_When_Casefolded,
  ucp_Changes_When_Casemapped,
  ucp_Changes_When_Lowercased,
  ucp_Changes_When_Titlecased,
  ucp_Changes_When_Uppercased,
  ucp_Dash,
  ucp_Default_Ignorable_Code_Point,
  ucp_Deprecated,
  ucp_Diacritic,
  ucp_Emoji,
  ucp_Emoji_Component,
  ucp_Emoji_Modifier,
  ucp_Emoji_Modifier_Base,
  ucp_Emoji_Presentation,
  ucp_Extended_Pictographic,
  ucp_Extender,
  ucp_Grapheme_Base,
  ucp_Grapheme_Extend,
  ucp_Grapheme_Link,
  ucp_Hex_Digit,
  ucp_IDS_Binary_Operator,
  ucp_IDS_Trinary_Operator,
  ucp_IDS_Unary_Operator,
  ucp_ID_Compat_Math_Continue,
  ucp_ID_Compat_Math_Start,
  ucp_ID_Continue,
  ucp_ID_Start,
  ucp_Ideographic,
  ucp_InCB,
  ucp_Join_Control,
  ucp_Logical_Order_Exception,
  ucp_Lowercase,
  ucp_Math,
  ucp_Modifier_Combining_Mark,
  ucp_Noncharacter_Code_Point,
  ucp_Pattern_Syntax,
  ucp_Pattern_White_Space,
  ucp_Prepended_Concatenation_Mark,
  ucp_Quotation_Mark,
  ucp_Radical,
  ucp_Regional_Indicator,
  ucp_Sentence_Terminal,
  ucp_Soft_Dotted,
  ucp_Terminal_Punctuation,
  ucp_Unified_Ideograph,
  ucp_Uppercase,
  ucp_Variation_Selector,
  ucp_White_Space,
  ucp_XID_Continue,
  ucp_XID_Start,
  /* This must be last */
  ucp_Bprop_Count
};
|
||||
|
||||
/* Size of entries in ucd_boolprop_sets[] */
|
||||
|
||||
/* NOTE(review): presumably each ucd_boolprop_sets[] entry is a bit set with
one bit per Boolean property, so this value would have to grow if
ucp_Bprop_Count outgrows it - confirm against the table definition. */
#define ucd_boolprop_sets_item_size 2
|
||||
|
||||
/* These are the bidi class values. The enumerators are listed in
alphabetical order of their abbreviated names and numbered consecutively from
zero; each comment gives the long name of the class. */

enum {
  ucp_bidiAL,   /* Arabic_Letter */
  ucp_bidiAN,   /* Arabic_Number */
  ucp_bidiB,    /* Paragraph_Separator */
  ucp_bidiBN,   /* Boundary_Neutral */
  ucp_bidiCS,   /* Common_Separator */
  ucp_bidiEN,   /* European_Number */
  ucp_bidiES,   /* European_Separator */
  ucp_bidiET,   /* European_Terminator */
  ucp_bidiFSI,  /* First_Strong_Isolate */
  ucp_bidiL,    /* Left_To_Right */
  ucp_bidiLRE,  /* Left_To_Right_Embedding */
  ucp_bidiLRI,  /* Left_To_Right_Isolate */
  ucp_bidiLRO,  /* Left_To_Right_Override */
  ucp_bidiNSM,  /* Nonspacing_Mark */
  ucp_bidiON,   /* Other_Neutral */
  ucp_bidiPDF,  /* Pop_Directional_Format */
  ucp_bidiPDI,  /* Pop_Directional_Isolate */
  ucp_bidiR,    /* Right_To_Left */
  ucp_bidiRLE,  /* Right_To_Left_Embedding */
  ucp_bidiRLI,  /* Right_To_Left_Isolate */
  ucp_bidiRLO,  /* Right_To_Left_Override */
  ucp_bidiS,    /* Segment_Separator */
  ucp_bidiWS,   /* White_Space */
};
|
||||
|
||||
/* These are grapheme break properties. The Extended Pictographic property
comes from the emoji-data.txt file. The number in each comment is the value of
the enumerator. */

enum {
  ucp_gbCR,                    /*  0 */
  ucp_gbLF,                    /*  1 */
  ucp_gbControl,               /*  2 */
  ucp_gbExtend,                /*  3 */
  ucp_gbPrepend,               /*  4 */
  ucp_gbSpacingMark,           /*  5 */
  ucp_gbL,                     /*  6 Hangul syllable type L */
  ucp_gbV,                     /*  7 Hangul syllable type V */
  ucp_gbT,                     /*  8 Hangul syllable type T */
  ucp_gbLV,                    /*  9 Hangul syllable type LV */
  ucp_gbLVT,                   /* 10 Hangul syllable type LVT */
  ucp_gbRegional_Indicator,    /* 11 */
  ucp_gbOther,                 /* 12 */
  ucp_gbZWJ,                   /* 13 */
  ucp_gbExtended_Pictographic, /* 14 */
};
|
||||
|
||||
/* These are the script identifications. The list is split into two groups:
scripts that have characters in other scripts come first, followed by the
scripts that do not. The enumerators are numbered consecutively from zero, so
ucp_Script_Count, which must stay last, equals the number of scripts. */

enum {
  /* Scripts which have characters in other scripts. */
  ucp_Latin,
  ucp_Greek,
  ucp_Cyrillic,
  ucp_Armenian,
  ucp_Hebrew,
  ucp_Arabic,
  ucp_Syriac,
  ucp_Thaana,
  ucp_Devanagari,
  ucp_Bengali,
  ucp_Gurmukhi,
  ucp_Gujarati,
  ucp_Oriya,
  ucp_Tamil,
  ucp_Telugu,
  ucp_Kannada,
  ucp_Malayalam,
  ucp_Sinhala,
  ucp_Thai,
  ucp_Tibetan,
  ucp_Myanmar,
  ucp_Georgian,
  ucp_Hangul,
  ucp_Ethiopic,
  ucp_Cherokee,
  ucp_Runic,
  ucp_Mongolian,
  ucp_Hiragana,
  ucp_Katakana,
  ucp_Bopomofo,
  ucp_Han,
  ucp_Yi,
  ucp_Gothic,
  ucp_Tagalog,
  ucp_Hanunoo,
  ucp_Buhid,
  ucp_Tagbanwa,
  ucp_Limbu,
  ucp_Tai_Le,
  ucp_Linear_B,
  ucp_Shavian,
  ucp_Cypriot,
  ucp_Buginese,
  ucp_Coptic,
  ucp_Glagolitic,
  ucp_Tifinagh,
  ucp_Syloti_Nagri,
  ucp_Phags_Pa,
  ucp_Nko,
  ucp_Kayah_Li,
  ucp_Lycian,
  ucp_Carian,
  ucp_Lydian,
  ucp_Avestan,
  ucp_Samaritan,
  ucp_Lisu,
  ucp_Javanese,
  ucp_Old_Turkic,
  ucp_Kaithi,
  ucp_Mandaic,
  ucp_Chakma,
  ucp_Meroitic_Hieroglyphs,
  ucp_Sharada,
  ucp_Takri,
  ucp_Caucasian_Albanian,
  ucp_Duployan,
  ucp_Elbasan,
  ucp_Grantha,
  ucp_Khojki,
  ucp_Linear_A,
  ucp_Mahajani,
  ucp_Manichaean,
  ucp_Modi,
  ucp_Old_Permic,
  ucp_Psalter_Pahlavi,
  ucp_Khudawadi,
  ucp_Tirhuta,
  ucp_Multani,
  ucp_Old_Hungarian,
  ucp_Adlam,
  ucp_Osage,
  ucp_Tangut,
  ucp_Masaram_Gondi,
  ucp_Dogra,
  ucp_Gunjala_Gondi,
  ucp_Hanifi_Rohingya,
  ucp_Sogdian,
  ucp_Nandinagari,
  ucp_Yezidi,
  ucp_Cypro_Minoan,
  ucp_Old_Uyghur,
  ucp_Toto,
  ucp_Garay,
  ucp_Gurung_Khema,
  ucp_Ol_Onal,
  ucp_Sunuwar,
  ucp_Todhri,
  ucp_Tulu_Tigalari,

  /* Scripts which have no characters in other scripts. */
  ucp_Unknown,
  ucp_Common,
  ucp_Lao,
  ucp_Canadian_Aboriginal,
  ucp_Ogham,
  ucp_Khmer,
  ucp_Old_Italic,
  ucp_Deseret,
  ucp_Inherited,
  ucp_Ugaritic,
  ucp_Osmanya,
  ucp_Braille,
  ucp_New_Tai_Lue,
  ucp_Old_Persian,
  ucp_Kharoshthi,
  ucp_Balinese,
  ucp_Cuneiform,
  ucp_Phoenician,
  ucp_Sundanese,
  ucp_Lepcha,
  ucp_Ol_Chiki,
  ucp_Vai,
  ucp_Saurashtra,
  ucp_Rejang,
  ucp_Cham,
  ucp_Tai_Tham,
  ucp_Tai_Viet,
  ucp_Egyptian_Hieroglyphs,
  ucp_Bamum,
  ucp_Meetei_Mayek,
  ucp_Imperial_Aramaic,
  ucp_Old_South_Arabian,
  ucp_Inscriptional_Parthian,
  ucp_Inscriptional_Pahlavi,
  ucp_Batak,
  ucp_Brahmi,
  ucp_Meroitic_Cursive,
  ucp_Miao,
  ucp_Sora_Sompeng,
  ucp_Bassa_Vah,
  ucp_Pahawh_Hmong,
  ucp_Mende_Kikakui,
  ucp_Mro,
  ucp_Old_North_Arabian,
  ucp_Nabataean,
  ucp_Palmyrene,
  ucp_Pau_Cin_Hau,
  ucp_Siddham,
  ucp_Warang_Citi,
  ucp_Ahom,
  ucp_Anatolian_Hieroglyphs,
  ucp_Hatran,
  ucp_SignWriting,
  ucp_Bhaiksuki,
  ucp_Marchen,
  ucp_Newa,
  ucp_Nushu,
  ucp_Soyombo,
  ucp_Zanabazar_Square,
  ucp_Makasar,
  ucp_Medefaidrin,
  ucp_Old_Sogdian,
  ucp_Elymaic,
  ucp_Nyiakeng_Puachue_Hmong,
  ucp_Wancho,
  ucp_Chorasmian,
  ucp_Dives_Akuru,
  ucp_Khitan_Small_Script,
  ucp_Tangsa,
  ucp_Vithkuqi,
  ucp_Kawi,
  ucp_Nag_Mundari,
  ucp_Kirat_Rai,

  /* This must be last */
  ucp_Script_Count
};
|
||||
|
||||
/* Size of entries in ucd_script_sets[] */
|
||||
|
||||
/* NOTE(review): presumably each ucd_script_sets[] entry is a bit set indexed
by script number, so this value would have to grow if the script list
outgrows it - confirm against the table definition. */
#define ucd_script_sets_item_size 4
|
||||
|
||||
#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */
|
||||
|
||||
/* End of pcre2_ucp.h */
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user