Compare commits
777 Commits
zmq
...
53faaeb396
| Author | SHA1 | Date | |
|---|---|---|---|
| 53faaeb396 | |||
| 2928a690b8 | |||
| 220ce225f8 | |||
| ac89c499ab | |||
| 8d1c97da04 | |||
| aa140fd4ec | |||
| 08161c9aad | |||
| 40cda7d988 | |||
| c05b8b4095 | |||
| 3d7ba1dee6 | |||
| a299ada873 | |||
| 91144ad338 | |||
| ef8b785ac6 | |||
| 27f37c9cc1 | |||
| f2464ed76b | |||
| a3615c5666 | |||
| 1b3f72d429 | |||
| 04152f05a9 | |||
| d95944dcfc | |||
| a17644a953 | |||
| 3426a3064e | |||
| a41379a40e | |||
| 39266f8c3c | |||
| 3bcb778628 | |||
| 781b430c33 | |||
| 0ac7ea3096 | |||
| d6a0ae6106 | |||
| 3625afa783 | |||
| 154cbd0160 | |||
| b6c5d65a8d | |||
| d4254121b8 | |||
| da30ae558f | |||
| 7a6936ccd9 | |||
| 58acf7a823 | |||
| d9719a7a50 | |||
| bf63365370 | |||
| 64e142b8c6 | |||
| be2d3c197c | |||
| 30c4f215a2 | |||
| 6ffbbbe636 | |||
| d62599fb8e | |||
| 7e371132ae | |||
| 3504c25f84 | |||
| 68615035aa | |||
| 654c0847b2 | |||
| 91955d44fa | |||
| a2c24c9f07 | |||
| cf25cacc17 | |||
| 19daab173c | |||
| 19f8f522b3 | |||
| 905d39c07b | |||
| 9504792586 | |||
| 1b51d09c58 | |||
| ed3d4c4217 | |||
| 7b52f6d70d | |||
| 19fe33383a | |||
| e57719c118 | |||
| edb077b400 | |||
| bad51db0c8 | |||
| 87ab4c4c28 | |||
| 8f3456a650 | |||
| ce1aee1553 | |||
| dfdc4b8bdc | |||
| 8a61cfe7ef | |||
| 78f79d92aa | |||
| f09fe03e0d | |||
| 8e18ec15c9 | |||
| 4340f5444d | |||
| 8e96750046 | |||
| d01baffb0b | |||
| 82cda42e75 | |||
| 5755d172cd | |||
| 0dd47f50a0 | |||
| 60d2a83df5 | |||
| 7757ac10ec | |||
| f334a6603f | |||
| 15548de79c | |||
| 49713ddc57 | |||
| 0973379f53 | |||
| a3004491d6 | |||
| 199a272eb8 | |||
| 855d1ca1b5 | |||
| 64025e0399 | |||
| 6e5556969d | |||
| b50eeaef46 | |||
| ffc471359a | |||
| 79007e7b4e | |||
| 9e08b6ffc5 | |||
| 9b8a1583c2 | |||
| cae264a77b | |||
| 4f934fef35 | |||
| 3a159b0553 | |||
| 0c973f0216 | |||
| 65d3168eb5 | |||
| 12114b3e77 | |||
| 6f6b717354 | |||
| 58b3fa64bc | |||
| 1acaf24df9 | |||
| c2c9822169 | |||
| dff4f2b3a0 | |||
| bf9ad65ff0 | |||
| 53ec75bf0c | |||
| f5270d75eb | |||
| 4aa596b179 | |||
| 002f21fc9e | |||
| 9ab46e4afc | |||
| caa7880cc4 | |||
| 24112498ce | |||
| c9a5ddd89f | |||
| 3483edbd76 | |||
| fe82c12d5b | |||
| d90c3f0991 | |||
| f6b9131f4a | |||
| c67f7a2b64 | |||
| 047d38ea59 | |||
| 3d07795515 | |||
| ee137b2820 | |||
| cdde340efe | |||
| ee34e8a72e | |||
| 0840d807a0 | |||
| 4655d72554 | |||
| 5b066cbc27 | |||
| 2247473959 | |||
| 57f8c1313e | |||
| 73ed51e3d4 | |||
| 1106cde3e4 | |||
| b43158d3a8 | |||
| 9a928f6feb | |||
| df75efe881 | |||
| 9f1d23ad8e | |||
| 7cd2f7a310 | |||
| 315966504e | |||
| 7209eec012 | |||
| 992f59904a | |||
| 9dbd7210cb | |||
| ac8efc9f88 | |||
| 92a0a9356c | |||
| 28f3471036 | |||
| d3d7235338 | |||
| 92a87a0c64 | |||
| cd7e053fc5 | |||
| 9eecbbab6e | |||
| 3641e636d2 | |||
| 4acab04895 | |||
| aa963a4bda | |||
| bdd18b614f | |||
| e186e0adff | |||
| f105f616f6 | |||
|
|
b99c51181d | ||
|
|
bc6b584480 | ||
| eb97de1413 | |||
| 97f1c25ff8 | |||
| 97aad47a21 | |||
| 43bd1d8550 | |||
|
|
3255199b3f | ||
| 224412e20a | |||
| 000ce2a54d | |||
|
|
f992bf4cbb | ||
|
|
96625bd93d | ||
| 9d4357c066 | |||
| 7a945f47b1 | |||
| 9a352bfc83 | |||
| aa2f117075 | |||
| bf5bb45771 | |||
| cdc966bd8c | |||
| 17b902ebcc | |||
| 996b7ea403 | |||
| da4b09be9e | |||
| b24b5a1346 | |||
| 0d94699206 | |||
| 16a818c95e | |||
| a0454b809d | |||
| af8c096c7a | |||
| 14cf81aed0 | |||
| c43b914fb3 | |||
| f002f6cedd | |||
| ce846eca51 | |||
| e6c8714857 | |||
| 33fc334077 | |||
| 6efc962923 | |||
| 7d02f710ea | |||
| c8876807ed | |||
| f43f834460 | |||
| ccf6b810b5 | |||
| a438c4249d | |||
| 1c7fc39b6c | |||
| f07c9cbce8 | |||
| abdba6d68b | |||
| 3db26a762c | |||
| b35561f74e | |||
| f041d1435e | |||
| b781bd5148 | |||
| a16e0b7659 | |||
| 0bafd3fa98 | |||
| 903b320629 | |||
| 9cd05996e7 | |||
| 15dc76c4a7 | |||
| d46b5e008a | |||
| af4b718053 | |||
| 7eae1e127c | |||
| f97fed7daa | |||
| d764171c82 | |||
| d4a024ea75 | |||
| 491d89f117 | |||
| a86e8f7b58 | |||
| d97798d063 | |||
| 247759b364 | |||
| a745f803b3 | |||
| 1b67000887 | |||
| 04d3e31dbc | |||
| 9f29155d07 | |||
| 021411defa | |||
| ee4d78d2e1 | |||
| 9283c88b4e | |||
| 8d585439bb | |||
| ebf2b08bb1 | |||
| eb21c85170 | |||
| fb68a9f9fe | |||
| c7c3852747 | |||
| c18d0c918e | |||
| 9c4fd41eef | |||
| 9173dc936d | |||
| 77c8681b43 | |||
| 2db9440a38 | |||
| 0d585cfebf | |||
| 02a9bfb76f | |||
| ad7385c21f | |||
| dd6d91ac1d | |||
| 576d9c79be | |||
| 3fa5d9d9df | |||
| b14d30108a | |||
| 2b738f6f43 | |||
| 263fa18726 | |||
| f47bc411bc | |||
| 154fb7d9fd | |||
| 9f09af9f27 | |||
| 960e4a7cce | |||
| 50bff12364 | |||
| 7297b9aee0 | |||
| 3652705784 | |||
| 851f101470 | |||
| 6b87536d8d | |||
| d299a1f386 | |||
| c8350d2f0a | |||
| 7efdacc979 | |||
| c77afcc374 | |||
| b8fc44714c | |||
| 49a5ed6aa3 | |||
| 76c76b48c5 | |||
| 03bd23a3b9 | |||
| 61e6edb4c8 | |||
| aa78e930be | |||
| bb668dab29 | |||
| d1662d3535 | |||
| 12b136a2f4 | |||
| aa69815a31 | |||
| 3afa0ce0ab | |||
| 64a474c343 | |||
| 0907a3eb13 | |||
| c86ec0ae82 | |||
| ac76e07d9d | |||
| 56aed70425 | |||
| 63321a4ce3 | |||
| 66fb93ba88 | |||
| 299a009d61 | |||
| 4d395f4487 | |||
| 2f82aaf97b | |||
| 13ececf370 | |||
| b19d50ba62 | |||
| b763f0e5cd | |||
| 27f6f5158d | |||
| 61f1a34c14 | |||
| 8ad2503c5a | |||
| 1a96dc0cb9 | |||
| de810b7bd6 | |||
| 35aadb0e78 | |||
| 3a6b3a4064 | |||
| 9e78546b7e | |||
| ccae1a7311 | |||
| 54d0ba1876 | |||
| d1193e7aa1 | |||
| 6c6437e980 | |||
| 17e75c2951 | |||
| c4766f8f5b | |||
| 1a214cff4e | |||
| dd56b1c142 | |||
| 0ae63fa9ad | |||
| 5e4c5b0d47 | |||
| 58dcf7ba69 | |||
| 1de4304e30 | |||
| 96e8ef2b23 | |||
| dea29429bf | |||
| f5392f8b63 | |||
| 3ab57eea88 | |||
| f503e85507 | |||
| 0f19719a98 | |||
| ee7251c17c | |||
| 2780dacb48 | |||
| dea469d85e | |||
| 254649aa10 | |||
| 7badc531ce | |||
| 6d3b7c8543 | |||
| 4af5886649 | |||
| 38bcb6c482 | |||
| eb91ee1b35 | |||
|
|
2d1c86bc83 | ||
|
d66f7efb3c
|
|||
|
20bdf3af61
|
|||
| 49f5de26eb | |||
| 0cd1206f94 | |||
|
df82102798
|
|||
|
650dadc347
|
|||
| 504fc5c896 | |||
| 162f8e25fd | |||
|
e0900b6bd5
|
|||
|
f3540d54b4
|
|||
| d743cc66d8 | |||
| df4dcd7f32 | |||
| 3391d88460 | |||
| 5d75f1d298 | |||
| 73d482ebe2 | |||
| 9ed66db515 | |||
| c64a1beefa | |||
| 677ae06df8 | |||
| 5cc5369d2a | |||
| 5c16953d95 | |||
| f355cfc05e | |||
| 81cbf905ba | |||
| 2732595efe | |||
| 2ac215c19e | |||
| 6c3f305562 | |||
| fd82f5316c | |||
| a98176f513 | |||
| 886eb06880 | |||
| 581c7c937a | |||
| 5836c64e5e | |||
| badcfac616 | |||
| c2b8a8d6da | |||
| 430a41fefc | |||
| c74ba871cd | |||
| 9d9357b0ca | |||
| c4cb81a104 | |||
| b609ce8027 | |||
| 0bd45ec49b | |||
| 3625627072 | |||
| 8e744249de | |||
| cf5284a244 | |||
| 74c20c37b7 | |||
| b25ecf42fb | |||
| 5bb9477b5b | |||
| a27353d42d | |||
| 1327e46f61 | |||
| a516acbd44 | |||
| d3d2b4281c | |||
| ab0b6a7649 | |||
| 6e13ee173e | |||
| a786c928e0 | |||
| f52fc5332c | |||
| dc6fbbf7ec | |||
| a2256a3872 | |||
| d45996af28 | |||
| 05dcbca894 | |||
| 57ed40912d | |||
| 46751ab977 | |||
| e9128771db | |||
|
|
7bbffef237 | ||
|
|
cb59017ebb | ||
| ec8fbcb112 | |||
| 4c909e692a | |||
|
|
cfc9ed131a | ||
|
|
39d81dd23b | ||
| 56fd6b921e | |||
| 0fed454bb7 | |||
| cbac9f4253 | |||
|
|
d46f1a137a | ||
| 3d7e845213 | |||
| e9a7eaa276 | |||
|
|
e6a5010023 | ||
| 4994d0bf66 | |||
|
|
15a9d68a87 | ||
|
|
16c09ae6e9 | ||
| 702d1642e0 | |||
| d34374d4e0 | |||
| 398d760ba9 | |||
|
|
d9eac06749 | ||
|
|
f9c1ef5ba4 | ||
|
|
8738043dce | ||
| db5c4dcf3f | |||
| d3dd3fb32b | |||
|
|
21fa3baf4e | ||
|
|
b17510218b | ||
|
|
897f03f3d0 | ||
|
|
36ff427e0d | ||
|
|
2e1179e2fa | ||
| fffaf0726d | |||
| 6da1ec5acf | |||
| 16a8d37a8f | |||
|
|
93a1bf4f6d | ||
|
|
f08a07cab0 | ||
| 8a5e72c723 | |||
| 2163deb7ea | |||
| 4219372e68 | |||
| e48d0ebaab | |||
| 591c92b4bb | |||
| 6e81a419fb | |||
| ca403ca2c7 | |||
| e46cfdc4bd | |||
| 609ff8e9c8 | |||
| ea0df21726 | |||
| 3a5050b028 | |||
| 2cf561767f | |||
| 638f0e0181 | |||
| 359c7816bc | |||
| 9438ab4e53 | |||
| e5777dde6c | |||
| 3c7e117661 | |||
| 0f48c206c3 | |||
| 4a7f009cc6 | |||
| 001812f3ce | |||
|
|
bf87f631f0 | ||
| 44876836c5 | |||
| 2d2f6b254b | |||
| 858b54ce64 | |||
| 38840c5b7d | |||
| 499ee386a7 | |||
|
|
eddef26b5e | ||
|
|
db3de9904a | ||
|
|
3511fee459 | ||
|
|
5a1a381a32 | ||
|
|
1bc9f5ed19 | ||
|
|
8370351ff3 | ||
|
|
952020a3e2 | ||
|
|
b35ec1f30a | ||
| 443eeed38b | |||
| d1abfe5213 | |||
|
|
29e34bdd60 | ||
|
|
4635e9ba4f | ||
|
|
49e553c551 | ||
| 1abcf06bf6 | |||
| 2550f76376 | |||
| 24da7aa644 | |||
| 494faf862b | |||
| 67561636e5 | |||
|
|
00f7a24d54 | ||
|
|
f07f814b08 | ||
| fed2c5991d | |||
| ed888324b2 | |||
| 79efd9e15d | |||
|
|
1acd29e474 | ||
|
|
cd7af5e9b7 | ||
|
|
adef5b6781 | ||
|
|
2e9acdb1ba | ||
| f0fee0d78f | |||
|
|
60f4db61eb | ||
|
|
724a2dffcf | ||
| 616b384ad6 | |||
| 8abc9777cc | |||
| 8551499a5e | |||
|
|
1eaecb288f | ||
|
|
170a713357 | ||
| 54cc6c55b2 | |||
| e6aa3c34d4 | |||
| bb40f69298 | |||
| af9a9e78b9 | |||
| af1264e42b | |||
| eb91fbfc45 | |||
| 4ea5465637 | |||
| ab7769dd5a | |||
| b1e220e454 | |||
| 1b499530c5 | |||
| b0d48caaad | |||
|
|
d6758a8562 | ||
| 97734953dd | |||
| bd98583116 | |||
|
|
ddba8f401b | ||
|
|
4725eb96d6 | ||
|
|
38fd1b5dc4 | ||
| 16c12a2756 | |||
|
|
1b09ad5c27 | ||
|
|
00d06f71ba | ||
|
|
3873f0b03b | ||
| 6ae6e9a540 | |||
|
|
bbc83128b0 | ||
|
|
d13e68c206 | ||
| a4882dc054 | |||
|
|
a1b9b7e1d6 | ||
|
|
e1b89aeca8 | ||
| 242abaaf59 | |||
|
|
0116387fe3 | ||
|
|
f5953a0ba7 | ||
|
|
7aa407264f | ||
|
|
59c7896577 | ||
|
|
a69de63db0 | ||
| e96b399da7 | |||
| 33eefd7453 | |||
| c7fffe1280 | |||
| 1b04d7ecce | |||
| b66272a68a | |||
| 12c032392c | |||
| ffa25c18f0 | |||
| f67e3030b9 | |||
| 1028233553 | |||
| ef8ffcd02f | |||
| 0897a8369f | |||
| fa19ad1093 | |||
| 8c6b3613b6 | |||
| a23eb341e2 | |||
| b2bc385397 | |||
| 0f9e592273 | |||
| cf4f58ed95 | |||
| 0243f588bc | |||
| af77974e91 | |||
| a502182eba | |||
| d219baee27 | |||
| 0ea1e2c856 | |||
| 3107949e6f | |||
| 460519c075 | |||
| 9347ed2e55 | |||
| 5770adfd34 | |||
| 9714d8ea42 | |||
| 0b3ee4bb6a | |||
| d1f7065c8a | |||
| 6995c25613 | |||
| 28ce6e8f3f | |||
| 8d5730f715 | |||
| 2bbdbc3ac9 | |||
| 19e4eee222 | |||
| 4139d88103 | |||
| 6881fd13b7 | |||
|
|
8c8553a6af | ||
| 97dd19f0c7 | |||
|
|
784c949b1a | ||
|
|
7325e12e30 | ||
| 019ddbb80b | |||
|
|
6322b248a8 | ||
| c1c47b4869 | |||
| 2f4e73ef13 | |||
| 5ae1cfae87 | |||
| 5d82caf889 | |||
| d3028a3ce8 | |||
| eef4573a68 | |||
| 48f3b62540 | |||
|
|
f7d6302572 | ||
|
|
7ad520a1c3 | ||
|
|
4bc12989ca | ||
|
|
186c71d973 | ||
|
|
06c8e6af10 | ||
|
|
69b9589e84 | ||
|
|
5c767c5e3e | ||
|
|
9cd0389a0b | ||
|
|
a7ffc85404 | ||
|
|
2e9c3a1dbf | ||
| 9f581335d3 | |||
| e70e1c0203 | |||
| cb179de856 | |||
| 0aaa5ba890 | |||
| 0cf7fb9f25 | |||
| a304997177 | |||
| d6ba51e4bc | |||
| 892edb7d5b | |||
| c3cf0f3586 | |||
| 2dec17e871 | |||
| 88ffd602d6 | |||
| a57e51bdf8 | |||
| 44c52c40f1 | |||
| 6ecd04b0d8 | |||
| 964823b332 | |||
| ca8839f097 | |||
| 546ad6a744 | |||
| bd9ad16074 | |||
| 23907c7043 | |||
| a6cea11911 | |||
| cea7a7c121 | |||
| 095ecd254f | |||
| 2246b8b5fd | |||
| 914ff5355d | |||
| 858269a46b | |||
| 5072d8c915 | |||
| 5f8c04a78e | |||
| 41fb7cc40d | |||
| 6a399c7d39 | |||
| 90afc369f0 | |||
|
|
765ef7368e | ||
| 03384d02a0 | |||
| cf48c9ebf7 | |||
| dd3d42944e | |||
| c2e44dc3ba | |||
| c1c324a5a8 | |||
| 7f93ba55b4 | |||
| fcd871c0fc | |||
| 0c54709414 | |||
| 7a458c5cbe | |||
| 8da0469dbf | |||
| 38b75c85d7 | |||
| 833d0333d7 | |||
| e67a426ff2 | |||
| 39e4d9a73c | |||
|
|
91216c4b17 | ||
|
|
416b142889 | ||
|
|
cb4df7dc42 | ||
|
|
99a4546775 | ||
|
|
8a9864a91c | ||
| 6485d81025 | |||
|
|
db54d0b052 | ||
|
|
87105cff21 | ||
|
|
a489daa475 | ||
| c476a06e8c | |||
| 9deae168a6 | |||
|
|
93b881da1b | ||
|
|
8beaac5193 | ||
|
|
d4294e3d95 | ||
|
|
8c6db321cf | ||
|
|
35e68bcd0e | ||
|
|
f01ca5e5bf | ||
| bef0ac1194 | |||
| 9fa78a1dbf | |||
| 4b32101de6 | |||
|
|
6abec38856 | ||
|
|
d22af96bea | ||
| 7b65a59a6e | |||
| 42e253adc7 | |||
|
|
bb45a60d94 | ||
|
|
fa93c8a486 | ||
| 77e0423375 | |||
| c7e67b309e | |||
|
|
2ab2614ab4 | ||
|
|
5ed900c46c | ||
|
|
fb104a9f24 | ||
|
|
42bfe7c587 | ||
| 4f2218619c | |||
|
|
e4e16764f3 | ||
| 00830958df | |||
|
|
486fdf3dcd | ||
|
|
7cd824f3ab | ||
|
|
60c9d60079 | ||
| 397d273802 | |||
| a117844233 | |||
| d5c27b1181 | |||
| c90d06871e | |||
| fb282d405d | |||
|
|
f83d08cf56 | ||
|
|
0194e3f6b6 | ||
|
|
1edd9e4c55 | ||
|
|
aa417be1d3 | ||
|
|
7ab16b641d | ||
|
|
42fd417e34 | ||
|
|
c2ceb710c5 | ||
|
|
288062cfd6 | ||
|
|
9f1ae76d1e | ||
| 7c927da979 | |||
|
|
7a9c3f72ca | ||
| 35d340aaab | |||
| 5d98a7dd3a | |||
| 5393225538 | |||
| 831f202536 | |||
| 81b749caa2 | |||
| a50953ae7c | |||
| 08c5d15e6a | |||
| ff70bf6a0e | |||
| a315f7af25 | |||
|
|
4c7df57e66 | ||
|
|
7dee8f6313 | ||
|
|
0ad6ca6602 | ||
|
|
649850a1ba | ||
|
|
8bb1af8d2a | ||
| 7a0adf5e28 | |||
|
|
6241795b60 | ||
|
|
9d0451455a | ||
|
|
9a8b9c7141 | ||
| 866f71edb5 | |||
|
|
4bae04feec | ||
|
|
415160387b | ||
|
|
3dd0d0b6cf | ||
|
|
2596b119ac | ||
| 20e0771331 | |||
| 7a26ae7292 | |||
| cc4e1f48aa | |||
|
|
6e6305d2ec | ||
|
|
3e9cb2481c | ||
|
|
ea624f1223 | ||
|
|
e4aec3f95e | ||
| 7d83c6fe19 | |||
| 7ef4321a3d | |||
| 54b5372356 | |||
|
|
9bf1a11701 | ||
|
|
99280a40ef | ||
|
|
99061f6e24 | ||
|
|
2fd139b1e3 | ||
| ae29b4514c | |||
| 58de1ceafc | |||
| 20e6d1be99 | |||
| 2a877fbb6b | |||
| 2a6ebc9d8d | |||
|
|
c3c98b9d78 | ||
|
|
443e8b46a6 | ||
|
|
fff2aa468a | ||
| 1918e55a97 | |||
| eb6d378de2 | |||
| b1b174ba64 | |||
|
|
4921a3b0fd | ||
|
|
8296e9a32b | ||
|
|
7403ee67be | ||
|
|
cde2341c1f | ||
|
|
542f180d9d | ||
| 86130d7105 | |||
|
|
c9e329d27d | ||
|
|
d4c6c410da | ||
|
|
a7df53fbfe | ||
|
|
0504fa187e | ||
| 1d9a39f792 | |||
| cbdaabee4a | |||
| 3c8ccf357b | |||
|
|
92b20f6f46 | ||
| 04d7ed77d9 | |||
| a2a205cfd2 | |||
| d3b6597042 | |||
|
|
48c885e12a | ||
|
|
6e5a5a6ade | ||
| 21e03fc8cb | |||
| b85de0d704 | |||
|
|
f781cc3846 | ||
|
|
1cb3d4ffe9 | ||
|
|
9293706634 | ||
|
|
fde6bdf17f | ||
|
|
a1c1fd8339 | ||
|
|
01b39dc75f | ||
| 07ec32c969 | |||
| 042366e19e | |||
| 948a90fcd9 | |||
| c404688bbd | |||
| aa76a15f40 | |||
|
|
ca20785b53 | ||
|
|
13d0b2f960 | ||
|
|
46571ac39f | ||
| 36d770ea2e | |||
|
|
bc7d129a9e | ||
| 8accc28804 | |||
|
|
62e130f91b | ||
| a009221092 | |||
| dedc35b466 | |||
| 5e33587703 | |||
| 950f6830da | |||
|
|
19a8ca84e6 | ||
|
|
ece3fb1536 | ||
| 0d119502a8 | |||
| cc5951cfc3 | |||
| 61d42e0ac5 | |||
| 127935086c | |||
| 76ed60edf3 | |||
| 3b0a1c70fe | |||
| 186e07e45d | |||
| 047cff7d6e | |||
| 305275e3ac | |||
| efb0d5f4f9 | |||
| 991a074538 | |||
| f82b6c12ee | |||
| 00edfa4ef0 | |||
| 35a3ce6402 | |||
| be3ce454a0 | |||
| 4c85206cfa | |||
| 5ecdcbe46e | |||
| c937d7251a | |||
| 1cc46468c1 | |||
| 5cc8ef1eb0 | |||
| 99e135caa2 | |||
| 9de7045d63 | |||
| 0e65151e9f | |||
| 3c20728210 | |||
| 4c0530d89a | |||
| f5af8a1da9 | |||
| 44b9c37391 | |||
| 97b0b6fc0c | |||
| 1a2e9afaef | |||
| 39a3a23a24 | |||
| ee131921a0 | |||
| f8818c8537 | |||
| b07242226e |
224
.clang-format
Normal file
224
.clang-format
Normal file
@@ -0,0 +1,224 @@
|
|||||||
|
---
|
||||||
|
Language: Cpp
|
||||||
|
AccessModifierOffset: -4
|
||||||
|
AlignAfterOpenBracket: Align
|
||||||
|
AlignArrayOfStructures: Left
|
||||||
|
AlignConsecutiveAssignments:
|
||||||
|
Enabled: true
|
||||||
|
AcrossEmptyLines: true
|
||||||
|
AcrossComments: true
|
||||||
|
AlignCompound: false
|
||||||
|
PadOperators: true
|
||||||
|
AlignConsecutiveBitFields:
|
||||||
|
Enabled: true
|
||||||
|
AcrossEmptyLines: false
|
||||||
|
AcrossComments: true
|
||||||
|
AlignCompound: false
|
||||||
|
PadOperators: true
|
||||||
|
AlignConsecutiveDeclarations:
|
||||||
|
Enabled: false
|
||||||
|
AcrossEmptyLines: false
|
||||||
|
AcrossComments: false
|
||||||
|
AlignCompound: false
|
||||||
|
PadOperators: false
|
||||||
|
AlignConsecutiveMacros:
|
||||||
|
Enabled: true
|
||||||
|
AcrossEmptyLines: true
|
||||||
|
AcrossComments: true
|
||||||
|
AlignCompound: false
|
||||||
|
PadOperators: true
|
||||||
|
AlignEscapedNewlines: Left
|
||||||
|
AlignOperands: Align
|
||||||
|
AlignTrailingComments: true
|
||||||
|
AllowAllArgumentsOnNextLine: false
|
||||||
|
AllowAllParametersOfDeclarationOnNextLine: false
|
||||||
|
AllowShortEnumsOnASingleLine: false
|
||||||
|
AllowShortBlocksOnASingleLine: Empty
|
||||||
|
AllowShortCaseLabelsOnASingleLine: true
|
||||||
|
AllowShortFunctionsOnASingleLine: Inline
|
||||||
|
AllowShortLambdasOnASingleLine: All
|
||||||
|
AllowShortIfStatementsOnASingleLine: WithoutElse
|
||||||
|
AllowShortLoopsOnASingleLine: false
|
||||||
|
AlwaysBreakAfterDefinitionReturnType: None
|
||||||
|
AlwaysBreakAfterReturnType: None
|
||||||
|
AlwaysBreakBeforeMultilineStrings: false
|
||||||
|
AlwaysBreakTemplateDeclarations: Yes
|
||||||
|
AttributeMacros:
|
||||||
|
- __capability
|
||||||
|
BinPackArguments: false
|
||||||
|
BinPackParameters: false
|
||||||
|
BraceWrapping:
|
||||||
|
AfterCaseLabel: false
|
||||||
|
AfterClass: false
|
||||||
|
AfterControlStatement: Never
|
||||||
|
AfterEnum: false
|
||||||
|
AfterFunction: false
|
||||||
|
AfterNamespace: false
|
||||||
|
AfterObjCDeclaration: false
|
||||||
|
AfterStruct: false
|
||||||
|
AfterUnion: false
|
||||||
|
AfterExternBlock: false
|
||||||
|
BeforeCatch: false
|
||||||
|
BeforeElse: false
|
||||||
|
BeforeLambdaBody: false
|
||||||
|
BeforeWhile: false
|
||||||
|
IndentBraces: false
|
||||||
|
SplitEmptyFunction: false
|
||||||
|
SplitEmptyRecord: false
|
||||||
|
SplitEmptyNamespace: true
|
||||||
|
BreakBeforeBinaryOperators: None
|
||||||
|
BreakBeforeConceptDeclarations: Always
|
||||||
|
BreakBeforeBraces: Attach
|
||||||
|
BreakInheritanceList: BeforeComma
|
||||||
|
BreakBeforeTernaryOperators: true
|
||||||
|
BreakConstructorInitializers: BeforeComma
|
||||||
|
BreakAfterJavaFieldAnnotations: false
|
||||||
|
BreakStringLiterals: true
|
||||||
|
ColumnLimit: 140
|
||||||
|
CommentPragmas: '^ IWYU pragma:'
|
||||||
|
QualifierAlignment: Leave
|
||||||
|
CompactNamespaces: false
|
||||||
|
ConstructorInitializerIndentWidth: 4
|
||||||
|
ContinuationIndentWidth: 4
|
||||||
|
Cpp11BracedListStyle: true
|
||||||
|
DeriveLineEnding: false
|
||||||
|
DerivePointerAlignment: false
|
||||||
|
DisableFormat: false
|
||||||
|
EmptyLineAfterAccessModifier: Never
|
||||||
|
EmptyLineBeforeAccessModifier: Always
|
||||||
|
ExperimentalAutoDetectBinPacking: false
|
||||||
|
PackConstructorInitializers: CurrentLine
|
||||||
|
BasedOnStyle: ''
|
||||||
|
ConstructorInitializerAllOnOneLineOrOnePerLine: true
|
||||||
|
AllowAllConstructorInitializersOnNextLine: true
|
||||||
|
FixNamespaceComments: true
|
||||||
|
ForEachMacros:
|
||||||
|
- foreach
|
||||||
|
- Q_FOREACH
|
||||||
|
- BOOST_FOREACH
|
||||||
|
- piForeach
|
||||||
|
- piForeachC
|
||||||
|
- piForeachR
|
||||||
|
- piForeachRC
|
||||||
|
- piForeachCR
|
||||||
|
IfMacros:
|
||||||
|
- KJ_IF_MAYBE
|
||||||
|
IncludeBlocks: Regroup
|
||||||
|
IncludeCategories:
|
||||||
|
- Regex: '^"(llvm|llvm-c|clang|clang-c)/'
|
||||||
|
Priority: 2
|
||||||
|
SortPriority: 0
|
||||||
|
CaseSensitive: false
|
||||||
|
- Regex: '^(<|"(gtest|gmock|isl|json)/)'
|
||||||
|
Priority: 3
|
||||||
|
SortPriority: 0
|
||||||
|
CaseSensitive: false
|
||||||
|
- Regex: '.*'
|
||||||
|
Priority: 1
|
||||||
|
SortPriority: 0
|
||||||
|
CaseSensitive: false
|
||||||
|
IncludeIsMainRegex: '(Test)?$'
|
||||||
|
IncludeIsMainSourceRegex: ''
|
||||||
|
IndentAccessModifiers: false
|
||||||
|
IndentCaseLabels: false
|
||||||
|
IndentCaseBlocks: false
|
||||||
|
IndentGotoLabels: false
|
||||||
|
IndentPPDirectives: AfterHash
|
||||||
|
IndentExternBlock: NoIndent
|
||||||
|
IndentRequiresClause: true
|
||||||
|
IndentWidth: 4
|
||||||
|
IndentWrappedFunctionNames: false
|
||||||
|
InsertBraces: false
|
||||||
|
InsertTrailingCommas: Wrapped
|
||||||
|
JavaScriptQuotes: Leave
|
||||||
|
JavaScriptWrapImports: true
|
||||||
|
KeepEmptyLinesAtTheStartOfBlocks: false
|
||||||
|
LambdaBodyIndentation: Signature
|
||||||
|
MacroBlockBegin: "PRIVATE_DEFINITION_START|STATIC_INITIALIZER_BEGIN"
|
||||||
|
MacroBlockEnd: "PRIVATE_DEFINITION_END|STATIC_INITIALIZER_END"
|
||||||
|
MaxEmptyLinesToKeep: 2
|
||||||
|
NamespaceIndentation: None
|
||||||
|
ObjCBinPackProtocolList: Auto
|
||||||
|
ObjCBlockIndentWidth: 2
|
||||||
|
ObjCBreakBeforeNestedBlockParam: true
|
||||||
|
ObjCSpaceAfterProperty: false
|
||||||
|
ObjCSpaceBeforeProtocolList: true
|
||||||
|
PenaltyBreakAssignment: 2
|
||||||
|
PenaltyBreakBeforeFirstCallParameter: 19
|
||||||
|
PenaltyBreakComment: 300
|
||||||
|
PenaltyBreakFirstLessLess: 120
|
||||||
|
PenaltyBreakOpenParenthesis: 0
|
||||||
|
PenaltyBreakString: 1000
|
||||||
|
PenaltyBreakTemplateDeclaration: 10
|
||||||
|
PenaltyExcessCharacter: 1000000
|
||||||
|
PenaltyReturnTypeOnItsOwnLine: 60
|
||||||
|
PenaltyIndentedWhitespace: 0
|
||||||
|
PointerAlignment: Middle
|
||||||
|
PPIndentWidth: 2
|
||||||
|
ReferenceAlignment: Middle
|
||||||
|
ReflowComments: true
|
||||||
|
RemoveBracesLLVM: false
|
||||||
|
RequiresClausePosition: OwnLine
|
||||||
|
SeparateDefinitionBlocks: Leave
|
||||||
|
ShortNamespaceLines: 1
|
||||||
|
SortIncludes: CaseSensitive
|
||||||
|
SortJavaStaticImport: Before
|
||||||
|
SortUsingDeclarations: true
|
||||||
|
SpaceAfterCStyleCast: false
|
||||||
|
SpaceAfterLogicalNot: false
|
||||||
|
SpaceAfterTemplateKeyword: false
|
||||||
|
SpaceBeforeAssignmentOperators: true
|
||||||
|
SpaceBeforeCaseColon: false
|
||||||
|
SpaceBeforeCpp11BracedList: false
|
||||||
|
SpaceBeforeCtorInitializerColon: false
|
||||||
|
SpaceBeforeInheritanceColon: false
|
||||||
|
SpaceBeforeParens: ControlStatementsExceptControlMacros
|
||||||
|
SpaceBeforeParensOptions:
|
||||||
|
AfterControlStatements: true
|
||||||
|
AfterForeachMacros: false
|
||||||
|
AfterFunctionDefinitionName: false
|
||||||
|
AfterFunctionDeclarationName: false
|
||||||
|
AfterIfMacros: false
|
||||||
|
AfterOverloadedOperator: false
|
||||||
|
AfterRequiresInClause: false
|
||||||
|
AfterRequiresInExpression: false
|
||||||
|
BeforeNonEmptyParentheses: false
|
||||||
|
SpaceAroundPointerQualifiers: Both
|
||||||
|
SpaceBeforeRangeBasedForLoopColon: false
|
||||||
|
SpaceInEmptyBlock: false
|
||||||
|
SpaceInEmptyParentheses: false
|
||||||
|
SpacesBeforeTrailingComments: 1
|
||||||
|
SpacesInAngles: Never
|
||||||
|
SpacesInConditionalStatement: false
|
||||||
|
SpacesInContainerLiterals: false
|
||||||
|
SpacesInCStyleCastParentheses: false
|
||||||
|
SpacesInLineCommentPrefix:
|
||||||
|
Minimum: 1
|
||||||
|
Maximum: -1
|
||||||
|
SpacesInParentheses: false
|
||||||
|
SpacesInSquareBrackets: false
|
||||||
|
SpaceBeforeSquareBrackets: false
|
||||||
|
BitFieldColonSpacing: After
|
||||||
|
Standard: c++11
|
||||||
|
StatementAttributeLikeMacros:
|
||||||
|
- Q_EMIT
|
||||||
|
- PIMETA
|
||||||
|
StatementMacros:
|
||||||
|
- Q_UNUSED
|
||||||
|
- QT_REQUIRE_VERSION
|
||||||
|
- PRIVATE_DECLARATION
|
||||||
|
- NO_COPY_CLASS
|
||||||
|
- FOREVER_WAIT
|
||||||
|
- WAIT_FOREVER
|
||||||
|
TabWidth: 4
|
||||||
|
UseCRLF: false
|
||||||
|
UseTab: AlignWithSpaces
|
||||||
|
WhitespaceSensitiveMacros:
|
||||||
|
- STRINGIZE
|
||||||
|
- PP_STRINGIZE
|
||||||
|
- BOOST_PP_STRINGIZE
|
||||||
|
- NS_SWIFT_NAME
|
||||||
|
- CF_SWIFT_NAME
|
||||||
|
- PIMETA
|
||||||
|
...
|
||||||
|
|
||||||
6
.editorconfig
Normal file
6
.editorconfig
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
root = true
|
||||||
|
|
||||||
|
[*.{h,c,cpp}]
|
||||||
|
charset = utf-8
|
||||||
|
indent_style = tab
|
||||||
|
tab_width = 4
|
||||||
5
.gitignore
vendored
5
.gitignore
vendored
@@ -2,4 +2,7 @@
|
|||||||
/.svn
|
/.svn
|
||||||
/doc/rtf
|
/doc/rtf
|
||||||
_unsused
|
_unsused
|
||||||
CMakeLists.txt.user*
|
CMakeLists.txt.user*
|
||||||
|
/include
|
||||||
|
/release
|
||||||
|
/build*
|
||||||
|
|||||||
160
3rd/BLAKE2/blake2-impl.h
Normal file
160
3rd/BLAKE2/blake2-impl.h
Normal file
@@ -0,0 +1,160 @@
|
|||||||
|
/*
|
||||||
|
BLAKE2 reference source code package - reference C implementations
|
||||||
|
|
||||||
|
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
|
||||||
|
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
|
||||||
|
your option. The terms of these licenses can be found at:
|
||||||
|
|
||||||
|
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||||
|
- OpenSSL license : https://www.openssl.org/source/license.html
|
||||||
|
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
More information about the BLAKE2 hash function can be found at
|
||||||
|
https://blake2.net.
|
||||||
|
*/
|
||||||
|
#ifndef BLAKE2_IMPL_H
|
||||||
|
#define BLAKE2_IMPL_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
/*
  BLAKE2_INLINE: the inline specifier used by the static helpers below.

  C++ and C99-or-later translation units get the standard `inline` keyword.
  Older C falls back to the MSVC or GNU spelling, and to nothing at all when
  neither compiler is detected.
*/
#if defined(__cplusplus) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)
#  define BLAKE2_INLINE inline
#elif defined(_MSC_VER)
#  define BLAKE2_INLINE __inline
#elif defined(__GNUC__)
#  define BLAKE2_INLINE __inline__
#else
#  define BLAKE2_INLINE
#endif
|
||||||
|
|
||||||
|
/*
  Read the 4 bytes at src and return them as a uint32_t, with src[0] as the
  least significant byte.

  When NATIVE_LITTLE_ENDIAN is defined the bytes are copied verbatim into the
  result with memcpy (the macro name indicates a little-endian host);
  otherwise the value is assembled one byte at a time. Either way src is only
  accessed bytewise.
*/
static BLAKE2_INLINE uint32_t load32( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  uint32_t word;
  memcpy(&word, src, sizeof word);
  return word;
#else
  const uint8_t *bytes = ( const uint8_t * )src;
  uint32_t word = 0;
  unsigned i;
  for( i = 0; i < 4; ++i ) {
    /* byte i supplies bits 8*i .. 8*i+7 */
    word |= ( uint32_t )bytes[i] << ( 8 * i );
  }
  return word;
#endif
}
|
||||||
|
|
||||||
|
/*
  Read the 8 bytes at src and return them as a uint64_t, with src[0] as the
  least significant byte.

  When NATIVE_LITTLE_ENDIAN is defined the bytes are copied verbatim into the
  result with memcpy (the macro name indicates a little-endian host);
  otherwise the value is assembled one byte at a time.
*/
static BLAKE2_INLINE uint64_t load64( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  uint64_t word;
  memcpy(&word, src, sizeof word);
  return word;
#else
  const uint8_t *bytes = ( const uint8_t * )src;
  uint64_t word = 0;
  unsigned i;
  for( i = 0; i < 8; ++i ) {
    /* byte i supplies bits 8*i .. 8*i+7 */
    word |= ( uint64_t )bytes[i] << ( 8 * i );
  }
  return word;
#endif
}
|
||||||
|
|
||||||
|
/*
  Read the 2 bytes at src and return them as a uint16_t, with src[0] as the
  low byte and src[1] as the high byte.

  When NATIVE_LITTLE_ENDIAN is defined the bytes are copied verbatim into the
  result with memcpy (the macro name indicates a little-endian host);
  otherwise the two bytes are combined explicitly.
*/
static BLAKE2_INLINE uint16_t load16( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  uint16_t half;
  memcpy(&half, src, sizeof half);
  return half;
#else
  const uint8_t *bytes = ( const uint8_t * )src;
  const uint32_t low  = ( uint32_t )bytes[0];
  const uint32_t high = ( uint32_t )bytes[1] << 8;
  return ( uint16_t )( low | high );
#endif
}
|
||||||
|
|
||||||
|
/*
  Write w to the 2 bytes at dst, low byte first.

  When NATIVE_LITTLE_ENDIAN is defined the object representation of w is
  copied with memcpy (the macro name indicates a little-endian host);
  otherwise each byte is extracted by shifting and stored individually.
*/
static BLAKE2_INLINE void store16( void *dst, uint16_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  memcpy(dst, &w, sizeof w);
#else
  uint8_t *out = ( uint8_t * )dst;
  out[0] = ( uint8_t )w;
  out[1] = ( uint8_t )( w >> 8 );
#endif
}
|
||||||
|
|
||||||
|
/*
  Write w to the 4 bytes at dst, least significant byte first.

  When NATIVE_LITTLE_ENDIAN is defined the object representation of w is
  copied with memcpy (the macro name indicates a little-endian host);
  otherwise each byte is extracted by shifting and stored individually.
*/
static BLAKE2_INLINE void store32( void *dst, uint32_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  memcpy(dst, &w, sizeof w);
#else
  uint8_t *out = ( uint8_t * )dst;
  unsigned i;
  for( i = 0; i < 4; ++i ) {
    /* out[i] receives bits 8*i .. 8*i+7 of w */
    out[i] = ( uint8_t )( w >> ( 8 * i ) );
  }
#endif
}
|
||||||
|
|
||||||
|
/*
  Write w to the 8 bytes at dst, least significant byte first.

  When NATIVE_LITTLE_ENDIAN is defined the object representation of w is
  copied with memcpy (the macro name indicates a little-endian host);
  otherwise each byte is extracted by shifting and stored individually.
*/
static BLAKE2_INLINE void store64( void *dst, uint64_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  memcpy(dst, &w, sizeof w);
#else
  uint8_t *out = ( uint8_t * )dst;
  unsigned i;
  for( i = 0; i < 8; ++i ) {
    /* out[i] receives bits 8*i .. 8*i+7 of w */
    out[i] = ( uint8_t )( w >> ( 8 * i ) );
  }
#endif
}
|
||||||
|
|
||||||
|
/*
  Read the 6 bytes at src and return them in the low 48 bits of a uint64_t,
  with src[0] as the least significant byte. The upper 16 bits of the result
  are zero. Unlike the 16/32/64-bit loaders there is no memcpy fast path.
*/
static BLAKE2_INLINE uint64_t load48( const void *src )
{
  const uint8_t *bytes = ( const uint8_t * )src;
  uint64_t word = 0;
  unsigned i;
  for( i = 0; i < 6; ++i ) {
    /* byte i supplies bits 8*i .. 8*i+7 */
    word |= ( uint64_t )bytes[i] << ( 8 * i );
  }
  return word;
}
|
||||||
|
|
||||||
|
/*
  Write the low 48 bits of w to the 6 bytes at dst, least significant byte
  first. Bits 48..63 of w are not stored.
*/
static BLAKE2_INLINE void store48( void *dst, uint64_t w )
{
  uint8_t *out = ( uint8_t * )dst;
  unsigned i;
  for( i = 0; i < 6; ++i ) {
    /* out[i] receives bits 8*i .. 8*i+7 of w */
    out[i] = ( uint8_t )( w >> ( 8 * i ) );
  }
}
|
||||||
|
|
||||||
|
/*
  Rotate the 32-bit word w right by c bit positions and return the result.

  Both shift counts are reduced modulo 32. For c in 1..31 the result is the
  same as the plain ( w >> c ) | ( w << ( 32 - c ) ) form. For c == 0 (or any
  multiple of 32) w is returned unchanged, where the plain form would
  evaluate w << 32, a shift by the full width of the type, which is undefined
  behaviour in C.
*/
static BLAKE2_INLINE uint32_t rotr32( const uint32_t w, const unsigned c )
{
  return ( w >> ( c & 31u ) ) | ( w << ( ( 32u - c ) & 31u ) );
}
|
||||||
|
|
||||||
|
/*
  Rotate the 64-bit word w right by c bit positions and return the result.

  Both shift counts are reduced modulo 64. For c in 1..63 the result is the
  same as the plain ( w >> c ) | ( w << ( 64 - c ) ) form. For c == 0 (or any
  multiple of 64) w is returned unchanged, where the plain form would
  evaluate w << 64, a shift by the full width of the type, which is undefined
  behaviour in C.
*/
static BLAKE2_INLINE uint64_t rotr64( const uint64_t w, const unsigned c )
{
  return ( w >> ( c & 63u ) ) | ( w << ( ( 64u - c ) & 63u ) );
}
|
||||||
|
|
||||||
|
/* prevents compiler optimizing out memset() */
|
||||||
|
static BLAKE2_INLINE void secure_zero_memory(void *v, size_t n)
|
||||||
|
{
|
||||||
|
static void *(*const volatile memset_v)(void *, int, size_t) = &memset;
|
||||||
|
memset_v(v, 0, n);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
195
3rd/BLAKE2/blake2.h
Normal file
195
3rd/BLAKE2/blake2.h
Normal file
@@ -0,0 +1,195 @@
|
|||||||
|
/*
|
||||||
|
BLAKE2 reference source code package - reference C implementations
|
||||||
|
|
||||||
|
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
|
||||||
|
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
|
||||||
|
your option. The terms of these licenses can be found at:
|
||||||
|
|
||||||
|
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||||
|
- OpenSSL license : https://www.openssl.org/source/license.html
|
||||||
|
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
More information about the BLAKE2 hash function can be found at
|
||||||
|
https://blake2.net.
|
||||||
|
*/
|
||||||
|
#ifndef BLAKE2_H
|
||||||
|
#define BLAKE2_H
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
/*
  BLAKE2_PACKED(decl): emit the declaration decl with byte packing applied.

  MSVC wraps decl in pack(push, 1) / pack(pop) pragmas; every other compiler
  gets the GNU-style packed attribute appended to decl.
*/
#if !defined(_MSC_VER)
#  define BLAKE2_PACKED(decl) decl __attribute__((packed))
#else
#  define BLAKE2_PACKED(decl) __pragma(pack(push, 1)) decl __pragma(pack(pop))
#endif
|
||||||
|
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Byte-count constants for the BLAKE2s variant. */
enum blake2s_constant {
  BLAKE2S_BLOCKBYTES    = 64,
  BLAKE2S_OUTBYTES      = 32,
  BLAKE2S_KEYBYTES      = 32,
  BLAKE2S_SALTBYTES     = 8,
  BLAKE2S_PERSONALBYTES = 8
};
|
||||||
|
|
||||||
|
/* Byte sizes used by the BLAKE2b declarations in this header. */
enum blake2b_constant
{
  BLAKE2B_BLOCKBYTES    = 128, /* size of one compression block / input buffer */
  BLAKE2B_OUTBYTES      = 64,  /* largest digest length accepted by blake2b_init */
  BLAKE2B_KEYBYTES      = 64,  /* largest key length accepted by blake2b_init_key */
  BLAKE2B_SALTBYTES     = 16,  /* size of the `salt` parameter field */
  BLAKE2B_PERSONALBYTES = 16   /* size of the `personal` parameter field */
};
|
||||||
|
|
||||||
|
typedef struct blake2s_state__
|
||||||
|
{
|
||||||
|
uint32_t h[8];
|
||||||
|
uint32_t t[2];
|
||||||
|
uint32_t f[2];
|
||||||
|
uint8_t buf[BLAKE2S_BLOCKBYTES];
|
||||||
|
size_t buflen;
|
||||||
|
size_t outlen;
|
||||||
|
uint8_t last_node;
|
||||||
|
} blake2s_state;
|
||||||
|
|
||||||
|
typedef struct blake2b_state__
|
||||||
|
{
|
||||||
|
uint64_t h[8];
|
||||||
|
uint64_t t[2];
|
||||||
|
uint64_t f[2];
|
||||||
|
uint8_t buf[BLAKE2B_BLOCKBYTES];
|
||||||
|
size_t buflen;
|
||||||
|
size_t outlen;
|
||||||
|
uint8_t last_node;
|
||||||
|
} blake2b_state;
|
||||||
|
|
||||||
|
typedef struct blake2sp_state__
|
||||||
|
{
|
||||||
|
blake2s_state S[8][1];
|
||||||
|
blake2s_state R[1];
|
||||||
|
uint8_t buf[8 * BLAKE2S_BLOCKBYTES];
|
||||||
|
size_t buflen;
|
||||||
|
size_t outlen;
|
||||||
|
} blake2sp_state;
|
||||||
|
|
||||||
|
typedef struct blake2bp_state__
|
||||||
|
{
|
||||||
|
blake2b_state S[4][1];
|
||||||
|
blake2b_state R[1];
|
||||||
|
uint8_t buf[4 * BLAKE2B_BLOCKBYTES];
|
||||||
|
size_t buflen;
|
||||||
|
size_t outlen;
|
||||||
|
} blake2bp_state;
|
||||||
|
|
||||||
|
|
||||||
|
BLAKE2_PACKED(struct blake2s_param__
|
||||||
|
{
|
||||||
|
uint8_t digest_length; /* 1 */
|
||||||
|
uint8_t key_length; /* 2 */
|
||||||
|
uint8_t fanout; /* 3 */
|
||||||
|
uint8_t depth; /* 4 */
|
||||||
|
uint32_t leaf_length; /* 8 */
|
||||||
|
uint32_t node_offset; /* 12 */
|
||||||
|
uint16_t xof_length; /* 14 */
|
||||||
|
uint8_t node_depth; /* 15 */
|
||||||
|
uint8_t inner_length; /* 16 */
|
||||||
|
/* uint8_t reserved[0]; */
|
||||||
|
uint8_t salt[BLAKE2S_SALTBYTES]; /* 24 */
|
||||||
|
uint8_t personal[BLAKE2S_PERSONALBYTES]; /* 32 */
|
||||||
|
});
|
||||||
|
|
||||||
|
typedef struct blake2s_param__ blake2s_param;
|
||||||
|
|
||||||
|
BLAKE2_PACKED(struct blake2b_param__
|
||||||
|
{
|
||||||
|
uint8_t digest_length; /* 1 */
|
||||||
|
uint8_t key_length; /* 2 */
|
||||||
|
uint8_t fanout; /* 3 */
|
||||||
|
uint8_t depth; /* 4 */
|
||||||
|
uint32_t leaf_length; /* 8 */
|
||||||
|
uint32_t node_offset; /* 12 */
|
||||||
|
uint32_t xof_length; /* 16 */
|
||||||
|
uint8_t node_depth; /* 17 */
|
||||||
|
uint8_t inner_length; /* 18 */
|
||||||
|
uint8_t reserved[14]; /* 32 */
|
||||||
|
uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */
|
||||||
|
uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
|
||||||
|
});
|
||||||
|
|
||||||
|
typedef struct blake2b_param__ blake2b_param;
|
||||||
|
|
||||||
|
typedef struct blake2xs_state__
|
||||||
|
{
|
||||||
|
blake2s_state S[1];
|
||||||
|
blake2s_param P[1];
|
||||||
|
} blake2xs_state;
|
||||||
|
|
||||||
|
typedef struct blake2xb_state__
|
||||||
|
{
|
||||||
|
blake2b_state S[1];
|
||||||
|
blake2b_param P[1];
|
||||||
|
} blake2xb_state;
|
||||||
|
|
||||||
|
/* Padded structs result in a compile-time error */
|
||||||
|
enum {
|
||||||
|
BLAKE2_DUMMY_1 = 1/(int)(sizeof(blake2s_param) == BLAKE2S_OUTBYTES),
|
||||||
|
BLAKE2_DUMMY_2 = 1/(int)(sizeof(blake2b_param) == BLAKE2B_OUTBYTES)
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Streaming API */
|
||||||
|
int blake2s_init( blake2s_state *S, size_t outlen );
|
||||||
|
int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen );
|
||||||
|
int blake2s_init_param( blake2s_state *S, const blake2s_param *P );
|
||||||
|
int blake2s_update( blake2s_state *S, const void *in, size_t inlen );
|
||||||
|
int blake2s_final( blake2s_state *S, void *out, size_t outlen );
|
||||||
|
|
||||||
|
int blake2b_init( blake2b_state *S, size_t outlen );
|
||||||
|
int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen );
|
||||||
|
int blake2b_init_param( blake2b_state *S, const blake2b_param *P );
|
||||||
|
int blake2b_update( blake2b_state *S, const void *in, size_t inlen );
|
||||||
|
int blake2b_final( blake2b_state *S, void *out, size_t outlen );
|
||||||
|
|
||||||
|
int blake2sp_init( blake2sp_state *S, size_t outlen );
|
||||||
|
int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen );
|
||||||
|
int blake2sp_update( blake2sp_state *S, const void *in, size_t inlen );
|
||||||
|
int blake2sp_final( blake2sp_state *S, void *out, size_t outlen );
|
||||||
|
|
||||||
|
int blake2bp_init( blake2bp_state *S, size_t outlen );
|
||||||
|
int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen );
|
||||||
|
int blake2bp_update( blake2bp_state *S, const void *in, size_t inlen );
|
||||||
|
int blake2bp_final( blake2bp_state *S, void *out, size_t outlen );
|
||||||
|
|
||||||
|
/* Variable output length API */
|
||||||
|
int blake2xs_init( blake2xs_state *S, const size_t outlen );
|
||||||
|
int blake2xs_init_key( blake2xs_state *S, const size_t outlen, const void *key, size_t keylen );
|
||||||
|
int blake2xs_update( blake2xs_state *S, const void *in, size_t inlen );
|
||||||
|
int blake2xs_final(blake2xs_state *S, void *out, size_t outlen);
|
||||||
|
|
||||||
|
int blake2xb_init( blake2xb_state *S, const size_t outlen );
|
||||||
|
int blake2xb_init_key( blake2xb_state *S, const size_t outlen, const void *key, size_t keylen );
|
||||||
|
int blake2xb_update( blake2xb_state *S, const void *in, size_t inlen );
|
||||||
|
int blake2xb_final(blake2xb_state *S, void *out, size_t outlen);
|
||||||
|
|
||||||
|
/*
  Simple API

  One-shot hashing: `inlen` bytes at `in` are hashed into `outlen` bytes at
  `out`, with an optional key of `keylen` bytes at `key`.
*/
int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );

int blake2sp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
int blake2bp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );

int blake2xs( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
int blake2xb( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );

/* This is simply an alias for blake2b */
int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
|
||||||
|
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
379
3rd/BLAKE2/blake2b-ref.c
Normal file
379
3rd/BLAKE2/blake2b-ref.c
Normal file
@@ -0,0 +1,379 @@
|
|||||||
|
/*
|
||||||
|
BLAKE2 reference source code package - reference C implementations
|
||||||
|
|
||||||
|
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
|
||||||
|
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
|
||||||
|
your option. The terms of these licenses can be found at:
|
||||||
|
|
||||||
|
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||||
|
- OpenSSL license : https://www.openssl.org/source/license.html
|
||||||
|
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
More information about the BLAKE2 hash function can be found at
|
||||||
|
https://blake2.net.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "blake2.h"
|
||||||
|
#include "blake2-impl.h"
|
||||||
|
|
||||||
|
/* BLAKE2b initialization vector: copied into h[] by blake2b_init0 and used
   for the second half of the working vector in blake2b_compress. */
static const uint64_t blake2b_IV[8] =
{
  0x6a09e667f3bcc908ULL,
  0xbb67ae8584caa73bULL,
  0x3c6ef372fe94f82bULL,
  0xa54ff53a5f1d36f1ULL,
  0x510e527fade682d1ULL,
  0x9b05688c2b3e6c1fULL,
  0x1f83d9abfb41bd6bULL,
  0x5be0cd19137e2179ULL
};
|
||||||
|
|
||||||
|
/*
  Message word schedule: row r lists, in order, the indices of the message
  words mixed in by the G steps of round r. Rows 10 and 11 repeat rows 0
  and 1.
*/
static const uint8_t blake2b_sigma[12][16] =
{
  /*  0 */ {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
  /*  1 */ { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
  /*  2 */ { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 },
  /*  3 */ {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 },
  /*  4 */ {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 },
  /*  5 */ {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 },
  /*  6 */ { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 },
  /*  7 */ { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 },
  /*  8 */ {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 },
  /*  9 */ { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0 },
  /* 10 */ {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
  /* 11 */ { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 }
};
|
||||||
|
|
||||||
|
|
||||||
|
/* Set the last-node flag word f[1] to all ones. */
static void blake2b_set_lastnode( blake2b_state *S )
{
  S->f[1] = ~( uint64_t )0;
}
|
||||||
|
|
||||||
|
/* Some helper functions, not necessarily useful */
|
||||||
|
static int blake2b_is_lastblock( const blake2b_state *S )
|
||||||
|
{
|
||||||
|
return S->f[0] != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Set the last-block flag f[0] to all ones. When the state is marked as a
   last node, the last-node flag is set as well. */
static void blake2b_set_lastblock( blake2b_state *S )
{
  if( S->last_node )
  {
    blake2b_set_lastnode( S );
  }

  S->f[0] = ~( uint64_t )0;
}
|
||||||
|
|
||||||
|
/* Add `inc` to the two-word counter t: t[0] is the low word, and a
   wrap-around of t[0] carries one into t[1]. */
static void blake2b_increment_counter( blake2b_state *S, const uint64_t inc )
{
  const uint64_t low = S->t[0] + inc;

  S->t[0] = low;
  if( low < inc ) /* unsigned wrap-around: carry into the high word */
  {
    S->t[1] += 1;
  }
}
|
||||||
|
|
||||||
|
/* Reset the whole state to zero, then load the chaining value h with the
   initialization vector. */
static void blake2b_init0( blake2b_state *S )
{
  size_t word;

  memset( S, 0, sizeof( *S ) );

  for( word = 0; word < sizeof( S->h ) / sizeof( S->h[0] ); ++word )
  {
    S->h[word] = blake2b_IV[word];
  }
}
|
||||||
|
|
||||||
|
/* init xors IV with input parameter block */
|
||||||
|
int blake2b_init_param( blake2b_state *S, const blake2b_param *P )
|
||||||
|
{
|
||||||
|
const uint8_t *p = ( const uint8_t * )( P );
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
blake2b_init0( S );
|
||||||
|
|
||||||
|
/* IV XOR ParamBlock */
|
||||||
|
for( i = 0; i < 8; ++i )
|
||||||
|
S->h[i] ^= load64( p + sizeof( S->h[i] ) * i );
|
||||||
|
|
||||||
|
S->outlen = P->digest_length;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
  Initialize `S` for unkeyed sequential hashing with an `outlen`-byte digest.

  Returns -1 when outlen is 0 or greater than BLAKE2B_OUTBYTES, otherwise
  the result of blake2b_init_param.
*/
int blake2b_init( blake2b_state *S, size_t outlen )
{
  blake2b_param P[1];

  if( outlen == 0 || outlen > BLAKE2B_OUTBYTES )
  {
    return -1;
  }

  /* Sequential mode: fanout 1, depth 1, no key, every other field zero. */
  P->digest_length = ( uint8_t )outlen;
  P->key_length    = 0;
  P->fanout        = 1;
  P->depth         = 1;
  store32( &P->leaf_length, 0 );
  store32( &P->node_offset, 0 );
  store32( &P->xof_length, 0 );
  P->node_depth    = 0;
  P->inner_length  = 0;
  memset( P->reserved, 0, sizeof( P->reserved ) );
  memset( P->salt,     0, sizeof( P->salt ) );
  memset( P->personal, 0, sizeof( P->personal ) );

  return blake2b_init_param( S, P );
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
  Initialize `S` for keyed hashing with an `outlen`-byte digest.

  Returns -1 when outlen is 0 or greater than BLAKE2B_OUTBYTES, when `key`
  is NULL, or when keylen is 0 or greater than BLAKE2B_KEYBYTES; 0 on
  success. The key is zero-padded to one full block and fed in as the first
  block of input.
*/
int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen )
{
  blake2b_param P[1];
  uint8_t key_block[BLAKE2B_BLOCKBYTES];

  if( outlen == 0 || outlen > BLAKE2B_OUTBYTES )
  {
    return -1;
  }

  if( key == NULL || keylen == 0 || keylen > BLAKE2B_KEYBYTES )
  {
    return -1;
  }

  /* Sequential mode: fanout 1, depth 1, every unused field zero. */
  P->digest_length = ( uint8_t )outlen;
  P->key_length    = ( uint8_t )keylen;
  P->fanout        = 1;
  P->depth         = 1;
  store32( &P->leaf_length, 0 );
  store32( &P->node_offset, 0 );
  store32( &P->xof_length, 0 );
  P->node_depth    = 0;
  P->inner_length  = 0;
  memset( P->reserved, 0, sizeof( P->reserved ) );
  memset( P->salt,     0, sizeof( P->salt ) );
  memset( P->personal, 0, sizeof( P->personal ) );

  if( blake2b_init_param( S, P ) < 0 )
  {
    return -1;
  }

  memset( key_block, 0, BLAKE2B_BLOCKBYTES );
  memcpy( key_block, key, keylen );
  blake2b_update( S, key_block, BLAKE2B_BLOCKBYTES );
  secure_zero_memory( key_block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */

  return 0;
}
|
||||||
|
|
||||||
|
#define G(r,i,a,b,c,d) \
|
||||||
|
do { \
|
||||||
|
a = a + b + m[blake2b_sigma[r][2*i+0]]; \
|
||||||
|
d = rotr64(d ^ a, 32); \
|
||||||
|
c = c + d; \
|
||||||
|
b = rotr64(b ^ c, 24); \
|
||||||
|
a = a + b + m[blake2b_sigma[r][2*i+1]]; \
|
||||||
|
d = rotr64(d ^ a, 16); \
|
||||||
|
c = c + d; \
|
||||||
|
b = rotr64(b ^ c, 63); \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define ROUND(r) \
|
||||||
|
do { \
|
||||||
|
G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
|
||||||
|
G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
|
||||||
|
G(r,2,v[ 2],v[ 6],v[10],v[14]); \
|
||||||
|
G(r,3,v[ 3],v[ 7],v[11],v[15]); \
|
||||||
|
G(r,4,v[ 0],v[ 5],v[10],v[15]); \
|
||||||
|
G(r,5,v[ 1],v[ 6],v[11],v[12]); \
|
||||||
|
G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
|
||||||
|
G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
static void blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
|
||||||
|
{
|
||||||
|
uint64_t m[16];
|
||||||
|
uint64_t v[16];
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
for( i = 0; i < 16; ++i ) {
|
||||||
|
m[i] = load64( block + i * sizeof( m[i] ) );
|
||||||
|
}
|
||||||
|
|
||||||
|
for( i = 0; i < 8; ++i ) {
|
||||||
|
v[i] = S->h[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
v[ 8] = blake2b_IV[0];
|
||||||
|
v[ 9] = blake2b_IV[1];
|
||||||
|
v[10] = blake2b_IV[2];
|
||||||
|
v[11] = blake2b_IV[3];
|
||||||
|
v[12] = blake2b_IV[4] ^ S->t[0];
|
||||||
|
v[13] = blake2b_IV[5] ^ S->t[1];
|
||||||
|
v[14] = blake2b_IV[6] ^ S->f[0];
|
||||||
|
v[15] = blake2b_IV[7] ^ S->f[1];
|
||||||
|
|
||||||
|
ROUND( 0 );
|
||||||
|
ROUND( 1 );
|
||||||
|
ROUND( 2 );
|
||||||
|
ROUND( 3 );
|
||||||
|
ROUND( 4 );
|
||||||
|
ROUND( 5 );
|
||||||
|
ROUND( 6 );
|
||||||
|
ROUND( 7 );
|
||||||
|
ROUND( 8 );
|
||||||
|
ROUND( 9 );
|
||||||
|
ROUND( 10 );
|
||||||
|
ROUND( 11 );
|
||||||
|
|
||||||
|
for( i = 0; i < 8; ++i ) {
|
||||||
|
S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef G
|
||||||
|
#undef ROUND
|
||||||
|
|
||||||
|
/*
  Absorb `inlen` bytes from `pin` into the state. Always returns 0.

  Blocks are compressed only while more input is known to follow, so the
  most recent (possibly full) block always stays in S->buf for
  blake2b_final to process with the last-block flag set.
*/
int blake2b_update( blake2b_state *S, const void *pin, size_t inlen )
{
  const unsigned char *src = ( const unsigned char * )pin;
  size_t buffered;
  size_t room;

  if( inlen == 0 )
  {
    return 0;
  }

  buffered = S->buflen;
  room     = BLAKE2B_BLOCKBYTES - buffered;

  if( inlen > room )
  {
    /* Complete the buffered block and compress it. */
    S->buflen = 0;
    memcpy( S->buf + buffered, src, room );
    blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
    blake2b_compress( S, S->buf );
    src   += room;
    inlen -= room;

    /* Compress whole blocks straight from the input, keeping the last one back. */
    for( ; inlen > BLAKE2B_BLOCKBYTES; src += BLAKE2B_BLOCKBYTES, inlen -= BLAKE2B_BLOCKBYTES )
    {
      blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
      blake2b_compress( S, src );
    }
  }

  /* Stash what remains (at most one block). */
  memcpy( S->buf + S->buflen, src, inlen );
  S->buflen += inlen;
  return 0;
}
|
||||||
|
|
||||||
|
/*
  Finish the hash and copy S->outlen digest bytes to `out`.

  Returns -1 when `out` is NULL, when `outlen` is smaller than the digest
  length fixed at init time, or when the state was already finalized;
  0 on success.
*/
int blake2b_final( blake2b_state *S, void *out, size_t outlen )
{
  uint8_t digest[BLAKE2B_OUTBYTES] = {0};
  size_t word;

  if( out == NULL || outlen < S->outlen || blake2b_is_lastblock( S ) )
  {
    return -1;
  }

  /* Account for the buffered bytes, flag the last block, zero-pad, compress. */
  blake2b_increment_counter( S, S->buflen );
  blake2b_set_lastblock( S );
  memset( S->buf + S->buflen, 0, BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */
  blake2b_compress( S, S->buf );

  /* Serialize the full chaining value, then hand out only outlen bytes. */
  for( word = 0; word < 8; ++word )
  {
    store64( digest + word * sizeof( S->h[word] ), S->h[word] );
  }

  memcpy( out, digest, S->outlen );
  secure_zero_memory( digest, sizeof( digest ) );
  return 0;
}
|
||||||
|
|
||||||
|
/* inlen, at least, should be uint64_t. Others can be size_t. */
|
||||||
|
int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
|
||||||
|
{
|
||||||
|
blake2b_state S[1];
|
||||||
|
|
||||||
|
/* Verify parameters */
|
||||||
|
if ( NULL == in && inlen > 0 ) return -1;
|
||||||
|
|
||||||
|
if ( NULL == out ) return -1;
|
||||||
|
|
||||||
|
if( NULL == key && keylen > 0 ) return -1;
|
||||||
|
|
||||||
|
if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
|
||||||
|
|
||||||
|
if( keylen > BLAKE2B_KEYBYTES ) return -1;
|
||||||
|
|
||||||
|
if( keylen > 0 )
|
||||||
|
{
|
||||||
|
if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if( blake2b_init( S, outlen ) < 0 ) return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
blake2b_update( S, ( const uint8_t * )in, inlen );
|
||||||
|
blake2b_final( S, out, outlen );
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Alias for blake2b(): forwards every argument unchanged and returns its
   result. */
int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
{
  return blake2b( out, outlen, in, inlen, key, keylen );
}
|
||||||
|
|
||||||
|
#ifdef SUPERCOP
/* Entry point compiled only when SUPERCOP is defined: unkeyed BLAKE2b with
   a full BLAKE2B_OUTBYTES-byte digest. */
int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
{
  return blake2b( out, BLAKE2B_OUTBYTES, in, inlen, NULL, 0 );
}
#endif
|
||||||
|
|
||||||
|
#if defined(BLAKE2B_SELFTEST)
#include <string.h>
#include "blake2-kat.h"
/*
  Self-test against the keyed known-answer vectors in blake2-kat.h.
  Key and message are the byte sequences 0, 1, 2, ...; every message length
  below BLAKE2_KAT_LENGTH is checked through the one-shot API and through
  the streaming API with every chunk size below one block.
  Prints "ok" and returns 0 on success, prints "error" and returns -1 on
  the first mismatch or failing call.
*/
int main( void )
{
  uint8_t key[BLAKE2B_KEYBYTES];
  uint8_t msg[BLAKE2_KAT_LENGTH];
  size_t len, chunk;

  for( len = 0; len < BLAKE2B_KEYBYTES; ++len )
  {
    key[len] = ( uint8_t )len;
  }

  for( len = 0; len < BLAKE2_KAT_LENGTH; ++len )
  {
    msg[len] = ( uint8_t )len;
  }

  /* Test simple API */
  for( len = 0; len < BLAKE2_KAT_LENGTH; ++len )
  {
    uint8_t digest[BLAKE2B_OUTBYTES];

    blake2b( digest, BLAKE2B_OUTBYTES, msg, len, key, BLAKE2B_KEYBYTES );

    if( memcmp( digest, blake2b_keyed_kat[len], BLAKE2B_OUTBYTES ) != 0 )
    {
      goto fail;
    }
  }

  /* Test streaming API */
  for( chunk = 1; chunk < BLAKE2B_BLOCKBYTES; ++chunk )
  {
    for( len = 0; len < BLAKE2_KAT_LENGTH; ++len )
    {
      uint8_t digest[BLAKE2B_OUTBYTES];
      blake2b_state S;
      uint8_t *cursor = msg;
      size_t remaining = len;

      if( blake2b_init_key( &S, BLAKE2B_OUTBYTES, key, BLAKE2B_KEYBYTES ) < 0 )
      {
        goto fail;
      }

      /* Feed whole chunks, then the tail that is shorter than a chunk. */
      for( ; remaining >= chunk; remaining -= chunk, cursor += chunk )
      {
        if( blake2b_update( &S, cursor, chunk ) < 0 )
        {
          goto fail;
        }
      }

      if( blake2b_update( &S, cursor, remaining ) < 0 )
      {
        goto fail;
      }

      if( blake2b_final( &S, digest, BLAKE2B_OUTBYTES ) < 0 )
      {
        goto fail;
      }

      if( memcmp( digest, blake2b_keyed_kat[len], BLAKE2B_OUTBYTES ) != 0 )
      {
        goto fail;
      }
    }
  }

  puts( "ok" );
  return 0;
fail:
  puts("error");
  return -1;
}
#endif
|
||||||
359
3rd/BLAKE2/blake2bp-ref.c
Normal file
359
3rd/BLAKE2/blake2bp-ref.c
Normal file
@@ -0,0 +1,359 @@
|
|||||||
|
/*
|
||||||
|
BLAKE2 reference source code package - reference C implementations
|
||||||
|
|
||||||
|
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
|
||||||
|
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
|
||||||
|
your option. The terms of these licenses can be found at:
|
||||||
|
|
||||||
|
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||||
|
- OpenSSL license : https://www.openssl.org/source/license.html
|
||||||
|
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
More information about the BLAKE2 hash function can be found at
|
||||||
|
https://blake2.net.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
#include <omp.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "blake2.h"
|
||||||
|
#include "blake2-impl.h"
|
||||||
|
|
||||||
|
/* Number of leaf BLAKE2b instances processed side by side. */
#define PARALLELISM_DEGREE 4
|
||||||
|
|
||||||
|
/*
|
||||||
|
blake2b_init_param defaults to setting the expecting output length
|
||||||
|
from the digest_length parameter block field.
|
||||||
|
|
||||||
|
In some cases, however, we do not want this, as the output length
|
||||||
|
of these instances is given by inner_length instead.
|
||||||
|
*/
|
||||||
|
static int blake2bp_init_leaf_param( blake2b_state *S, const blake2b_param *P )
|
||||||
|
{
|
||||||
|
int err = blake2b_init_param(S, P);
|
||||||
|
S->outlen = P->inner_length;
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
  Initialize `S` as the leaf with index `offset`: fanout PARALLELISM_DEGREE,
  depth 2, node depth 0, inner length BLAKE2B_OUTBYTES.
  `outlen` and `keylen` are recorded in the parameter block only; nothing
  is validated here. Returns the result of blake2bp_init_leaf_param.
*/
static int blake2bp_init_leaf( blake2b_state *S, size_t outlen, size_t keylen, uint64_t offset )
{
  blake2b_param P[1];

  P->digest_length = ( uint8_t )outlen;
  P->key_length    = ( uint8_t )keylen;
  P->fanout        = PARALLELISM_DEGREE;
  P->depth         = 2;
  store32( &P->leaf_length, 0 );
  store32( &P->node_offset, offset );
  store32( &P->xof_length, 0 );
  P->node_depth    = 0;
  P->inner_length  = BLAKE2B_OUTBYTES;
  memset( P->reserved, 0, sizeof( P->reserved ) );
  memset( P->salt,     0, sizeof( P->salt ) );
  memset( P->personal, 0, sizeof( P->personal ) );

  return blake2bp_init_leaf_param( S, P );
}
|
||||||
|
|
||||||
|
/*
  Initialize `S` as the root node: fanout PARALLELISM_DEGREE, depth 2,
  node offset 0, node depth 1, inner length BLAKE2B_OUTBYTES.
  `outlen` and `keylen` are recorded in the parameter block only; nothing
  is validated here. Returns the result of blake2b_init_param.
*/
static int blake2bp_init_root( blake2b_state *S, size_t outlen, size_t keylen )
{
  blake2b_param P[1];

  P->digest_length = ( uint8_t )outlen;
  P->key_length    = ( uint8_t )keylen;
  P->fanout        = PARALLELISM_DEGREE;
  P->depth         = 2;
  store32( &P->leaf_length, 0 );
  store32( &P->node_offset, 0 );
  store32( &P->xof_length, 0 );
  P->node_depth    = 1;
  P->inner_length  = BLAKE2B_OUTBYTES;
  memset( P->reserved, 0, sizeof( P->reserved ) );
  memset( P->salt,     0, sizeof( P->salt ) );
  memset( P->personal, 0, sizeof( P->personal ) );

  return blake2b_init_param( S, P );
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
  Initialize `S` for unkeyed hashing with an `outlen`-byte digest.

  Returns -1 when outlen is 0 or greater than BLAKE2B_OUTBYTES, or when
  initializing the root or any leaf fails; 0 on success.
*/
int blake2bp_init( blake2bp_state *S, size_t outlen )
{
  size_t leaf;

  if( outlen == 0 || outlen > BLAKE2B_OUTBYTES )
  {
    return -1;
  }

  memset( S->buf, 0, sizeof( S->buf ) );
  S->buflen = 0;
  S->outlen = outlen;

  if( blake2bp_init_root( S->R, outlen, 0 ) < 0 )
  {
    return -1;
  }

  for( leaf = 0; leaf < PARALLELISM_DEGREE; ++leaf )
  {
    if( blake2bp_init_leaf( S->S[leaf], outlen, 0, leaf ) < 0 )
    {
      return -1;
    }
  }

  /* The root and the highest-numbered leaf carry the last-node marker. */
  S->R->last_node = 1;
  S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
  return 0;
}
|
||||||
|
|
||||||
|
/*
  Initialize `S` for keyed hashing with an `outlen`-byte digest.

  Returns -1 when outlen is 0 or greater than BLAKE2B_OUTBYTES, when `key`
  is NULL, when keylen is 0 or greater than BLAKE2B_KEYBYTES, or when
  initializing the root or any leaf fails; 0 on success. The key,
  zero-padded to one block, is fed to every leaf as its first block.
*/
int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen )
{
  uint8_t key_block[BLAKE2B_BLOCKBYTES];
  size_t leaf;

  if( outlen == 0 || outlen > BLAKE2B_OUTBYTES )
  {
    return -1;
  }

  if( key == NULL || keylen == 0 || keylen > BLAKE2B_KEYBYTES )
  {
    return -1;
  }

  memset( S->buf, 0, sizeof( S->buf ) );
  S->buflen = 0;
  S->outlen = outlen;

  if( blake2bp_init_root( S->R, outlen, keylen ) < 0 )
  {
    return -1;
  }

  for( leaf = 0; leaf < PARALLELISM_DEGREE; ++leaf )
  {
    if( blake2bp_init_leaf( S->S[leaf], outlen, keylen, leaf ) < 0 )
    {
      return -1;
    }
  }

  /* The root and the highest-numbered leaf carry the last-node marker. */
  S->R->last_node = 1;
  S->S[PARALLELISM_DEGREE - 1]->last_node = 1;

  memset( key_block, 0, BLAKE2B_BLOCKBYTES );
  memcpy( key_block, key, keylen );

  for( leaf = 0; leaf < PARALLELISM_DEGREE; ++leaf )
  {
    blake2b_update( S->S[leaf], key_block, BLAKE2B_BLOCKBYTES );
  }

  secure_zero_memory( key_block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
  return 0;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
  Absorb `inlen` bytes from `pin`. Always returns 0.

  Input is consumed in stripes of PARALLELISM_DEGREE blocks; block k of a
  stripe goes to leaf k. A partial stripe is kept in S->buf until enough
  input arrives to complete it (or until blake2bp_final).
*/
int blake2bp_update( blake2bp_state *S, const void *pin, size_t inlen )
{
  const unsigned char *in = ( const unsigned char * )pin;
  size_t buffered = S->buflen;
  size_t lane;

  /* Complete a partially buffered stripe first and hand it to the leaves. */
  if( buffered && inlen >= sizeof( S->buf ) - buffered )
  {
    const size_t fill = sizeof( S->buf ) - buffered;

    memcpy( S->buf + buffered, in, fill );

    for( lane = 0; lane < PARALLELISM_DEGREE; ++lane )
    {
      blake2b_update( S->S[lane], S->buf + lane * BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES );
    }

    in       += fill;
    inlen    -= fill;
    buffered  = 0;
  }

  /* Each lane walks the whole stripes still in the input, taking its own
     block from every stripe. */
#if defined(_OPENMP)
  #pragma omp parallel shared(S), num_threads(PARALLELISM_DEGREE)
#else
  for( lane = 0; lane < PARALLELISM_DEGREE; ++lane )
#endif
  {
#if defined(_OPENMP)
    size_t lane = omp_get_thread_num();
#endif
    size_t pending = inlen;
    const unsigned char *block = ( const unsigned char * )in;
    block += lane * BLAKE2B_BLOCKBYTES;

    while( pending >= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES )
    {
      blake2b_update( S->S[lane], block, BLAKE2B_BLOCKBYTES );
      block   += PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
      pending -= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
    }
  }

  /* Skip past the whole stripes consumed above and buffer the tail. */
  in    += inlen - inlen % ( PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES );
  inlen %= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;

  if( inlen > 0 )
  {
    memcpy( S->buf + buffered, in, inlen );
  }

  S->buflen = buffered + inlen;
  return 0;
}
|
||||||
|
|
||||||
|
/*
  Finish the hash and write S->outlen digest bytes to `out`.

  Returns -1 when `out` is NULL or `outlen` is smaller than the digest
  length fixed at init time; otherwise the result of finalizing the root.
  Buffered input is distributed to the leaves block by block, every leaf is
  finalized, and the leaf digests are hashed by the root in leaf order.
*/
int blake2bp_final( blake2bp_state *S, void *out, size_t outlen )
{
  uint8_t leaf_digest[PARALLELISM_DEGREE][BLAKE2B_OUTBYTES];
  size_t leaf;

  if( out == NULL || outlen < S->outlen )
  {
    return -1;
  }

  for( leaf = 0; leaf < PARALLELISM_DEGREE; ++leaf )
  {
    const size_t offset = leaf * BLAKE2B_BLOCKBYTES;

    /* Hand this leaf its share (at most one block) of the buffered tail. */
    if( S->buflen > offset )
    {
      size_t share = S->buflen - offset;

      if( share > BLAKE2B_BLOCKBYTES )
      {
        share = BLAKE2B_BLOCKBYTES;
      }

      blake2b_update( S->S[leaf], S->buf + offset, share );
    }

    blake2b_final( S->S[leaf], leaf_digest[leaf], BLAKE2B_OUTBYTES );
  }

  for( leaf = 0; leaf < PARALLELISM_DEGREE; ++leaf )
  {
    blake2b_update( S->R, leaf_digest[leaf], BLAKE2B_OUTBYTES );
  }

  return blake2b_final( S->R, out, S->outlen );
}
|
||||||
|
|
||||||
|
int blake2bp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
|
||||||
|
{
|
||||||
|
uint8_t hash[PARALLELISM_DEGREE][BLAKE2B_OUTBYTES];
|
||||||
|
blake2b_state S[PARALLELISM_DEGREE][1];
|
||||||
|
blake2b_state FS[1];
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
/* Verify parameters */
|
||||||
|
if ( NULL == in && inlen > 0 ) return -1;
|
||||||
|
|
||||||
|
if ( NULL == out ) return -1;
|
||||||
|
|
||||||
|
if( NULL == key && keylen > 0 ) return -1;
|
||||||
|
|
||||||
|
if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
|
||||||
|
|
||||||
|
if( keylen > BLAKE2B_KEYBYTES ) return -1;
|
||||||
|
|
||||||
|
for( i = 0; i < PARALLELISM_DEGREE; ++i )
|
||||||
|
if( blake2bp_init_leaf( S[i], outlen, keylen, i ) < 0 ) return -1;
|
||||||
|
|
||||||
|
S[PARALLELISM_DEGREE - 1]->last_node = 1; /* mark last node */
|
||||||
|
|
||||||
|
if( keylen > 0 )
|
||||||
|
{
|
||||||
|
uint8_t block[BLAKE2B_BLOCKBYTES];
|
||||||
|
memset( block, 0, BLAKE2B_BLOCKBYTES );
|
||||||
|
memcpy( block, key, keylen );
|
||||||
|
|
||||||
|
for( i = 0; i < PARALLELISM_DEGREE; ++i )
|
||||||
|
blake2b_update( S[i], block, BLAKE2B_BLOCKBYTES );
|
||||||
|
|
||||||
|
secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
#pragma omp parallel shared(S,hash), num_threads(PARALLELISM_DEGREE)
|
||||||
|
#else
|
||||||
|
|
||||||
|
for( i = 0; i < PARALLELISM_DEGREE; ++i )
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
size_t i = omp_get_thread_num();
|
||||||
|
#endif
|
||||||
|
size_t inlen__ = inlen;
|
||||||
|
const unsigned char *in__ = ( const unsigned char * )in;
|
||||||
|
in__ += i * BLAKE2B_BLOCKBYTES;
|
||||||
|
|
||||||
|
while( inlen__ >= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES )
|
||||||
|
{
|
||||||
|
blake2b_update( S[i], in__, BLAKE2B_BLOCKBYTES );
|
||||||
|
in__ += PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
|
||||||
|
inlen__ -= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( inlen__ > i * BLAKE2B_BLOCKBYTES )
|
||||||
|
{
|
||||||
|
const size_t left = inlen__ - i * BLAKE2B_BLOCKBYTES;
|
||||||
|
const size_t len = left <= BLAKE2B_BLOCKBYTES ? left : BLAKE2B_BLOCKBYTES;
|
||||||
|
blake2b_update( S[i], in__, len );
|
||||||
|
}
|
||||||
|
|
||||||
|
blake2b_final( S[i], hash[i], BLAKE2B_OUTBYTES );
|
||||||
|
}
|
||||||
|
|
||||||
|
if( blake2bp_init_root( FS, outlen, keylen ) < 0 )
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
FS->last_node = 1; /* Mark as last node */
|
||||||
|
|
||||||
|
for( i = 0; i < PARALLELISM_DEGREE; ++i )
|
||||||
|
blake2b_update( FS, hash[i], BLAKE2B_OUTBYTES );
|
||||||
|
|
||||||
|
return blake2b_final( FS, out, outlen );;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(BLAKE2BP_SELFTEST)
#include <string.h>
#include "blake2-kat.h"
/*
  Self-test against the keyed known-answer vectors in blake2-kat.h.
  Key and message are the byte sequences 0, 1, 2, ...; every message length
  below BLAKE2_KAT_LENGTH is checked through the one-shot API and through
  the streaming API with every chunk size below one BLAKE2b block.
  Prints "ok" and returns 0 on success, prints "error" and returns -1 on
  the first mismatch or failing call.
*/
int main( void )
{
  uint8_t key[BLAKE2B_KEYBYTES];
  uint8_t msg[BLAKE2_KAT_LENGTH];
  size_t len, chunk;

  for( len = 0; len < BLAKE2B_KEYBYTES; ++len )
  {
    key[len] = ( uint8_t )len;
  }

  for( len = 0; len < BLAKE2_KAT_LENGTH; ++len )
  {
    msg[len] = ( uint8_t )len;
  }

  /* Test simple API */
  for( len = 0; len < BLAKE2_KAT_LENGTH; ++len )
  {
    uint8_t digest[BLAKE2B_OUTBYTES];

    blake2bp( digest, BLAKE2B_OUTBYTES, msg, len, key, BLAKE2B_KEYBYTES );

    if( memcmp( digest, blake2bp_keyed_kat[len], BLAKE2B_OUTBYTES ) != 0 )
    {
      goto fail;
    }
  }

  /* Test streaming API */
  for( chunk = 1; chunk < BLAKE2B_BLOCKBYTES; ++chunk )
  {
    for( len = 0; len < BLAKE2_KAT_LENGTH; ++len )
    {
      uint8_t digest[BLAKE2B_OUTBYTES];
      blake2bp_state S;
      uint8_t *cursor = msg;
      size_t remaining = len;

      if( blake2bp_init_key( &S, BLAKE2B_OUTBYTES, key, BLAKE2B_KEYBYTES ) < 0 )
      {
        goto fail;
      }

      /* Feed whole chunks, then the tail that is shorter than a chunk. */
      for( ; remaining >= chunk; remaining -= chunk, cursor += chunk )
      {
        if( blake2bp_update( &S, cursor, chunk ) < 0 )
        {
          goto fail;
        }
      }

      if( blake2bp_update( &S, cursor, remaining ) < 0 )
      {
        goto fail;
      }

      if( blake2bp_final( &S, digest, BLAKE2B_OUTBYTES ) < 0 )
      {
        goto fail;
      }

      if( memcmp( digest, blake2bp_keyed_kat[len], BLAKE2B_OUTBYTES ) != 0 )
      {
        goto fail;
      }
    }
  }

  puts( "ok" );
  return 0;
fail:
  puts("error");
  return -1;
}
#endif
|
||||||
367
3rd/BLAKE2/blake2s-ref.c
Normal file
367
3rd/BLAKE2/blake2s-ref.c
Normal file
@@ -0,0 +1,367 @@
|
|||||||
|
/*
|
||||||
|
BLAKE2 reference source code package - reference C implementations
|
||||||
|
|
||||||
|
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
|
||||||
|
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
|
||||||
|
your option. The terms of these licenses can be found at:
|
||||||
|
|
||||||
|
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||||
|
- OpenSSL license : https://www.openssl.org/source/license.html
|
||||||
|
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
More information about the BLAKE2 hash function can be found at
|
||||||
|
https://blake2.net.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "blake2.h"
|
||||||
|
#include "blake2-impl.h"
|
||||||
|
|
||||||
|
/* BLAKE2s initialization vector: eight 32-bit words, used to seed the chaining
   value h[0..7] and the upper half of the compression work vector. */
static const uint32_t blake2s_IV[8] =
{
  UINT32_C( 0x6a09e667 ),
  UINT32_C( 0xbb67ae85 ),
  UINT32_C( 0x3c6ef372 ),
  UINT32_C( 0xa54ff53a ),
  UINT32_C( 0x510e527f ),
  UINT32_C( 0x9b05688c ),
  UINT32_C( 0x1f83d9ab ),
  UINT32_C( 0x5be0cd19 )
};
|
||||||
|
|
||||||
|
/* Message word schedule: row r lists, in order, the indices of the message
   words consumed by the eight G applications of round r (two per G). */
static const uint8_t blake2s_sigma[10][16] =
{
  /* round 0 */ {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
  /* round 1 */ { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
  /* round 2 */ { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 },
  /* round 3 */ {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 },
  /* round 4 */ {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 },
  /* round 5 */ {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 },
  /* round 6 */ { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 },
  /* round 7 */ { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 },
  /* round 8 */ {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 },
  /* round 9 */ { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0 }
};
|
||||||
|
|
||||||
|
/* Raise the second finalization flag f[1] (all bits set) on state S. */
static void blake2s_set_lastnode( blake2s_state *S )
{
  S->f[1] = UINT32_MAX;
}
|
||||||
|
|
||||||
|
/* Some helper functions, not necessarily useful */
|
||||||
|
static int blake2s_is_lastblock( const blake2s_state *S )
|
||||||
|
{
|
||||||
|
return S->f[0] != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Mark the next compression as the final one: set f[0] to all ones, and also
   f[1] (via blake2s_set_lastnode) when the state is flagged as a last node. */
static void blake2s_set_lastblock( blake2s_state *S )
{
  if( S->last_node != 0 )
  {
    blake2s_set_lastnode( S );
  }

  S->f[0] = UINT32_MAX;
}
|
||||||
|
|
||||||
|
/* Add inc to the byte counter held in t[0] (low word) and t[1] (high word),
   carrying into t[1] when the low word wraps around. */
static void blake2s_increment_counter( blake2s_state *S, const uint32_t inc )
{
  const uint32_t low = S->t[0] + inc;

  S->t[0] = low;

  /* unsigned wrap-around happened iff the sum is smaller than the addend */
  if( low < inc )
  {
    S->t[1] += 1;
  }
}
|
||||||
|
|
||||||
|
/* Reset S to all zero bytes and load the initialization vector into h[0..7]. */
static void blake2s_init0( blake2s_state *S )
{
  size_t word;

  memset( S, 0, sizeof( *S ) );

  for( word = 0; word < sizeof( blake2s_IV ) / sizeof( blake2s_IV[0] ); ++word )
  {
    S->h[word] = blake2s_IV[word];
  }
}
|
||||||
|
|
||||||
|
/* blake2s_init_param XORs the IV with the input parameter block */
|
||||||
|
/*
  Initialize S from a fully populated parameter block.

  The state is reset, then each of the eight chaining words is XORed with the
  corresponding 32-bit word read (via load32) from the raw bytes of *P.  The
  output length of the state is taken from P->digest_length.

  No validation of the parameter block is performed here.  Always returns 0.
*/
int blake2s_init_param( blake2s_state *S, const blake2s_param *P )
{
  const unsigned char *raw = ( const unsigned char * )P;
  size_t word;

  blake2s_init0( S );

  /* h[word] = IV[word] ^ param_word[word] */
  for( word = 0; word < 8; ++word, raw += 4 )
  {
    S->h[word] ^= load32( raw );
  }

  S->outlen = P->digest_length;
  return 0;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Sequential blake2s initialization */
|
||||||
|
/*
  Unkeyed, sequential BLAKE2s initialization.

  S       state to initialize
  outlen  digest length in bytes, 1..BLAKE2S_OUTBYTES

  Fills a parameter block for sequential mode (fanout 1, depth 1, everything
  else zero) and hands it to blake2s_init_param.

  Returns -1 when outlen is out of range, otherwise the result of
  blake2s_init_param.
*/
int blake2s_init( blake2s_state *S, size_t outlen )
{
  blake2s_param params;

  if( outlen == 0 || outlen > BLAKE2S_OUTBYTES )
    return -1;

  params.digest_length = ( uint8_t )outlen;
  params.key_length    = 0;
  params.fanout        = 1;
  params.depth         = 1;
  params.node_depth    = 0;
  params.inner_length  = 0;
  store32( &params.leaf_length, 0 );
  store32( &params.node_offset, 0 );
  store16( &params.xof_length, 0 );
  memset( params.salt, 0, sizeof( params.salt ) );
  memset( params.personal, 0, sizeof( params.personal ) );

  return blake2s_init_param( S, &params );
}
|
||||||
|
|
||||||
|
/*
  Keyed, sequential BLAKE2s initialization.

  S       state to initialize
  outlen  digest length in bytes, 1..BLAKE2S_OUTBYTES
  key     key bytes, must not be NULL
  keylen  key length in bytes, 1..BLAKE2S_KEYBYTES

  After the parameter block has been applied, the key is zero-padded to one
  full block and absorbed as the first block of input.  The padded copy is
  wiped from the stack afterwards.

  Returns 0 on success, -1 on invalid arguments or initialization failure.
*/
int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen )
{
  blake2s_param params;
  uint8_t padded_key[BLAKE2S_BLOCKBYTES];

  if( outlen == 0 || outlen > BLAKE2S_OUTBYTES )
    return -1;

  if( key == NULL || keylen == 0 || keylen > BLAKE2S_KEYBYTES )
    return -1;

  params.digest_length = ( uint8_t )outlen;
  params.key_length    = ( uint8_t )keylen;
  params.fanout        = 1;
  params.depth         = 1;
  params.node_depth    = 0;
  params.inner_length  = 0;
  store32( &params.leaf_length, 0 );
  store32( &params.node_offset, 0 );
  store16( &params.xof_length, 0 );
  memset( params.salt, 0, sizeof( params.salt ) );
  memset( params.personal, 0, sizeof( params.personal ) );

  if( blake2s_init_param( S, &params ) < 0 )
    return -1;

  /* Absorb the key as a zero-padded first block */
  memset( padded_key, 0, sizeof( padded_key ) );
  memcpy( padded_key, key, keylen );
  blake2s_update( S, padded_key, BLAKE2S_BLOCKBYTES );
  secure_zero_memory( padded_key, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */

  return 0;
}
|
||||||
|
|
||||||
|
/*
  BLAKE2s mixing function.

  Mixes the four work-vector words a, b, c, d in place, injecting the two
  message words selected by entries 2*i and 2*i+1 of row r of blake2s_sigma.
  Expects a local array `m` of message words in the expanding scope.
  Right-rotation amounts are 16, 12, 8 and 7.
*/
#define G(r,i,a,b,c,d)                          \
  do {                                          \
    a += b + m[blake2s_sigma[r][2*(i)+0]];      \
    d ^= a;                                     \
    d = rotr32(d, 16);                          \
    c += d;                                     \
    b ^= c;                                     \
    b = rotr32(b, 12);                          \
    a += b + m[blake2s_sigma[r][2*(i)+1]];      \
    d ^= a;                                     \
    d = rotr32(d, 8);                           \
    c += d;                                     \
    b ^= c;                                     \
    b = rotr32(b, 7);                           \
  } while(0)
|
||||||
|
|
||||||
|
/*
  One full BLAKE2s round r over the 16-word work vector `v` (which must be a
  local array in the expanding scope): four G applications on the columns of
  the 4x4 word matrix followed by four on its diagonals.
*/
#define ROUND(r)                        \
  do {                                  \
    /* columns */                       \
    G(r, 0, v[ 0], v[ 4], v[ 8], v[12]); \
    G(r, 1, v[ 1], v[ 5], v[ 9], v[13]); \
    G(r, 2, v[ 2], v[ 6], v[10], v[14]); \
    G(r, 3, v[ 3], v[ 7], v[11], v[15]); \
    /* diagonals */                     \
    G(r, 4, v[ 0], v[ 5], v[10], v[15]); \
    G(r, 5, v[ 1], v[ 6], v[11], v[12]); \
    G(r, 6, v[ 2], v[ 7], v[ 8], v[13]); \
    G(r, 7, v[ 3], v[ 4], v[ 9], v[14]); \
  } while(0)
|
||||||
|
|
||||||
|
/*
  BLAKE2s compression function.

  Folds one BLAKE2S_BLOCKBYTES-byte block `in` into the chaining value S->h,
  using the current counter S->t and finalization flags S->f.  The caller is
  responsible for updating the counter and flags beforehand.
*/
static void blake2s_compress( blake2s_state *S, const uint8_t in[BLAKE2S_BLOCKBYTES] )
{
  uint32_t m[16];   /* message words (name required by the G macro) */
  uint32_t v[16];   /* work vector  (name required by the ROUND macro) */
  size_t k;

  /* Decode the block into sixteen 32-bit words */
  for( k = 0; k < 16; ++k )
    m[k] = load32( in + k * sizeof( m[k] ) );

  /* Lower half: chaining value; upper half: IV ... */
  for( k = 0; k < 8; ++k )
  {
    v[k]     = S->h[k];
    v[k + 8] = blake2s_IV[k];
  }

  /* ... with counter and finalization flags mixed into the last four words */
  v[12] ^= S->t[0];
  v[13] ^= S->t[1];
  v[14] ^= S->f[0];
  v[15] ^= S->f[1];

  /* Ten rounds */
  for( k = 0; k < 10; ++k )
    ROUND( k );

  /* Feed-forward: fold both halves of the work vector into the chaining value */
  for( k = 0; k < 8; ++k )
    S->h[k] ^= v[k] ^ v[k + 8];
}
|
||||||
|
|
||||||
|
#undef G
|
||||||
|
#undef ROUND
|
||||||
|
|
||||||
|
/*
  Absorb inlen bytes from pin into the hash state.

  Input is staged in S->buf.  A block is compressed only when more input
  follows it (note the strict `>` comparisons), so whatever ends the stream
  stays buffered for blake2s_final.

  Always returns 0.  A zero-length call leaves the state untouched.
*/
int blake2s_update( blake2s_state *S, const void *pin, size_t inlen )
{
  const unsigned char *src = ( const unsigned char * )pin;
  size_t buffered;
  size_t room;

  if( inlen == 0 )
    return 0;

  buffered = S->buflen;
  room = BLAKE2S_BLOCKBYTES - buffered;

  if( inlen > room )
  {
    /* Top up the staging buffer and compress it */
    memcpy( S->buf + buffered, src, room );
    S->buflen = 0;
    blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
    blake2s_compress( S, S->buf );
    src += room;
    inlen -= room;

    /* Compress full blocks straight from the input while more data follows */
    for( ; inlen > BLAKE2S_BLOCKBYTES; src += BLAKE2S_BLOCKBYTES, inlen -= BLAKE2S_BLOCKBYTES )
    {
      blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
      blake2s_compress( S, src );
    }
  }

  /* Stash the tail (at most one block) for later */
  memcpy( S->buf + S->buflen, src, inlen );
  S->buflen += inlen;

  return 0;
}
|
||||||
|
|
||||||
|
/*
  Finish the hash and write the digest.

  S       state previously set up by one of the init functions
  out     destination buffer, must not be NULL
  outlen  capacity of out in bytes; must be at least S->outlen

  Exactly S->outlen bytes are written to out.  (Previously `outlen` bytes
  were copied from the BLAKE2S_OUTBYTES-sized temporary, which read past the
  end of that temporary whenever the caller passed a capacity larger than
  BLAKE2S_OUTBYTES.)

  Returns 0 on success, -1 if out is NULL, the buffer is too small, or the
  state has already been finalized.
*/
int blake2s_final( blake2s_state *S, void *out, size_t outlen )
{
  uint8_t buffer[BLAKE2S_OUTBYTES] = {0};
  size_t i;

  if( out == NULL || outlen < S->outlen )
    return -1;

  /* Refuse to finalize the same state twice */
  if( blake2s_is_lastblock( S ) )
    return -1;

  /* Account for the buffered tail, flag the last block, pad and compress */
  blake2s_increment_counter( S, ( uint32_t )S->buflen );
  blake2s_set_lastblock( S );
  memset( S->buf + S->buflen, 0, BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */
  blake2s_compress( S, S->buf );

  for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */
    store32( buffer + sizeof( S->h[i] ) * i, S->h[i] );

  /* Copy only the configured digest length; never more than the temp holds */
  memcpy( out, buffer, S->outlen );
  secure_zero_memory( buffer, sizeof( buffer ) );
  return 0;
}
|
||||||
|
|
||||||
|
int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
|
||||||
|
{
|
||||||
|
blake2s_state S[1];
|
||||||
|
|
||||||
|
/* Verify parameters */
|
||||||
|
if ( NULL == in && inlen > 0 ) return -1;
|
||||||
|
|
||||||
|
if ( NULL == out ) return -1;
|
||||||
|
|
||||||
|
if ( NULL == key && keylen > 0) return -1;
|
||||||
|
|
||||||
|
if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
|
||||||
|
|
||||||
|
if( keylen > BLAKE2S_KEYBYTES ) return -1;
|
||||||
|
|
||||||
|
if( keylen > 0 )
|
||||||
|
{
|
||||||
|
if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if( blake2s_init( S, outlen ) < 0 ) return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
blake2s_update( S, ( const uint8_t * )in, inlen );
|
||||||
|
blake2s_final( S, out, outlen );
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(SUPERCOP)
|
||||||
|
int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
|
||||||
|
{
|
||||||
|
return blake2s( out, BLAKE2S_OUTBYTES, in, inlen, NULL, 0 );
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(BLAKE2S_SELFTEST)
|
||||||
|
#include <string.h>
|
||||||
|
#include "blake2-kat.h"
|
||||||
|
int main( void )
|
||||||
|
{
|
||||||
|
uint8_t key[BLAKE2S_KEYBYTES];
|
||||||
|
uint8_t buf[BLAKE2_KAT_LENGTH];
|
||||||
|
size_t i, step;
|
||||||
|
|
||||||
|
for( i = 0; i < BLAKE2S_KEYBYTES; ++i )
|
||||||
|
key[i] = ( uint8_t )i;
|
||||||
|
|
||||||
|
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
|
||||||
|
buf[i] = ( uint8_t )i;
|
||||||
|
|
||||||
|
/* Test simple API */
|
||||||
|
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
|
||||||
|
{
|
||||||
|
uint8_t hash[BLAKE2S_OUTBYTES];
|
||||||
|
blake2s( hash, BLAKE2S_OUTBYTES, buf, i, key, BLAKE2S_KEYBYTES );
|
||||||
|
|
||||||
|
if( 0 != memcmp( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) )
|
||||||
|
{
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Test streaming API */
|
||||||
|
for(step = 1; step < BLAKE2S_BLOCKBYTES; ++step) {
|
||||||
|
for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) {
|
||||||
|
uint8_t hash[BLAKE2S_OUTBYTES];
|
||||||
|
blake2s_state S;
|
||||||
|
uint8_t * p = buf;
|
||||||
|
size_t mlen = i;
|
||||||
|
int err = 0;
|
||||||
|
|
||||||
|
if( (err = blake2s_init_key(&S, BLAKE2S_OUTBYTES, key, BLAKE2S_KEYBYTES)) < 0 ) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (mlen >= step) {
|
||||||
|
if ( (err = blake2s_update(&S, p, step)) < 0 ) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
mlen -= step;
|
||||||
|
p += step;
|
||||||
|
}
|
||||||
|
if ( (err = blake2s_update(&S, p, mlen)) < 0) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
if ( (err = blake2s_final(&S, hash, BLAKE2S_OUTBYTES)) < 0) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 != memcmp(hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES)) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
puts( "ok" );
|
||||||
|
return 0;
|
||||||
|
fail:
|
||||||
|
puts("error");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
359
3rd/BLAKE2/blake2sp-ref.c
Normal file
359
3rd/BLAKE2/blake2sp-ref.c
Normal file
@@ -0,0 +1,359 @@
|
|||||||
|
/*
|
||||||
|
BLAKE2 reference source code package - reference C implementations
|
||||||
|
|
||||||
|
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
|
||||||
|
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
|
||||||
|
your option. The terms of these licenses can be found at:
|
||||||
|
|
||||||
|
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||||
|
- OpenSSL license : https://www.openssl.org/source/license.html
|
||||||
|
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
More information about the BLAKE2 hash function can be found at
|
||||||
|
https://blake2.net.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
#include <omp.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "blake2.h"
|
||||||
|
#include "blake2-impl.h"
|
||||||
|
|
||||||
|
#define PARALLELISM_DEGREE 8
|
||||||
|
|
||||||
|
/*
|
||||||
|
blake2s_init_param defaults to setting the expected output length
|
||||||
|
from the digest_length parameter block field.
|
||||||
|
|
||||||
|
In some cases, however, we do not want this, as the output length
|
||||||
|
of these instances is given by inner_length instead.
|
||||||
|
*/
|
||||||
|
/* Initialize a leaf state from parameter block P, then override the state's
   output length with P->inner_length (blake2s_init_param would have used
   P->digest_length).  Returns the result of blake2s_init_param. */
static int blake2sp_init_leaf_param( blake2s_state *S, const blake2s_param *P )
{
  const int status = blake2s_init_param( S, P );

  S->outlen = P->inner_length;

  return status;
}
|
||||||
|
|
||||||
|
/*
  Set up one leaf state of the BLAKE2sp tree.

  S       leaf state to initialize
  outlen  digest length recorded in the parameter block
  keylen  key length recorded in the parameter block
  offset  index of this leaf, stored as node_offset

  The parameter block uses fanout PARALLELISM_DEGREE, depth 2, node depth 0
  and inner length BLAKE2S_OUTBYTES.  Returns the result of
  blake2sp_init_leaf_param.
*/
static int blake2sp_init_leaf( blake2s_state *S, size_t outlen, size_t keylen, uint64_t offset )
{
  blake2s_param params;

  params.digest_length = ( uint8_t )outlen;
  params.key_length    = ( uint8_t )keylen;
  params.fanout        = PARALLELISM_DEGREE;
  params.depth         = 2;
  params.node_depth    = 0;
  params.inner_length  = BLAKE2S_OUTBYTES;
  store32( &params.leaf_length, 0 );
  store32( &params.node_offset, offset );
  store16( &params.xof_length, 0 );
  memset( params.salt, 0, sizeof( params.salt ) );
  memset( params.personal, 0, sizeof( params.personal ) );

  return blake2sp_init_leaf_param( S, &params );
}
|
||||||
|
|
||||||
|
/*
  Set up the root state of the BLAKE2sp tree.

  S       root state to initialize
  outlen  digest length recorded in the parameter block
  keylen  key length recorded in the parameter block

  The parameter block uses fanout PARALLELISM_DEGREE, depth 2, node offset 0,
  node depth 1 and inner length BLAKE2S_OUTBYTES.  Returns the result of
  blake2s_init_param.
*/
static int blake2sp_init_root( blake2s_state *S, size_t outlen, size_t keylen )
{
  blake2s_param params;

  params.digest_length = ( uint8_t )outlen;
  params.key_length    = ( uint8_t )keylen;
  params.fanout        = PARALLELISM_DEGREE;
  params.depth         = 2;
  params.node_depth    = 1;
  params.inner_length  = BLAKE2S_OUTBYTES;
  store32( &params.leaf_length, 0 );
  store32( &params.node_offset, 0 );
  store16( &params.xof_length, 0 );
  memset( params.salt, 0, sizeof( params.salt ) );
  memset( params.personal, 0, sizeof( params.personal ) );

  return blake2s_init_param( S, &params );
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
  Unkeyed BLAKE2sp initialization.

  S       streaming state to initialize
  outlen  digest length in bytes, 1..BLAKE2S_OUTBYTES

  Clears the staging buffer, initializes the root and the
  PARALLELISM_DEGREE leaves, and flags the root and the last leaf as
  last nodes.  Returns 0 on success, -1 on failure.
*/
int blake2sp_init( blake2sp_state *S, size_t outlen )
{
  size_t leaf;

  if( outlen == 0 || outlen > BLAKE2S_OUTBYTES )
    return -1;

  memset( S->buf, 0, sizeof( S->buf ) );
  S->buflen = 0;
  S->outlen = outlen;

  if( blake2sp_init_root( S->R, outlen, 0 ) < 0 )
    return -1;

  for( leaf = 0; leaf < PARALLELISM_DEGREE; ++leaf )
  {
    if( blake2sp_init_leaf( S->S[leaf], outlen, 0, leaf ) < 0 )
      return -1;
  }

  S->R->last_node = 1;
  S->S[PARALLELISM_DEGREE - 1]->last_node = 1;

  return 0;
}
|
||||||
|
|
||||||
|
/*
  Keyed BLAKE2sp initialization.

  S       streaming state to initialize
  outlen  digest length in bytes, 1..BLAKE2S_OUTBYTES
  key     key bytes, must not be NULL
  keylen  key length in bytes, 1..BLAKE2S_KEYBYTES

  Same setup as blake2sp_init, with the key length recorded in every
  parameter block; afterwards the zero-padded key block is absorbed by each
  leaf and the padded copy is wiped from the stack.

  Returns 0 on success, -1 on invalid arguments or initialization failure.
*/
int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen )
{
  uint8_t padded_key[BLAKE2S_BLOCKBYTES];
  size_t leaf;

  if( outlen == 0 || outlen > BLAKE2S_OUTBYTES )
    return -1;

  if( key == NULL || keylen == 0 || keylen > BLAKE2S_KEYBYTES )
    return -1;

  memset( S->buf, 0, sizeof( S->buf ) );
  S->buflen = 0;
  S->outlen = outlen;

  if( blake2sp_init_root( S->R, outlen, keylen ) < 0 )
    return -1;

  for( leaf = 0; leaf < PARALLELISM_DEGREE; ++leaf )
  {
    if( blake2sp_init_leaf( S->S[leaf], outlen, keylen, leaf ) < 0 )
      return -1;
  }

  S->R->last_node = 1;
  S->S[PARALLELISM_DEGREE - 1]->last_node = 1;

  /* Every leaf starts by absorbing the zero-padded key block */
  memset( padded_key, 0, sizeof( padded_key ) );
  memcpy( padded_key, key, keylen );

  for( leaf = 0; leaf < PARALLELISM_DEGREE; ++leaf )
    blake2s_update( S->S[leaf], padded_key, BLAKE2S_BLOCKBYTES );

  secure_zero_memory( padded_key, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */

  return 0;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
  Absorb inlen bytes from pin into the BLAKE2sp streaming state.

  Input is consumed in stripes of PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES
  bytes; block i of each stripe goes to leaf i.  A partially filled stripe is
  kept in S->buf until it can be completed or until blake2sp_final runs.

  When built with OpenMP, the per-leaf loops run in a parallel region with
  one thread per leaf.

  Always returns 0.
*/
int blake2sp_update( blake2sp_state *S, const void *pin, size_t inlen )
{
  const unsigned char *in = ( const unsigned char * )pin;
  size_t left = S->buflen;
  const size_t fill = sizeof( S->buf ) - left;
  size_t i;

  /* Complete and flush a previously buffered partial stripe */
  if( left != 0 && inlen >= fill )
  {
    memcpy( S->buf + left, in, fill );

    for( i = 0; i < PARALLELISM_DEGREE; ++i )
    {
      blake2s_update( S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES );
    }

    in += fill;
    inlen -= fill;
    left = 0;
  }

#if defined(_OPENMP)
  #pragma omp parallel shared(S), num_threads(PARALLELISM_DEGREE)
#else
  for( i = 0; i < PARALLELISM_DEGREE; ++i )
#endif
  {
#if defined(_OPENMP)
    size_t i = omp_get_thread_num();
#endif
    /* Leaf i walks the input one stripe at a time, taking its own block */
    size_t lane_len = inlen;
    const unsigned char *lane_in = ( const unsigned char * )in + i * BLAKE2S_BLOCKBYTES;

    for( ; lane_len >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
           lane_len -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES )
    {
      blake2s_update( S->S[i], lane_in, BLAKE2S_BLOCKBYTES );
      lane_in += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
    }
  }

  /* Skip the whole stripes consumed above; keep what is left over */
  in += inlen - inlen % ( PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES );
  inlen %= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;

  if( inlen > 0 )
  {
    memcpy( S->buf + left, in, inlen );
  }

  S->buflen = left + inlen;
  return 0;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
  Finish a BLAKE2sp computation and write the digest.

  S       streaming state
  out     destination buffer, must not be NULL
  outlen  capacity of out in bytes; must be at least S->outlen

  Each leaf absorbs its share of the buffered tail and is finalized to a
  BLAKE2S_OUTBYTES-byte digest; the leaf digests are then hashed by the root,
  whose output (S->outlen bytes) is written to out.

  Failures reported by the leaf or root steps are now propagated immediately,
  so that leaf digests that were never written are not fed into the root.

  Returns 0 on success, -1 on invalid arguments or if any step fails.
*/
int blake2sp_final( blake2sp_state *S, void *out, size_t outlen )
{
  uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES];
  size_t i;

  if( out == NULL || outlen < S->outlen )
  {
    return -1;
  }

  for( i = 0; i < PARALLELISM_DEGREE; ++i )
  {
    /* Block i of the buffered partial stripe, if any, belongs to leaf i */
    if( S->buflen > i * BLAKE2S_BLOCKBYTES )
    {
      size_t left = S->buflen - i * BLAKE2S_BLOCKBYTES;

      if( left > BLAKE2S_BLOCKBYTES ) left = BLAKE2S_BLOCKBYTES;

      if( blake2s_update( S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, left ) < 0 )
        return -1;
    }

    /* A failed leaf finalization leaves hash[i] unwritten: stop here */
    if( blake2s_final( S->S[i], hash[i], BLAKE2S_OUTBYTES ) < 0 )
      return -1;
  }

  for( i = 0; i < PARALLELISM_DEGREE; ++i )
  {
    if( blake2s_update( S->R, hash[i], BLAKE2S_OUTBYTES ) < 0 )
      return -1;
  }

  return blake2s_final( S->R, out, S->outlen );
}
|
||||||
|
|
||||||
|
|
||||||
|
int blake2sp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
|
||||||
|
{
|
||||||
|
uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES];
|
||||||
|
blake2s_state S[PARALLELISM_DEGREE][1];
|
||||||
|
blake2s_state FS[1];
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
/* Verify parameters */
|
||||||
|
if ( NULL == in && inlen > 0 ) return -1;
|
||||||
|
|
||||||
|
if ( NULL == out ) return -1;
|
||||||
|
|
||||||
|
if ( NULL == key && keylen > 0) return -1;
|
||||||
|
|
||||||
|
if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
|
||||||
|
|
||||||
|
if( keylen > BLAKE2S_KEYBYTES ) return -1;
|
||||||
|
|
||||||
|
for( i = 0; i < PARALLELISM_DEGREE; ++i )
|
||||||
|
if( blake2sp_init_leaf( S[i], outlen, keylen, i ) < 0 ) return -1;
|
||||||
|
|
||||||
|
S[PARALLELISM_DEGREE - 1]->last_node = 1; /* mark last node */
|
||||||
|
|
||||||
|
if( keylen > 0 )
|
||||||
|
{
|
||||||
|
uint8_t block[BLAKE2S_BLOCKBYTES];
|
||||||
|
memset( block, 0, BLAKE2S_BLOCKBYTES );
|
||||||
|
memcpy( block, key, keylen );
|
||||||
|
|
||||||
|
for( i = 0; i < PARALLELISM_DEGREE; ++i )
|
||||||
|
blake2s_update( S[i], block, BLAKE2S_BLOCKBYTES );
|
||||||
|
|
||||||
|
secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
#pragma omp parallel shared(S,hash), num_threads(PARALLELISM_DEGREE)
|
||||||
|
#else
|
||||||
|
|
||||||
|
for( i = 0; i < PARALLELISM_DEGREE; ++i )
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
size_t i = omp_get_thread_num();
|
||||||
|
#endif
|
||||||
|
size_t inlen__ = inlen;
|
||||||
|
const unsigned char *in__ = ( const unsigned char * )in;
|
||||||
|
in__ += i * BLAKE2S_BLOCKBYTES;
|
||||||
|
|
||||||
|
while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES )
|
||||||
|
{
|
||||||
|
blake2s_update( S[i], in__, BLAKE2S_BLOCKBYTES );
|
||||||
|
in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
|
||||||
|
inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( inlen__ > i * BLAKE2S_BLOCKBYTES )
|
||||||
|
{
|
||||||
|
const size_t left = inlen__ - i * BLAKE2S_BLOCKBYTES;
|
||||||
|
const size_t len = left <= BLAKE2S_BLOCKBYTES ? left : BLAKE2S_BLOCKBYTES;
|
||||||
|
blake2s_update( S[i], in__, len );
|
||||||
|
}
|
||||||
|
|
||||||
|
blake2s_final( S[i], hash[i], BLAKE2S_OUTBYTES );
|
||||||
|
}
|
||||||
|
|
||||||
|
if( blake2sp_init_root( FS, outlen, keylen ) < 0 )
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
FS->last_node = 1;
|
||||||
|
|
||||||
|
for( i = 0; i < PARALLELISM_DEGREE; ++i )
|
||||||
|
blake2s_update( FS, hash[i], BLAKE2S_OUTBYTES );
|
||||||
|
|
||||||
|
return blake2s_final( FS, out, outlen );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(BLAKE2SP_SELFTEST)
|
||||||
|
#include <string.h>
|
||||||
|
#include "blake2-kat.h"
|
||||||
|
int main( void )
|
||||||
|
{
|
||||||
|
uint8_t key[BLAKE2S_KEYBYTES];
|
||||||
|
uint8_t buf[BLAKE2_KAT_LENGTH];
|
||||||
|
size_t i, step;
|
||||||
|
|
||||||
|
for( i = 0; i < BLAKE2S_KEYBYTES; ++i )
|
||||||
|
key[i] = ( uint8_t )i;
|
||||||
|
|
||||||
|
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
|
||||||
|
buf[i] = ( uint8_t )i;
|
||||||
|
|
||||||
|
/* Test simple API */
|
||||||
|
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
|
||||||
|
{
|
||||||
|
uint8_t hash[BLAKE2S_OUTBYTES];
|
||||||
|
blake2sp( hash, BLAKE2S_OUTBYTES, buf, i, key, BLAKE2S_KEYBYTES );
|
||||||
|
|
||||||
|
if( 0 != memcmp( hash, blake2sp_keyed_kat[i], BLAKE2S_OUTBYTES ) )
|
||||||
|
{
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Test streaming API */
|
||||||
|
for(step = 1; step < BLAKE2S_BLOCKBYTES; ++step) {
|
||||||
|
for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) {
|
||||||
|
uint8_t hash[BLAKE2S_OUTBYTES];
|
||||||
|
blake2sp_state S;
|
||||||
|
uint8_t * p = buf;
|
||||||
|
size_t mlen = i;
|
||||||
|
int err = 0;
|
||||||
|
|
||||||
|
if( (err = blake2sp_init_key(&S, BLAKE2S_OUTBYTES, key, BLAKE2S_KEYBYTES)) < 0 ) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (mlen >= step) {
|
||||||
|
if ( (err = blake2sp_update(&S, p, step)) < 0 ) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
mlen -= step;
|
||||||
|
p += step;
|
||||||
|
}
|
||||||
|
if ( (err = blake2sp_update(&S, p, mlen)) < 0) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
if ( (err = blake2sp_final(&S, hash, BLAKE2S_OUTBYTES)) < 0) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 != memcmp(hash, blake2sp_keyed_kat[i], BLAKE2S_OUTBYTES)) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
puts( "ok" );
|
||||||
|
return 0;
|
||||||
|
fail:
|
||||||
|
puts("error");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
241
3rd/BLAKE2/blake2xb-ref.c
Normal file
241
3rd/BLAKE2/blake2xb-ref.c
Normal file
@@ -0,0 +1,241 @@
|
|||||||
|
/*
|
||||||
|
BLAKE2 reference source code package - reference C implementations
|
||||||
|
|
||||||
|
Copyright 2016, JP Aumasson <jeanphilippe.aumasson@gmail.com>.
|
||||||
|
Copyright 2016, Samuel Neves <sneves@dei.uc.pt>.
|
||||||
|
|
||||||
|
You may use this under the terms of the CC0, the OpenSSL Licence, or
|
||||||
|
the Apache Public License 2.0, at your option. The terms of these
|
||||||
|
licenses can be found at:
|
||||||
|
|
||||||
|
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||||
|
- OpenSSL license : https://www.openssl.org/source/license.html
|
||||||
|
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
More information about the BLAKE2 hash function can be found at
|
||||||
|
https://blake2.net.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "blake2.h"
|
||||||
|
#include "blake2-impl.h"
|
||||||
|
|
||||||
|
/* Unkeyed BLAKE2xb initialization: forwards to blake2xb_init_key with no key.
   Returns that function's result. */
int blake2xb_init( blake2xb_state *S, const size_t outlen )
{
  const void *no_key = NULL;

  return blake2xb_init_key( S, outlen, no_key, 0 );
}
|
||||||
|
|
||||||
|
/*
  Initialize a BLAKE2xb state, optionally keyed.

  S       state to initialize
  outlen  requested output length, 1..0xFFFFFFFF; stored as xof_length
  key     key bytes, or NULL when keylen is 0
  keylen  key length in bytes, 0..BLAKE2B_KEYBYTES

  The parameter block kept in S->P describes a sequential BLAKE2b instance
  with a BLAKE2B_OUTBYTES digest, and is used to initialize the inner state
  S->S.  When a key is given it is zero-padded to one block, absorbed, and
  the padded copy wiped from the stack.

  Returns 0 on success, -1 on invalid arguments or initialization failure.
*/
int blake2xb_init_key( blake2xb_state *S, const size_t outlen, const void *key, size_t keylen)
{
  if( outlen == 0 || outlen > 0xFFFFFFFFUL )
    return -1;

  /* A key pointer is required for a non-zero length; the length is bounded */
  if( key == NULL )
  {
    if( keylen > 0 )
      return -1;
  }
  else if( keylen > BLAKE2B_KEYBYTES )
  {
    return -1;
  }

  /* Initialize parameter block */
  S->P->digest_length = BLAKE2B_OUTBYTES;
  S->P->key_length    = ( uint8_t )keylen;
  S->P->fanout        = 1;
  S->P->depth         = 1;
  S->P->node_depth    = 0;
  S->P->inner_length  = 0;
  store32( &S->P->leaf_length, 0 );
  store32( &S->P->node_offset, 0 );
  store32( &S->P->xof_length, ( uint32_t )outlen );
  memset( S->P->reserved, 0, sizeof( S->P->reserved ) );
  memset( S->P->salt, 0, sizeof( S->P->salt ) );
  memset( S->P->personal, 0, sizeof( S->P->personal ) );

  if( blake2b_init_param( S->S, S->P ) < 0 )
    return -1;

  if( keylen > 0 )
  {
    uint8_t padded_key[BLAKE2B_BLOCKBYTES];

    memset( padded_key, 0, sizeof( padded_key ) );
    memcpy( padded_key, key, keylen );
    blake2b_update( S->S, padded_key, BLAKE2B_BLOCKBYTES );
    secure_zero_memory( padded_key, BLAKE2B_BLOCKBYTES );
  }

  return 0;
}
|
||||||
|
|
||||||
|
/* Absorb inlen bytes from in by forwarding to blake2b_update on the inner
   BLAKE2b state.  Returns that function's result. */
int blake2xb_update( blake2xb_state *S, const void *in, size_t inlen ) {
  blake2b_state *inner = S->S;

  return blake2b_update( inner, in, inlen );
}
|
||||||
|
|
||||||
|
/*
  Finish a BLAKE2xb computation and produce outlen bytes of output.

  S       state set up by blake2xb_init / blake2xb_init_key
  out     destination buffer, must not be NULL
  outlen  number of bytes to produce; must equal the xof_length given at
          initialization, unless that was 0xFFFFFFFF, in which case any
          non-zero length is accepted

  The inner BLAKE2b state is finalized to a BLAKE2B_OUTBYTES-byte root hash.
  Output is then generated in pieces of up to BLAKE2B_OUTBYTES bytes: piece i
  is the BLAKE2b digest of the root hash under a parameter block whose
  node_offset is i and whose digest_length is the size of that piece.

  Every step of the expansion loop is checked, and the root hash, parameter
  block and scratch state are wiped on both the success and the failure path.

  Returns 0 on success, -1 on invalid arguments or if any step fails.
*/
int blake2xb_final( blake2xb_state *S, void *out, size_t outlen) {

  blake2b_state C[1];
  blake2b_param P[1];
  uint32_t xof_length = load32(&S->P->xof_length);
  uint8_t root[BLAKE2B_BLOCKBYTES];
  size_t i;
  int rc = 0;

  if (NULL == out) {
    return -1;
  }

  /* outlen must match the output size defined in xof_length, */
  /* unless it was -1, in which case anything goes except 0. */
  if(xof_length == 0xFFFFFFFFUL) {
    if(outlen == 0) {
      return -1;
    }
  } else {
    if(outlen != xof_length) {
      return -1;
    }
  }

  /* Finalize the root hash */
  if (blake2b_final(S->S, root, BLAKE2B_OUTBYTES) < 0) {
    return -1;
  }

  /* Set common block structure values */
  /* Copy values from parent instance, and only change the ones below */
  memcpy(P, S->P, sizeof(blake2b_param));
  P->key_length = 0;
  P->fanout = 0;
  P->depth = 0;
  store32(&P->leaf_length, BLAKE2B_OUTBYTES);
  P->inner_length = BLAKE2B_OUTBYTES;
  P->node_depth = 0;

  for (i = 0; outlen > 0; ++i) {
    const size_t block_size = (outlen < BLAKE2B_OUTBYTES) ? outlen : BLAKE2B_OUTBYTES;

    /* Initialize state for output piece i */
    P->digest_length = (uint8_t)block_size;
    store32(&P->node_offset, (uint32_t)i);

    /* Hash the root digest into this piece; stop at the first failure */
    if (blake2b_init_param(C, P) < 0 ||
        blake2b_update(C, root, BLAKE2B_OUTBYTES) < 0 ||
        blake2b_final(C, (uint8_t *)out + i * BLAKE2B_OUTBYTES, block_size) < 0) {
      rc = -1;
      break;
    }

    outlen -= block_size;
  }

  /* Wipe intermediate material regardless of the outcome */
  secure_zero_memory(root, sizeof(root));
  secure_zero_memory(P, sizeof(P));
  secure_zero_memory(C, sizeof(C));
  /* Put blake2xb in an invalid state? cf. blake2s_is_lastblock */
  return rc;
}
|
||||||
|
|
||||||
|
int blake2xb(void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen)
|
||||||
|
{
|
||||||
|
blake2xb_state S[1];
|
||||||
|
|
||||||
|
/* Verify parameters */
|
||||||
|
if (NULL == in && inlen > 0)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
if (NULL == out)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
if (NULL == key && keylen > 0)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
if (keylen > BLAKE2B_KEYBYTES)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
if (outlen == 0)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
/* Initialize the root block structure */
|
||||||
|
if (blake2xb_init_key(S, outlen, key, keylen) < 0) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Absorb the input message */
|
||||||
|
blake2xb_update(S, in, inlen);
|
||||||
|
|
||||||
|
/* Compute the root node of the tree and the final hash using the counter construction */
|
||||||
|
return blake2xb_final(S, out, outlen);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(BLAKE2XB_SELFTEST)
|
||||||
|
#include <string.h>
|
||||||
|
#include "blake2-kat.h"
|
||||||
|
int main( void )
|
||||||
|
{
|
||||||
|
uint8_t key[BLAKE2B_KEYBYTES];
|
||||||
|
uint8_t buf[BLAKE2_KAT_LENGTH];
|
||||||
|
size_t i, step, outlen;
|
||||||
|
|
||||||
|
for( i = 0; i < BLAKE2B_KEYBYTES; ++i ) {
|
||||||
|
key[i] = ( uint8_t )i;
|
||||||
|
}
|
||||||
|
|
||||||
|
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) {
|
||||||
|
buf[i] = ( uint8_t )i;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Testing length of outputs rather than inputs */
|
||||||
|
/* (Test of input lengths mostly covered by blake2b tests) */
|
||||||
|
|
||||||
|
/* Test simple API */
|
||||||
|
for( outlen = 1; outlen <= BLAKE2_KAT_LENGTH; ++outlen )
|
||||||
|
{
|
||||||
|
uint8_t hash[BLAKE2_KAT_LENGTH] = {0};
|
||||||
|
if( blake2xb( hash, outlen, buf, BLAKE2_KAT_LENGTH, key, BLAKE2B_KEYBYTES ) < 0 ) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( 0 != memcmp( hash, blake2xb_keyed_kat[outlen-1], outlen ) )
|
||||||
|
{
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Test streaming API */
|
||||||
|
for(step = 1; step < BLAKE2B_BLOCKBYTES; ++step) {
|
||||||
|
for (outlen = 1; outlen <= BLAKE2_KAT_LENGTH; ++outlen) {
|
||||||
|
uint8_t hash[BLAKE2_KAT_LENGTH];
|
||||||
|
blake2xb_state S;
|
||||||
|
uint8_t * p = buf;
|
||||||
|
size_t mlen = BLAKE2_KAT_LENGTH;
|
||||||
|
int err = 0;
|
||||||
|
|
||||||
|
if( (err = blake2xb_init_key(&S, outlen, key, BLAKE2B_KEYBYTES)) < 0 ) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (mlen >= step) {
|
||||||
|
if ( (err = blake2xb_update(&S, p, step)) < 0 ) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
mlen -= step;
|
||||||
|
p += step;
|
||||||
|
}
|
||||||
|
if ( (err = blake2xb_update(&S, p, mlen)) < 0) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
if ( (err = blake2xb_final(&S, hash, outlen)) < 0) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 != memcmp(hash, blake2xb_keyed_kat[outlen-1], outlen)) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
puts( "ok" );
|
||||||
|
return 0;
|
||||||
|
fail:
|
||||||
|
puts("error");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
239
3rd/BLAKE2/blake2xs-ref.c
Normal file
239
3rd/BLAKE2/blake2xs-ref.c
Normal file
@@ -0,0 +1,239 @@
|
|||||||
|
/*
|
||||||
|
BLAKE2 reference source code package - reference C implementations
|
||||||
|
|
||||||
|
Copyright 2016, JP Aumasson <jeanphilippe.aumasson@gmail.com>.
|
||||||
|
Copyright 2016, Samuel Neves <sneves@dei.uc.pt>.
|
||||||
|
|
||||||
|
You may use this under the terms of the CC0, the OpenSSL Licence, or
|
||||||
|
the Apache Public License 2.0, at your option. The terms of these
|
||||||
|
licenses can be found at:
|
||||||
|
|
||||||
|
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||||
|
- OpenSSL license : https://www.openssl.org/source/license.html
|
||||||
|
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
More information about the BLAKE2 hash function can be found at
|
||||||
|
https://blake2.net.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "blake2.h"
|
||||||
|
#include "blake2-impl.h"
|
||||||
|
|
||||||
|
/*
 * Initialize an unkeyed BLAKE2Xs state that will produce `outlen` bytes.
 * Forwards to blake2xs_init_key() with no key and returns its result.
 */
int blake2xs_init( blake2xs_state *S, const size_t outlen )
{
  const void *no_key = NULL;

  return blake2xs_init_key( S, outlen, no_key, 0 );
}
|
||||||
|
|
||||||
|
/*
 * Initialize a BLAKE2Xs state, optionally keyed.
 *
 *   S       state to initialize
 *   outlen  requested output length in bytes; must be in 1..0xFFFF
 *   key     key bytes, or NULL when keylen is 0
 *   keylen  key length in bytes; at most BLAKE2S_KEYBYTES
 *
 * Returns 0 on success, -1 on invalid arguments or if the underlying
 * blake2s_init_param() call fails.
 */
int blake2xs_init_key( blake2xs_state *S, const size_t outlen, const void *key, size_t keylen )
{
  blake2s_param *params = S->P;

  /* xof_length is stored in 16 bits, and an empty output is rejected. */
  if( outlen < 1 || outlen > 0xFFFFUL )
  {
    return -1;
  }

  /* The key pointer and key length have to agree. */
  if( key == NULL )
  {
    if( keylen != 0 )
    {
      return -1;
    }
  }
  else if( keylen > BLAKE2S_KEYBYTES )
  {
    return -1;
  }

  /* Fill in the parameter block of the root hash */
  params->digest_length = BLAKE2S_OUTBYTES;
  params->key_length    = keylen;
  params->fanout        = 1;
  params->depth         = 1;
  store32( &params->leaf_length, 0 );
  store32( &params->node_offset, 0 );
  store16( &params->xof_length, outlen );
  params->node_depth    = 0;
  params->inner_length  = 0;
  memset( params->salt,     0, sizeof( params->salt ) );
  memset( params->personal, 0, sizeof( params->personal ) );

  if( blake2s_init_param( S->S, params ) < 0 )
  {
    return -1;
  }

  /* A key is absorbed as one full block: the key followed by zero padding. */
  if( keylen > 0 )
  {
    uint8_t padded_key[BLAKE2S_BLOCKBYTES];

    memset( padded_key, 0, sizeof( padded_key ) );
    memcpy( padded_key, key, keylen );
    blake2s_update( S->S, padded_key, sizeof( padded_key ) );
    /* Do not leave a copy of the key on the stack. */
    secure_zero_memory( padded_key, sizeof( padded_key ) );
  }

  return 0;
}
|
||||||
|
|
||||||
|
/*
 * Absorb `inlen` bytes from `in` into the BLAKE2Xs state.
 * This is a direct pass-through to blake2s_update() on the embedded
 * BLAKE2s state; its return value is returned unchanged.
 */
int blake2xs_update( blake2xs_state *S, const void *in, size_t inlen )
{
  blake2s_state *root_hash = S->S;

  return blake2s_update( root_hash, in, inlen );
}
|
||||||
|
|
||||||
|
/*
 * Finish a BLAKE2Xs computation and write `outlen` bytes to `out`.
 *
 *   S       state previously set up with blake2xs_init()/blake2xs_init_key()
 *   out     destination buffer; must not be NULL
 *   outlen  number of bytes to produce. Must equal the length given at init
 *           time, unless that length was 0xFFFF, in which case any non-zero
 *           length is accepted.
 *
 * The embedded BLAKE2s state is finalized into a root digest; each output
 * block is then a BLAKE2s hash of that root digest with the block index in
 * node_offset (counter construction).
 *
 * Returns 0 on success, -1 on invalid arguments or if any underlying
 * BLAKE2s call fails. Once the root digest has been requested, every exit
 * path wipes the local root digest, parameter block and hash state.
 */
int blake2xs_final(blake2xs_state *S, void *out, size_t outlen) {

  blake2s_state C[1];
  blake2s_param P[1];
  uint16_t xof_length = load16(&S->P->xof_length);
  uint8_t root[BLAKE2S_BLOCKBYTES];
  size_t i;
  int rc = -1;

  if (NULL == out) {
    return -1;
  }

  /* outlen must match the output size defined in xof_length, */
  /* unless it was -1, in which case anything goes except 0. */
  if(xof_length == 0xFFFFUL) {
    if(outlen == 0) {
      return -1;
    }
  } else {
    if(outlen != xof_length) {
      return -1;
    }
  }

  /* Finalize the root hash */
  if (blake2s_final(S->S, root, BLAKE2S_OUTBYTES) < 0) {
    goto wipe;
  }

  /* Set common block structure values */
  /* Copy values from parent instance, and only change the ones below */
  memcpy(P, S->P, sizeof(blake2s_param));
  P->key_length = 0;
  P->fanout = 0;
  P->depth = 0;
  store32(&P->leaf_length, BLAKE2S_OUTBYTES);
  P->inner_length = BLAKE2S_OUTBYTES;
  P->node_depth = 0;

  for (i = 0; outlen > 0; ++i) {
    const size_t block_size = (outlen < BLAKE2S_OUTBYTES) ? outlen : BLAKE2S_OUTBYTES;

    /* Per-block parameters: the (possibly short) digest length and the */
    /* block counter stored in node_offset. */
    P->digest_length = block_size;
    store32(&P->node_offset, i);

    if (blake2s_init_param(C, P) < 0) {
      goto wipe;
    }

    /* Every output block hashes the same root digest */
    if (blake2s_update(C, root, BLAKE2S_OUTBYTES) < 0) {
      goto wipe;
    }

    if (blake2s_final(C, (uint8_t *)out + i * BLAKE2S_OUTBYTES, block_size) < 0) {
      goto wipe;
    }

    outlen -= block_size;
  }

  rc = 0;

wipe:
  /* Reached on success and on failure, so intermediate values never */
  /* outlive this call on the stack. */
  secure_zero_memory(root, sizeof(root));
  secure_zero_memory(P, sizeof(P));
  secure_zero_memory(C, sizeof(C));
  /* Put blake2xs in an invalid state? cf. blake2s_is_lastblock */
  return rc;
}
|
||||||
|
|
||||||
|
int blake2xs(void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen)
|
||||||
|
{
|
||||||
|
blake2xs_state S[1];
|
||||||
|
|
||||||
|
/* Verify parameters */
|
||||||
|
if (NULL == in && inlen > 0)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
if (NULL == out)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
if (NULL == key && keylen > 0)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
if (keylen > BLAKE2S_KEYBYTES)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
if (outlen == 0)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
/* Initialize the root block structure */
|
||||||
|
if (blake2xs_init_key(S, outlen, key, keylen) < 0) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Absorb the input message */
|
||||||
|
blake2xs_update(S, in, inlen);
|
||||||
|
|
||||||
|
/* Compute the root node of the tree and the final hash using the counter construction */
|
||||||
|
return blake2xs_final(S, out, outlen);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(BLAKE2XS_SELFTEST)
|
||||||
|
#include <string.h>
|
||||||
|
#include "blake2-kat.h"
|
||||||
|
int main( void )
|
||||||
|
{
|
||||||
|
uint8_t key[BLAKE2S_KEYBYTES];
|
||||||
|
uint8_t buf[BLAKE2_KAT_LENGTH];
|
||||||
|
size_t i, step, outlen;
|
||||||
|
|
||||||
|
for( i = 0; i < BLAKE2S_KEYBYTES; ++i ) {
|
||||||
|
key[i] = ( uint8_t )i;
|
||||||
|
}
|
||||||
|
|
||||||
|
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) {
|
||||||
|
buf[i] = ( uint8_t )i;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Testing length of outputs rather than inputs */
|
||||||
|
/* (Test of input lengths mostly covered by blake2s tests) */
|
||||||
|
|
||||||
|
/* Test simple API */
|
||||||
|
for( outlen = 1; outlen <= BLAKE2_KAT_LENGTH; ++outlen )
|
||||||
|
{
|
||||||
|
uint8_t hash[BLAKE2_KAT_LENGTH] = {0};
|
||||||
|
if( blake2xs( hash, outlen, buf, BLAKE2_KAT_LENGTH, key, BLAKE2S_KEYBYTES ) < 0 ) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( 0 != memcmp( hash, blake2xs_keyed_kat[outlen-1], outlen ) )
|
||||||
|
{
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Test streaming API */
|
||||||
|
for(step = 1; step < BLAKE2S_BLOCKBYTES; ++step) {
|
||||||
|
for (outlen = 1; outlen <= BLAKE2_KAT_LENGTH; ++outlen) {
|
||||||
|
uint8_t hash[BLAKE2_KAT_LENGTH];
|
||||||
|
blake2xs_state S;
|
||||||
|
uint8_t * p = buf;
|
||||||
|
size_t mlen = BLAKE2_KAT_LENGTH;
|
||||||
|
int err = 0;
|
||||||
|
|
||||||
|
if( (err = blake2xs_init_key(&S, outlen, key, BLAKE2S_KEYBYTES)) < 0 ) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (mlen >= step) {
|
||||||
|
if ( (err = blake2xs_update(&S, p, step)) < 0 ) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
mlen -= step;
|
||||||
|
p += step;
|
||||||
|
}
|
||||||
|
if ( (err = blake2xs_update(&S, p, mlen)) < 0) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
if ( (err = blake2xs_final(&S, hash, outlen)) < 0) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 != memcmp(hash, blake2xs_keyed_kat[outlen-1], outlen)) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
puts( "ok" );
|
||||||
|
return 0;
|
||||||
|
fail:
|
||||||
|
puts("error");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
@@ -5,7 +5,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#define lapi_c
|
#define lapi_c
|
||||||
#define LUA_CORE
|
|
||||||
|
|
||||||
#include "lprefix.h"
|
#include "lprefix.h"
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#define lcode_c
|
#define lcode_c
|
||||||
#define LUA_CORE
|
|
||||||
|
|
||||||
#include "lprefix.h"
|
#include "lprefix.h"
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#define lctype_c
|
#define lctype_c
|
||||||
#define LUA_CORE
|
|
||||||
|
|
||||||
#include "lprefix.h"
|
#include "lprefix.h"
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#define ldebug_c
|
#define ldebug_c
|
||||||
#define LUA_CORE
|
|
||||||
|
|
||||||
#include "lprefix.h"
|
#include "lprefix.h"
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#define ldo_c
|
#define ldo_c
|
||||||
#define LUA_CORE
|
|
||||||
|
|
||||||
#include "lprefix.h"
|
#include "lprefix.h"
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#define ldump_c
|
#define ldump_c
|
||||||
#define LUA_CORE
|
|
||||||
|
|
||||||
#include "lprefix.h"
|
#include "lprefix.h"
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#define lfunc_c
|
#define lfunc_c
|
||||||
#define LUA_CORE
|
|
||||||
|
|
||||||
#include "lprefix.h"
|
#include "lprefix.h"
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#define lgc_c
|
#define lgc_c
|
||||||
#define LUA_CORE
|
|
||||||
|
|
||||||
#include "lprefix.h"
|
#include "lprefix.h"
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#define llex_c
|
#define llex_c
|
||||||
#define LUA_CORE
|
|
||||||
|
|
||||||
#include "lprefix.h"
|
#include "lprefix.h"
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#define lmem_c
|
#define lmem_c
|
||||||
#define LUA_CORE
|
|
||||||
|
|
||||||
#include "lprefix.h"
|
#include "lprefix.h"
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#define lobject_c
|
#define lobject_c
|
||||||
#define LUA_CORE
|
|
||||||
|
|
||||||
#include "lprefix.h"
|
#include "lprefix.h"
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#define lopcodes_c
|
#define lopcodes_c
|
||||||
#define LUA_CORE
|
|
||||||
|
|
||||||
#include "lprefix.h"
|
#include "lprefix.h"
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#define lparser_c
|
#define lparser_c
|
||||||
#define LUA_CORE
|
|
||||||
|
|
||||||
#include "lprefix.h"
|
#include "lprefix.h"
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#define lstate_c
|
#define lstate_c
|
||||||
#define LUA_CORE
|
|
||||||
|
|
||||||
#include "lprefix.h"
|
#include "lprefix.h"
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#define lstring_c
|
#define lstring_c
|
||||||
#define LUA_CORE
|
|
||||||
|
|
||||||
#include "lprefix.h"
|
#include "lprefix.h"
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#define ltable_c
|
#define ltable_c
|
||||||
#define LUA_CORE
|
|
||||||
|
|
||||||
#include "lprefix.h"
|
#include "lprefix.h"
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#define ltm_c
|
#define ltm_c
|
||||||
#define LUA_CORE
|
|
||||||
|
|
||||||
#include "lprefix.h"
|
#include "lprefix.h"
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#define lundump_c
|
#define lundump_c
|
||||||
#define LUA_CORE
|
|
||||||
|
|
||||||
#include "lprefix.h"
|
#include "lprefix.h"
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#define lvm_c
|
#define lvm_c
|
||||||
#define LUA_CORE
|
|
||||||
|
|
||||||
#include "lprefix.h"
|
#include "lprefix.h"
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#define lzio_c
|
#define lzio_c
|
||||||
#define LUA_CORE
|
|
||||||
|
|
||||||
#include "lprefix.h"
|
#include "lprefix.h"
|
||||||
|
|
||||||
|
|||||||
1403
3rd/pcre2/CMakeLists.txt
Normal file
1403
3rd/pcre2/CMakeLists.txt
Normal file
@@ -0,0 +1,1403 @@
|
|||||||
|
# CMakeLists.txt
|
||||||
|
#
|
||||||
|
# This file enables PCRE2 to be built with the CMake configuration and build
|
||||||
|
# tool. Download CMake in source or binary form from http://www.cmake.org/
|
||||||
|
# Converted to support PCRE2 from the original PCRE file, August 2014.
|
||||||
|
#
|
||||||
|
# Original listfile by Christian Ehrlicher <Ch.Ehrlicher@gmx.de>
|
||||||
|
# Refined and expanded by Daniel Richard G. <skunk@iSKUNK.ORG>
|
||||||
|
# 2007-09-14 mod by Sheri so 7.4 supported configuration options can be entered
|
||||||
|
# 2007-09-19 Adjusted by PH to retain previous default settings
|
||||||
|
# 2007-12-26 (a) On UNIX, use names libpcre instead of just pcre
|
||||||
|
# (b) Ensure pcretest and pcregrep link with the local library,
|
||||||
|
# not a previously-installed one.
|
||||||
|
# (c) Add PCRE_SUPPORT_LIBREADLINE, PCRE_SUPPORT_LIBZ, and
|
||||||
|
# PCRE_SUPPORT_LIBBZ2.
|
||||||
|
# 2008-01-20 Brought up to date to include several new features by Christian
|
||||||
|
# Ehrlicher.
|
||||||
|
# 2008-01-22 Sheri added options for backward compatibility of library names
|
||||||
|
# when building with minGW:
|
||||||
|
# if "ON", NON_STANDARD_LIB_PREFIX causes shared libraries to
|
||||||
|
# be built without "lib" as prefix. (The libraries will be named
|
||||||
|
# pcre.dll, pcreposix.dll and pcrecpp.dll).
|
||||||
|
# if "ON", NON_STANDARD_LIB_SUFFIX causes shared libraries to
|
||||||
|
# be built with suffix of "-0.dll". (The libraries will be named
|
||||||
|
# libpcre-0.dll, libpcreposix-0.dll and libpcrecpp-0.dll - same names
|
||||||
|
# built by default with Configure and Make.
|
||||||
|
# 2008-01-23 PH removed the automatic build of pcredemo.
|
||||||
|
# 2008-04-22 PH modified READLINE support so it finds NCURSES when needed.
|
||||||
|
# 2008-07-03 PH updated for revised UCP property support (change of files)
|
||||||
|
# 2009-03-23 PH applied Steven Van Ingelgem's patch to change the name
|
||||||
|
# CMAKE_BINARY_DIR to PROJECT_BINARY_DIR so that it works when PCRE
|
||||||
|
# is included within another project.
|
||||||
|
# 2009-03-23 PH applied a modified version of Steven Van Ingelgem's patches to
|
||||||
|
# add options to stop the building of pcregrep and the tests, and
|
||||||
|
# to disable the final configuration report.
|
||||||
|
# 2009-04-11 PH applied Christian Ehrlicher's patch to show compiler flags that
|
||||||
|
# are set by specifying a release type.
|
||||||
|
# 2010-01-02 PH added test for stdint.h
|
||||||
|
# 2010-03-02 PH added test for inttypes.h
|
||||||
|
# 2011-08-01 PH added PCREGREP_BUFSIZE
|
||||||
|
# 2011-08-22 PH added PCRE_SUPPORT_JIT
|
||||||
|
# 2011-09-06 PH modified WIN32 ADD_TEST line as suggested by Sergey Cherepanov
|
||||||
|
# 2011-09-06 PH added PCRE_SUPPORT_PCREGREP_JIT
|
||||||
|
# 2011-10-04 Sheri added support for including coff data in windows shared libraries
|
||||||
|
# compiled with MINGW if pcre.rc and/or pcreposix.rc are placed in
|
||||||
|
# the source dir by the user prior to building
|
||||||
|
# 2011-10-04 Sheri changed various add_test's to use exes' location built instead
|
||||||
|
# of DEBUG location only (likely only matters in MSVC)
|
||||||
|
# 2011-10-04 Sheri added scripts to provide needed variables to RunTest and
|
||||||
|
# RunGrepTest (used for UNIX and Msys)
|
||||||
|
# 2011-10-04 Sheri added scripts to provide needed variables and to execute
|
||||||
|
# RunTest.bat in Win32 (for effortless testing with "make test")
|
||||||
|
# 2011-10-04 Sheri Increased minimum required cmake version
|
||||||
|
# 2012-01-06 PH removed pcre_info.c and added pcre_string_utils.c
|
||||||
|
# 2012-01-10 Zoltan Herczeg added libpcre16 support
|
||||||
|
# 2012-01-13 Stephen Kelly added out of source build support
|
||||||
|
# 2012-01-17 PH applied Stephen Kelly's patch to parse the version data out
|
||||||
|
# of the configure.ac file
|
||||||
|
# 2012-02-26 PH added support for libedit
|
||||||
|
# 2012-09-06 PH added support for PCRE_EBCDIC_NL25
|
||||||
|
# 2012-09-08 ChPe added PCRE32 support
|
||||||
|
# 2012-10-23 PH added support for VALGRIND and GCOV
|
||||||
|
# 2012-12-08 PH added patch from Daniel Richard G to quash some MSVC warnings
|
||||||
|
# 2013-07-01 PH realized that the "support" for GCOV was a total nonsense and
|
||||||
|
# so it has been removed.
|
||||||
|
# 2013-10-08 PH got rid of the "source" command, which is a bash-ism (use ".")
|
||||||
|
# 2013-11-05 PH added support for PARENS_NEST_LIMIT
|
||||||
|
# 2014-08-29 PH converted the file for PCRE2 (which has no C++).
|
||||||
|
# 2015-04-24 PH added support for PCRE2_DEBUG
|
||||||
|
# 2015-07-16 PH updated for new pcre2_find_bracket source module
|
||||||
|
# 2015-08-24 PH correct C_FLAGS setting (patch from Roy Ivy III)
|
||||||
|
# 2015-10=16 PH added support for never-backslash-C
|
||||||
|
# 2016-03-01 PH applied Chris Wilson's patch for MSVC static
|
||||||
|
# 2016-06-24 PH applied Chris Wilson's second patch, putting the first under
|
||||||
|
# a new option instead of being unconditional.
|
||||||
|
# 2016-10-05 PH fixed a typo (PCRE should be PCRE2) in above patch
|
||||||
|
# fix by David Gaussmann
|
||||||
|
# 2016-10-07 PH added PCREGREP_MAX_BUFSIZE
|
||||||
|
# 2017-03-11 PH turned HEAP_MATCH_RECURSE into a NO-OP for 10.30
|
||||||
|
# 2017-04-08 PH added HEAP_LIMIT
|
||||||
|
# 2017-06-15 ZH added SUPPORT_JIT_SEALLOC support
|
||||||
|
# 2018-06-19 PH added checks for stdint.h and inttypes.h (later removed)
|
||||||
|
# 2018-06-27 PH added Daniel's patch to increase the stack for MSVC
|
||||||
|
# 2018-11-14 PH removed unnecessary checks for stdint.h and inttypes.h
|
||||||
|
# 2018-11-16 PH added PCRE2GREP_SUPPORT_CALLOUT_FORK support and tidied
|
||||||
|
# 2019-02-16 PH hacked to avoid CMP0026 policy issue (see comments below)
|
||||||
|
# 2020-03-16 PH renamed dftables as pcre2_dftables (as elsewhere)
|
||||||
|
# 2020-03-24 PH changed CMAKE_MODULE_PATH definition to add, not replace
|
||||||
|
# 2020-04-08 Carlo added function check for secure_getenv, fixed strerror
|
||||||
|
# 2020-04-16 enh added check for __attribute__((uninitialized))
|
||||||
|
# 2020-04-25 PH applied patches from Uwe Korn to support pkg-config and
|
||||||
|
# library versioning.
|
||||||
|
# 2020-04-25 Carlo added function check for mkostemp used in ProtExecAllocator
|
||||||
|
# 2020-04-28 PH added function check for memfd_create based on Carlo's patch
|
||||||
|
# 2020-05-25 PH added a check for Intel CET
|
||||||
|
# 2020-12-03 PH altered the definition of pcre2test as suggested by Daniel
|
||||||
|
# 2021-06-29 JWSB added the option to build static library with PIC.
|
||||||
|
# 2021-07-05 JWSB modified so that both the static and shared libraries can be
|
||||||
|
# built in one go.
|
||||||
|
# 2021-08-28 PH increased minimum version
|
||||||
|
# 2021-08-28 PH added test for realpath()
|
||||||
|
# 2022-12-10 PH added support for pcre2posix_test
|
||||||
|
# 2023-01-15 Carlo added C99 as the minimum required
|
||||||
|
# 2023-08-06 PH added support for setting variable length lookbehind maximum
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
# We have used `gersemi` for auto-formatting our CMake files.
|
||||||
|
# Applied to all CMake files using:
|
||||||
|
# > pip3 install gersemi
|
||||||
|
# > gersemi --in-place --line-length 120 --indent 2 \
|
||||||
|
# ./CMakeLists.txt ./cmake/*.cmake ./cmake/*.cmake.in
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
# Increased minimum to 3.15 to allow use of string(REPEAT).
|
||||||
|
cmake_minimum_required(VERSION 3.15 FATAL_ERROR)
|
||||||
|
project(PCRE2 C)
|
||||||
|
set(CMAKE_C_STANDARD 99)
|
||||||
|
set(CMAKE_C_STANDARD_REQUIRED TRUE)
|
||||||
|
|
||||||
|
set(CMAKE_C_VISIBILITY_PRESET hidden)
|
||||||
|
cmake_policy(SET CMP0063 NEW)
|
||||||
|
|
||||||
|
# Set policy CMP0026 to avoid warnings for the use of LOCATION in
|
||||||
|
# GET_TARGET_PROPERTY. This should no longer be required.
|
||||||
|
# CMAKE_POLICY(SET CMP0026 OLD)
|
||||||
|
|
||||||
|
# With a recent cmake, you can provide a rootdir to look for non
|
||||||
|
# standard installed library dependencies, but to do so, the policy
|
||||||
|
# needs to be set to new (by uncommenting the following)
|
||||||
|
# CMAKE_POLICY(SET CMP0074 NEW)
|
||||||
|
|
||||||
|
# For FindReadline.cmake. This was changed to allow setting CMAKE_MODULE_PATH
|
||||||
|
# on the command line.
|
||||||
|
# SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
||||||
|
|
||||||
|
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
||||||
|
|
||||||
|
include_directories(${PROJECT_SOURCE_DIR}/src)
|
||||||
|
|
||||||
|
# external packages
|
||||||
|
find_package(BZip2)
|
||||||
|
find_package(ZLIB)
|
||||||
|
find_package(Readline)
|
||||||
|
find_package(Editline)
|
||||||
|
|
||||||
|
# Configuration checks
|
||||||
|
|
||||||
|
include(CheckCSourceCompiles)
|
||||||
|
include(CheckFunctionExists)
|
||||||
|
include(CheckSymbolExists)
|
||||||
|
include(CheckIncludeFile)
|
||||||
|
include(CheckTypeSize)
|
||||||
|
include(GNUInstallDirs) # for CMAKE_INSTALL_LIBDIR
|
||||||
|
|
||||||
|
check_include_file(assert.h HAVE_ASSERT_H)
|
||||||
|
check_include_file(dirent.h HAVE_DIRENT_H)
|
||||||
|
check_include_file(sys/stat.h HAVE_SYS_STAT_H)
|
||||||
|
check_include_file(sys/types.h HAVE_SYS_TYPES_H)
|
||||||
|
check_include_file(unistd.h HAVE_UNISTD_H)
|
||||||
|
check_include_file(windows.h HAVE_WINDOWS_H)
|
||||||
|
|
||||||
|
check_symbol_exists(bcopy "strings.h" HAVE_BCOPY)
|
||||||
|
check_symbol_exists(memfd_create "sys/mman.h" HAVE_MEMFD_CREATE)
|
||||||
|
check_symbol_exists(memmove "string.h" HAVE_MEMMOVE)
|
||||||
|
check_symbol_exists(secure_getenv "stdlib.h" HAVE_SECURE_GETENV)
|
||||||
|
check_symbol_exists(strerror "string.h" HAVE_STRERROR)
|
||||||
|
|
||||||
|
check_c_source_compiles(
|
||||||
|
[=[
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <limits.h>
|
||||||
|
int main(int c, char *v[]) { char buf[PATH_MAX]; realpath(v[c], buf); return 0; }
|
||||||
|
]=]
|
||||||
|
HAVE_REALPATH
|
||||||
|
)
|
||||||
|
|
||||||
|
set(ORIG_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
|
||||||
|
if(NOT MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "XL")
|
||||||
|
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
check_c_source_compiles(
|
||||||
|
"int main(void) { char buf[128] __attribute__((uninitialized)); (void)buf; return 0; }"
|
||||||
|
HAVE_ATTRIBUTE_UNINITIALIZED
|
||||||
|
)
|
||||||
|
|
||||||
|
check_c_source_compiles(
|
||||||
|
[=[
|
||||||
|
extern __attribute__ ((visibility ("default"))) int f(void);
|
||||||
|
int main(void) { return f(); }
|
||||||
|
int f(void) { return 42; }
|
||||||
|
]=]
|
||||||
|
HAVE_VISIBILITY
|
||||||
|
)
|
||||||
|
|
||||||
|
set(CMAKE_REQUIRED_FLAGS ${ORIG_CMAKE_REQUIRED_FLAGS})
|
||||||
|
|
||||||
|
check_c_source_compiles("int main(void) { __assume(1); return 0; }" HAVE_BUILTIN_ASSUME)
|
||||||
|
|
||||||
|
check_c_source_compiles(
|
||||||
|
[=[
|
||||||
|
#include <stddef.h>
|
||||||
|
int main(void) { int a,b; size_t m; __builtin_mul_overflow(a,b,&m); return 0; }
|
||||||
|
]=]
|
||||||
|
HAVE_BUILTIN_MUL_OVERFLOW
|
||||||
|
)
|
||||||
|
|
||||||
|
check_c_source_compiles(
|
||||||
|
"int main(int c, char *v[]) { if (c) __builtin_unreachable(); return (int)(*v[0]); }"
|
||||||
|
HAVE_BUILTIN_UNREACHABLE
|
||||||
|
)
|
||||||
|
|
||||||
|
if(HAVE_VISIBILITY)
|
||||||
|
set(PCRE2_EXPORT [=[__attribute__ ((visibility ("default")))]=])
|
||||||
|
else()
|
||||||
|
set(PCRE2_EXPORT)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# Check whether Intel CET is enabled, and if so, adjust compiler flags. This
|
||||||
|
# code was written by PH, trying to imitate the logic from the autotools
|
||||||
|
# configuration.
|
||||||
|
|
||||||
|
check_c_source_compiles(
|
||||||
|
[=[
|
||||||
|
#ifndef __CET__
|
||||||
|
#error CET is not enabled
|
||||||
|
#endif
|
||||||
|
int main() { return 0; }
|
||||||
|
]=]
|
||||||
|
INTEL_CET_ENABLED
|
||||||
|
)
|
||||||
|
|
||||||
|
if(INTEL_CET_ENABLED)
|
||||||
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mshstk")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# User-configurable options
|
||||||
|
#
|
||||||
|
# Note: CMakeSetup displays these in alphabetical order, regardless of
|
||||||
|
# the order we use here.
|
||||||
|
|
||||||
|
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries.")
|
||||||
|
|
||||||
|
option(BUILD_STATIC_LIBS "Build static libraries." ON)
|
||||||
|
|
||||||
|
option(PCRE2_BUILD_PCRE2_8 "Build 8 bit PCRE2 library" ON)
|
||||||
|
|
||||||
|
option(PCRE2_BUILD_PCRE2_16 "Build 16 bit PCRE2 library" OFF)
|
||||||
|
|
||||||
|
option(PCRE2_BUILD_PCRE2_32 "Build 32 bit PCRE2 library" OFF)
|
||||||
|
|
||||||
|
option(PCRE2_STATIC_PIC "Build the static library with the option position independent code enabled." OFF)
|
||||||
|
|
||||||
|
set(PCRE2_DEBUG "IfDebugBuild" CACHE STRING "Include debugging code")
|
||||||
|
set_property(CACHE PCRE2_DEBUG PROPERTY STRINGS "IfDebugBuild" "ON" "OFF")
|
||||||
|
|
||||||
|
option(PCRE2_DISABLE_PERCENT_ZT "Disable the use of %zu and %td (rarely needed)" OFF)
|
||||||
|
|
||||||
|
set(
|
||||||
|
PCRE2_EBCDIC
|
||||||
|
OFF
|
||||||
|
CACHE BOOL
|
||||||
|
"Use EBCDIC coding instead of ASCII. (This is rarely used outside of mainframe systems.)"
|
||||||
|
)
|
||||||
|
|
||||||
|
set(PCRE2_EBCDIC_NL25 OFF CACHE BOOL "Use 0x25 as EBCDIC NL character instead of 0x15; implies EBCDIC.")
|
||||||
|
|
||||||
|
set(
|
||||||
|
PCRE2_LINK_SIZE
|
||||||
|
"2"
|
||||||
|
CACHE STRING
|
||||||
|
"Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details."
|
||||||
|
)
|
||||||
|
|
||||||
|
set(
|
||||||
|
PCRE2_PARENS_NEST_LIMIT
|
||||||
|
"250"
|
||||||
|
CACHE STRING
|
||||||
|
"Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details."
|
||||||
|
)
|
||||||
|
|
||||||
|
set(
|
||||||
|
PCRE2_HEAP_LIMIT
|
||||||
|
"20000000"
|
||||||
|
CACHE STRING
|
||||||
|
"Default limit on heap memory (kibibytes). See HEAP_LIMIT in config.h.in for details."
|
||||||
|
)
|
||||||
|
|
||||||
|
set(PCRE2_MAX_VARLOOKBEHIND "255" CACHE STRING "Default limit on variable lookbehinds.")
|
||||||
|
|
||||||
|
set(
|
||||||
|
PCRE2_MATCH_LIMIT
|
||||||
|
"10000000"
|
||||||
|
CACHE STRING
|
||||||
|
"Default limit on internal looping. See MATCH_LIMIT in config.h.in for details."
|
||||||
|
)
|
||||||
|
|
||||||
|
set(
|
||||||
|
PCRE2_MATCH_LIMIT_DEPTH
|
||||||
|
"MATCH_LIMIT"
|
||||||
|
CACHE STRING
|
||||||
|
"Default limit on internal depth of search. See MATCH_LIMIT_DEPTH in config.h.in for details."
|
||||||
|
)
|
||||||
|
|
||||||
|
set(
|
||||||
|
PCRE2GREP_BUFSIZE
|
||||||
|
"20480"
|
||||||
|
CACHE STRING
|
||||||
|
"Buffer starting size parameter for pcre2grep. See PCRE2GREP_BUFSIZE in config.h.in for details."
|
||||||
|
)
|
||||||
|
|
||||||
|
set(
|
||||||
|
PCRE2GREP_MAX_BUFSIZE
|
||||||
|
"1048576"
|
||||||
|
CACHE STRING
|
||||||
|
"Buffer maximum size parameter for pcre2grep. See PCRE2GREP_MAX_BUFSIZE in config.h.in for details."
|
||||||
|
)
|
||||||
|
|
||||||
|
set(PCRE2_NEWLINE "LF" CACHE STRING "What to recognize as a newline (one of CR, LF, CRLF, ANY, ANYCRLF, NUL).")
|
||||||
|
|
||||||
|
set(PCRE2_HEAP_MATCH_RECURSE OFF CACHE BOOL "Obsolete option: do not use")
|
||||||
|
|
||||||
|
set(PCRE2_SUPPORT_JIT OFF CACHE BOOL "Enable support for Just-in-time compiling.")
|
||||||
|
|
||||||
|
if(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
|
||||||
|
set(PCRE2_SUPPORT_JIT_SEALLOC OFF CACHE BOOL "Enable SELinux compatible execmem allocator in JIT (experimental).")
|
||||||
|
else()
|
||||||
|
set(PCRE2_SUPPORT_JIT_SEALLOC IGNORE)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
set(PCRE2GREP_SUPPORT_JIT ON CACHE BOOL "Enable use of Just-in-time compiling in pcre2grep.")
|
||||||
|
|
||||||
|
set(PCRE2GREP_SUPPORT_CALLOUT ON CACHE BOOL "Enable callout string support in pcre2grep.")
|
||||||
|
|
||||||
|
set(PCRE2GREP_SUPPORT_CALLOUT_FORK ON CACHE BOOL "Enable callout string fork support in pcre2grep.")
|
||||||
|
|
||||||
|
set(PCRE2_SUPPORT_UNICODE ON CACHE BOOL "Enable support for Unicode and UTF-8/UTF-16/UTF-32 encoding.")
|
||||||
|
|
||||||
|
set(
|
||||||
|
PCRE2_SUPPORT_BSR_ANYCRLF
|
||||||
|
OFF
|
||||||
|
CACHE BOOL
|
||||||
|
"ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks"
|
||||||
|
)
|
||||||
|
|
||||||
|
set(PCRE2_NEVER_BACKSLASH_C OFF CACHE BOOL "If ON, backslash-C (upper case C) is locked out.")
|
||||||
|
|
||||||
|
set(PCRE2_SUPPORT_VALGRIND OFF CACHE BOOL "Enable Valgrind support.")
|
||||||
|
|
||||||
|
option(PCRE2_SHOW_REPORT "Show the final configuration report" ON)
|
||||||
|
option(PCRE2_BUILD_PCRE2GREP "Build pcre2grep" ON)
|
||||||
|
option(PCRE2_BUILD_TESTS "Build the tests" ON)
|
||||||
|
|
||||||
|
set(
|
||||||
|
PCRE2_INSTALL_CMAKEDIR
|
||||||
|
"${CMAKE_INSTALL_LIBDIR}/cmake/pcre2"
|
||||||
|
CACHE STRING
|
||||||
|
"Path used during CMake install for placing PCRE2's CMake config files, relative to the installation root (prefix)"
|
||||||
|
)
|
||||||
|
|
||||||
|
if(MINGW)
|
||||||
|
option(
|
||||||
|
NON_STANDARD_LIB_PREFIX
|
||||||
|
"ON=Shared libraries built in mingw will be named pcre2.dll, etc., instead of libpcre2.dll, etc."
|
||||||
|
OFF
|
||||||
|
)
|
||||||
|
|
||||||
|
option(
|
||||||
|
NON_STANDARD_LIB_SUFFIX
|
||||||
|
"ON=Shared libraries built in mingw will be named libpcre2-0.dll, etc., instead of libpcre2.dll, etc."
|
||||||
|
OFF
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(MSVC)
|
||||||
|
option(PCRE2_STATIC_RUNTIME "ON=Compile against the static runtime (/MT)." OFF)
|
||||||
|
option(INSTALL_MSVC_PDB "ON=Install .pdb files built by MSVC, if generated" OFF)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# bzip2 lib
|
||||||
|
if(BZIP2_FOUND)
|
||||||
|
option(PCRE2_SUPPORT_LIBBZ2 "Enable support for linking pcre2grep with libbz2." ON)
|
||||||
|
endif()
|
||||||
|
if(PCRE2_SUPPORT_LIBBZ2)
|
||||||
|
include_directories(${BZIP2_INCLUDE_DIR})
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# zlib
|
||||||
|
if(ZLIB_FOUND)
|
||||||
|
option(PCRE2_SUPPORT_LIBZ "Enable support for linking pcre2grep with libz." ON)
|
||||||
|
endif()
|
||||||
|
if(PCRE2_SUPPORT_LIBZ)
|
||||||
|
include_directories(${ZLIB_INCLUDE_DIR})
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# editline lib: the option is only offered when libedit was found, and it
# defaults to OFF.
if(EDITLINE_FOUND)
  option(PCRE2_SUPPORT_LIBEDIT "Enable support for linking pcre2test with libedit." OFF)
endif()

# If libedit support was requested, either use the library that was found or
# stop with an explanation of how to point CMake at it.
if(PCRE2_SUPPORT_LIBEDIT)
  if(EDITLINE_FOUND)
    include_directories(${EDITLINE_INCLUDE_DIR})
  else()
    message(
      FATAL_ERROR
      " libedit not found, set EDITLINE_INCLUDE_DIR to a compatible header\n"
      " or set Editline_ROOT to a full libedit installed tree, as needed\n"
      " Might need to enable policy CMP0074 in CMakeLists.txt"
    )
  endif()
endif()
|
||||||
|
|
||||||
|
# readline lib: the option is only offered (default ON) when libreadline was found.
if(READLINE_FOUND)
  option(PCRE2_SUPPORT_LIBREADLINE "Enable support for linking pcre2test with libreadline." ON)
endif()

# Add the readline headers to the include path when support is enabled.
if(PCRE2_SUPPORT_LIBREADLINE)
  include_directories(${READLINE_INCLUDE_DIR})
endif()
|
||||||
|
|
||||||
|
# Prepare build configuration

# Refuse to configure when neither library flavour is requested.
if(NOT (BUILD_SHARED_LIBS OR BUILD_STATIC_LIBS))
  message(FATAL_ERROR "At least one of BUILD_SHARED_LIBS or BUILD_STATIC_LIBS must be enabled.")
endif()

# Refuse to configure when no code unit width is requested.
if(NOT (PCRE2_BUILD_PCRE2_8 OR PCRE2_BUILD_PCRE2_16 OR PCRE2_BUILD_PCRE2_32))
  message(
    FATAL_ERROR
    "At least one of PCRE2_BUILD_PCRE2_8, PCRE2_BUILD_PCRE2_16 or PCRE2_BUILD_PCRE2_32 must be enabled"
  )
endif()
|
||||||
|
|
||||||
|
# Set SUPPORT_PCRE2_<width> for every enabled code unit width.
foreach(_pcre2_width 8 16 32)
  if(PCRE2_BUILD_PCRE2_${_pcre2_width})
    set(SUPPORT_PCRE2_${_pcre2_width} 1)
  endif()
endforeach()

# pcre2grep is switched off (with a status note) when the 8-bit library is
# not being built.
if(PCRE2_BUILD_PCRE2GREP AND NOT PCRE2_BUILD_PCRE2_8)
  message(STATUS "** PCRE2_BUILD_PCRE2_8 must be enabled for the pcre2grep program")
  set(PCRE2_BUILD_PCRE2GREP OFF)
endif()
|
||||||
|
|
||||||
|
# libreadline and libedit cannot both be enabled; the error is raised only
# when libreadline was actually found.
if(PCRE2_SUPPORT_LIBREADLINE AND PCRE2_SUPPORT_LIBEDIT AND READLINE_FOUND)
  message(
    FATAL_ERROR
    " Only one of the readline compatible libraries can be enabled.\n"
    " Disable libreadline with -DPCRE2_SUPPORT_LIBREADLINE=OFF"
  )
endif()
|
||||||
|
|
||||||
|
# Translate the user-facing PCRE2_* options into the plain variables set below.
if(PCRE2_SUPPORT_BSR_ANYCRLF)
  set(BSR_ANYCRLF 1)
endif()

if(PCRE2_NEVER_BACKSLASH_C)
  set(NEVER_BACKSLASH_C 1)
endif()

if(PCRE2_SUPPORT_UNICODE)
  set(SUPPORT_UNICODE 1)
endif()

if(PCRE2_SUPPORT_JIT)
  set(SUPPORT_JIT 1)
endif()

# With JIT on UNIX the Threads package is mandatory; REQUIRE_PTHREAD is set
# when CMake reports that pthreads is in use.
if(PCRE2_SUPPORT_JIT AND UNIX)
  find_package(Threads REQUIRED)
  if(CMAKE_USE_PTHREADS_INIT)
    set(REQUIRE_PTHREAD 1)
  endif()
endif()
|
||||||
|
|
||||||
|
# JIT "sealloc" support: probe for mkostemp() in stdlib.h with _GNU_SOURCE
# defined. The probe result is stored in the variable REQUIRED.
#  - found, on Linux/NetBSD: define _GNU_SOURCE and set SLJIT_PROT_EXECUTABLE_ALLOCATOR
#  - found, on any other system: configuration is rejected
#  - not found: PCRE2_SUPPORT_JIT_SEALLOC is switched off
if(PCRE2_SUPPORT_JIT_SEALLOC)
  set(CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE)
  check_symbol_exists(mkostemp stdlib.h REQUIRED)
  unset(CMAKE_REQUIRED_DEFINITIONS)

  # Test the variables by name instead of expanding them into the condition:
  # an expanded value is evaluated a second time by if(), and an empty value
  # leaves the condition without its left-hand operand.
  if(REQUIRED)
    if(CMAKE_SYSTEM_NAME MATCHES "Linux|NetBSD")
      add_compile_definitions(_GNU_SOURCE)
      set(SLJIT_PROT_EXECUTABLE_ALLOCATOR 1)
    else()
      message(FATAL_ERROR "Your configuration is not supported")
    endif()
  else()
    set(PCRE2_SUPPORT_JIT_SEALLOC OFF)
  endif()
endif()
|
||||||
|
|
||||||
|
# pcre2grep feature switches.
if(PCRE2GREP_SUPPORT_JIT)
  set(SUPPORT_PCRE2GREP_JIT 1)
endif()

if(PCRE2GREP_SUPPORT_CALLOUT)
  set(SUPPORT_PCRE2GREP_CALLOUT 1)
endif()

# Fork-based callouts only count when callouts themselves are enabled.
if(PCRE2GREP_SUPPORT_CALLOUT AND PCRE2GREP_SUPPORT_CALLOUT_FORK)
  set(SUPPORT_PCRE2GREP_CALLOUT_FORK 1)
endif()

# Remaining simple option-to-variable translations.
if(PCRE2_SUPPORT_VALGRIND)
  set(SUPPORT_VALGRIND 1)
endif()

if(PCRE2_DISABLE_PERCENT_ZT)
  set(DISABLE_PERCENT_ZT 1)
endif()
|
||||||
|
|
||||||
|
# Libraries for pcre2test line editing. For readline, NCURSES_LIBRARY is
# linked in addition to READLINE_LIBRARY; per the original note (PH) this came
# with changes to cmake/FindReadline.cmake that should make it possible to
# override the default if necessary.
if(PCRE2_SUPPORT_LIBREADLINE)
  set(PCRE2TEST_LIBS ${READLINE_LIBRARY} ${NCURSES_LIBRARY})
  set(SUPPORT_LIBREADLINE 1)
endif()

# libedit is a plug-compatible alternative to libreadline; when enabled it
# replaces whatever PCRE2TEST_LIBS held before.
if(PCRE2_SUPPORT_LIBEDIT)
  set(PCRE2TEST_LIBS ${EDITLINE_LIBRARY})
  set(SUPPORT_LIBEDIT 1)
endif()
|
||||||
|
|
||||||
|
# Compression libraries linked into pcre2grep: zlib first, then bzip2.
if(PCRE2_SUPPORT_LIBZ)
  set(SUPPORT_LIBZ 1)
  list(APPEND PCRE2GREP_LIBS ${ZLIB_LIBRARIES})
endif()

if(PCRE2_SUPPORT_LIBBZ2)
  set(SUPPORT_LIBBZ2 1)
  list(APPEND PCRE2GREP_LIBS ${BZIP2_LIBRARIES})
endif()
|
||||||
|
|
||||||
|
# Map the PCRE2_NEWLINE name onto the numeric NEWLINE_DEFAULT value:
#   CR=1, LF=2, CRLF=3, ANY=4, ANYCRLF=5, NUL=6
# Any other value leaves NEWLINE_DEFAULT empty and aborts configuration.
set(NEWLINE_DEFAULT "")

if(PCRE2_NEWLINE STREQUAL "CR")
  set(NEWLINE_DEFAULT "1")
elseif(PCRE2_NEWLINE STREQUAL "LF")
  set(NEWLINE_DEFAULT "2")
elseif(PCRE2_NEWLINE STREQUAL "CRLF")
  set(NEWLINE_DEFAULT "3")
elseif(PCRE2_NEWLINE STREQUAL "ANY")
  set(NEWLINE_DEFAULT "4")
elseif(PCRE2_NEWLINE STREQUAL "ANYCRLF")
  set(NEWLINE_DEFAULT "5")
elseif(PCRE2_NEWLINE STREQUAL "NUL")
  set(NEWLINE_DEFAULT "6")
endif()

# The message lists every accepted value, including "NUL".
if(NEWLINE_DEFAULT STREQUAL "")
  message(
    FATAL_ERROR
    "The PCRE2_NEWLINE variable must be set to one of the following values: \"LF\", \"CR\", \"CRLF\", \"ANY\", \"ANYCRLF\", \"NUL\"."
  )
endif()
|
||||||
|
|
||||||
|
# EBCDIC is set by either option; EBCDIC_NL25 only by PCRE2_EBCDIC_NL25.
if(PCRE2_EBCDIC OR PCRE2_EBCDIC_NL25)
  set(EBCDIC 1)
endif()

if(PCRE2_EBCDIC_NL25)
  set(EBCDIC_NL25 1)
endif()
|
||||||
|
|
||||||
|
# Output files
|
||||||
|
|
||||||
|
configure_file(config-cmake.h.in ${PROJECT_BINARY_DIR}/config.h @ONLY)
|
||||||
|
|
||||||
|
# Parse version numbers and date out of configure.ac

# Only the first 50 lines of configure.ac are read.
file(STRINGS ${PROJECT_SOURCE_DIR}/configure.ac configure_lines LIMIT_COUNT 50)

# m4 macro names to look for; each is stored under its upper-cased name.
set(SEARCHED_VARIABLES
    "pcre2_major"
    "pcre2_minor"
    "pcre2_prerelease"
    "pcre2_date"
    "libpcre2_posix_version"
    "libpcre2_8_version"
    "libpcre2_16_version"
    "libpcre2_32_version"
)

foreach(configure_line ${configure_lines})
  foreach(substitution_variable ${SEARCHED_VARIABLES})
    string(TOUPPER ${substitution_variable} substitution_variable_upper)

    # Nothing to do for a name that already holds a true value.
    if(${substitution_variable_upper})
      continue()
    endif()

    # Capture the text inside the [...] of "m4_define(<name>, [...]".
    string(REGEX MATCH "m4_define\\(${substitution_variable}, *\\[(.*)\\]" MATCHED_STRING ${configure_line})
    if(CMAKE_MATCH_1)
      set(${substitution_variable_upper} ${CMAKE_MATCH_1})
    endif()
  endforeach()
endforeach()
|
||||||
|
|
||||||
|
# PARSE_LIB_VERSION(<prefix>)
#
# Reads <prefix>_VERSION, a "current:revision:age" string, and sets:
#   <prefix>_VERSION_LIST                 the three components as a list
#   <prefix>_VERSION_CURRENT / _REVISION / _AGE
#   <prefix>_SOVERSION                    current - age
#   <prefix>_MACHO_COMPATIBILITY_VERSION  current + 1
#   <prefix>_MACHO_CURRENT_VERSION        "<current + 1>.<revision>"
#   <prefix>_VERSION                      overwritten with "<soversion>.<age>.<revision>"
macro(PARSE_LIB_VERSION variable_prefix)
  string(REPLACE ":" ";" ${variable_prefix}_VERSION_LIST "${${variable_prefix}_VERSION}")
  list(GET ${variable_prefix}_VERSION_LIST 0 ${variable_prefix}_VERSION_CURRENT)
  list(GET ${variable_prefix}_VERSION_LIST 1 ${variable_prefix}_VERSION_REVISION)
  list(GET ${variable_prefix}_VERSION_LIST 2 ${variable_prefix}_VERSION_AGE)

  math(EXPR ${variable_prefix}_SOVERSION "${${variable_prefix}_VERSION_CURRENT} - ${${variable_prefix}_VERSION_AGE}")
  math(EXPR ${variable_prefix}_MACHO_COMPATIBILITY_VERSION "${${variable_prefix}_VERSION_CURRENT} + 1")
  math(EXPR ${variable_prefix}_MACHO_CURRENT_VERSION "${${variable_prefix}_VERSION_CURRENT} + 1")

  # No stray closing brace here: the value must be a clean "<n>.<revision>".
  set(
    ${variable_prefix}_MACHO_CURRENT_VERSION
    "${${variable_prefix}_MACHO_CURRENT_VERSION}.${${variable_prefix}_VERSION_REVISION}"
  )
  set(
    ${variable_prefix}_VERSION
    "${${variable_prefix}_SOVERSION}.${${variable_prefix}_VERSION_AGE}.${${variable_prefix}_VERSION_REVISION}"
  )
endmacro()
|
||||||
|
|
||||||
|
parse_lib_version(LIBPCRE2_POSIX)
|
||||||
|
parse_lib_version(LIBPCRE2_8)
|
||||||
|
parse_lib_version(LIBPCRE2_16)
|
||||||
|
parse_lib_version(LIBPCRE2_32)
|
||||||
|
|
||||||
|
configure_file(src/pcre2.h.in ${PROJECT_BINARY_DIR}/pcre2.h @ONLY)
|
||||||
|
|
||||||
|
# Make sure to not link debug libs
|
||||||
|
# against release libs and vice versa
|
||||||
|
if(WIN32)
|
||||||
|
set(CMAKE_DEBUG_POSTFIX "d")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# Character table generation
#
# When PCRE2_REBUILD_CHARTABLES is ON, pcre2_chartables.c is produced in the
# build tree by running the pcre2_dftables program; otherwise the distributed
# src/pcre2_chartables.c.dist is copied there unchanged.

option(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF)

if(NOT PCRE2_REBUILD_CHARTABLES)
  configure_file(${PROJECT_SOURCE_DIR}/src/pcre2_chartables.c.dist ${PROJECT_BINARY_DIR}/pcre2_chartables.c COPYONLY)
else()
  add_executable(pcre2_dftables src/pcre2_dftables.c)
  add_custom_command(
    OUTPUT ${PROJECT_BINARY_DIR}/pcre2_chartables.c
    COMMAND pcre2_dftables
    ARGS ${PROJECT_BINARY_DIR}/pcre2_chartables.c
    DEPENDS pcre2_dftables
    COMMENT "Generating character tables (pcre2_chartables.c) for current locale"
    VERBATIM
  )
endif()
|
||||||
|
|
||||||
|
# Source code
|
||||||
|
|
||||||
|
set(PCRE2_HEADERS ${PROJECT_BINARY_DIR}/pcre2.h)
|
||||||
|
|
||||||
|
set(
|
||||||
|
PCRE2_SOURCES
|
||||||
|
src/pcre2_auto_possess.c
|
||||||
|
${PROJECT_BINARY_DIR}/pcre2_chartables.c
|
||||||
|
src/pcre2_chkdint.c
|
||||||
|
src/pcre2_compile.c
|
||||||
|
src/pcre2_compile_class.c
|
||||||
|
src/pcre2_config.c
|
||||||
|
src/pcre2_context.c
|
||||||
|
src/pcre2_convert.c
|
||||||
|
src/pcre2_dfa_match.c
|
||||||
|
src/pcre2_error.c
|
||||||
|
src/pcre2_extuni.c
|
||||||
|
src/pcre2_find_bracket.c
|
||||||
|
src/pcre2_jit_compile.c
|
||||||
|
src/pcre2_maketables.c
|
||||||
|
src/pcre2_match.c
|
||||||
|
src/pcre2_match_data.c
|
||||||
|
src/pcre2_newline.c
|
||||||
|
src/pcre2_ord2utf.c
|
||||||
|
src/pcre2_pattern_info.c
|
||||||
|
src/pcre2_script_run.c
|
||||||
|
src/pcre2_serialize.c
|
||||||
|
src/pcre2_string_utils.c
|
||||||
|
src/pcre2_study.c
|
||||||
|
src/pcre2_substitute.c
|
||||||
|
src/pcre2_substring.c
|
||||||
|
src/pcre2_tables.c
|
||||||
|
src/pcre2_ucd.c
|
||||||
|
src/pcre2_valid_utf.c
|
||||||
|
src/pcre2_xclass.c
|
||||||
|
)
|
||||||
|
|
||||||
|
set(PCRE2POSIX_HEADERS src/pcre2posix.h)
|
||||||
|
set(PCRE2POSIX_SOURCES src/pcre2posix.c)
|
||||||
|
|
||||||
|
# MinGW shared builds: when the resource scripts exist in the source tree,
# compile them with windres and add the resulting objects to the library
# sources.
#
# Each rule lists the object file as its only OUTPUT. "PRE-LINK" is not a
# keyword of the OUTPUT signature, so it must not appear there: it would be
# taken as a second output file that windres never creates. The .rc file is
# declared as a dependency so the object is regenerated when it changes.
if(MINGW AND BUILD_SHARED_LIBS)
  if(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
    add_custom_command(
      OUTPUT ${PROJECT_SOURCE_DIR}/pcre2.o
      COMMAND windres
      ARGS pcre2.rc pcre2.o
      DEPENDS ${PROJECT_SOURCE_DIR}/pcre2.rc
      WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
      COMMENT "Using pcre2 coff info in mingw build"
    )
    set(PCRE2_SOURCES ${PCRE2_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2.o)
  endif()

  if(EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
    add_custom_command(
      OUTPUT ${PROJECT_SOURCE_DIR}/pcre2posix.o
      COMMAND windres
      ARGS pcre2posix.rc pcre2posix.o
      DEPENDS ${PROJECT_SOURCE_DIR}/pcre2posix.rc
      WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
      COMMENT "Using pcre2posix coff info in mingw build"
    )
    set(PCRE2POSIX_SOURCES ${PCRE2POSIX_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2posix.o)
  endif()
endif()
|
||||||
|
|
||||||
|
# MSVC shared builds: the resource scripts, when present in the source tree,
# are added directly to the source lists.
if(MSVC AND BUILD_SHARED_LIBS)
  if(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
    list(APPEND PCRE2_SOURCES pcre2.rc)
  endif()

  if(EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
    list(APPEND PCRE2POSIX_SOURCES pcre2posix.rc)
  endif()
endif()
|
||||||
|
|
||||||
|
# Fix static compilation with MSVC: https://bugs.exim.org/show_bug.cgi?id=1681
# This code was taken from the CMake wiki, not from WebM.
#
# Every "/MD" in the C flag variables below is rewritten to "/MT".

if(MSVC AND PCRE2_STATIC_RUNTIME)
  message(STATUS "** MSVC and PCRE2_STATIC_RUNTIME: modifying compiler flags to use static runtime library")
  foreach(
    flag_var
    IN ITEMS
    CMAKE_C_FLAGS
    CMAKE_C_FLAGS_DEBUG
    CMAKE_C_FLAGS_RELEASE
    CMAKE_C_FLAGS_MINSIZEREL
    CMAKE_C_FLAGS_RELWITHDEBINFO
  )
    string(REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
  endforeach()
endif()
|
||||||
|
|
||||||
|
# Build setup

add_compile_definitions(HAVE_CONFIG_H)

# PCRE2_DEBUG: the special value "IfDebugBuild" defines the macro only in the
# Debug configuration; any other true value defines it unconditionally.
if(PCRE2_DEBUG STREQUAL "IfDebugBuild")
  add_compile_definitions("$<$<CONFIG:Debug>:PCRE2_DEBUG>")
elseif(PCRE2_DEBUG)
  add_compile_definitions("PCRE2_DEBUG")
endif()

if(MSVC)
  add_compile_definitions(_CRT_SECURE_NO_DEPRECATE)
  add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
endif()

set(CMAKE_INCLUDE_CURRENT_DIR 1)

# TARGETS starts out empty; the library and program sections add to it.
set(TARGETS)
|
||||||
|
|
||||||
|
# 8-bit library
|
||||||
|
|
||||||
|
if(PCRE2_BUILD_PCRE2_8)
|
||||||
|
if(BUILD_STATIC_LIBS)
|
||||||
|
add_library(pcre2-8-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
|
||||||
|
set_target_properties(
|
||||||
|
pcre2-8-static
|
||||||
|
PROPERTIES
|
||||||
|
COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
|
||||||
|
MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_8_MACHO_COMPATIBILITY_VERSION}"
|
||||||
|
MACHO_CURRENT_VERSION "${LIBPCRE2_8_MACHO_CURRENT_VERSION}"
|
||||||
|
VERSION ${LIBPCRE2_8_VERSION}
|
||||||
|
SOVERSION ${LIBPCRE2_8_SOVERSION}
|
||||||
|
)
|
||||||
|
target_compile_definitions(pcre2-8-static PUBLIC PCRE2_STATIC)
|
||||||
|
target_include_directories(pcre2-8-static PUBLIC ${PROJECT_BINARY_DIR})
|
||||||
|
if(REQUIRE_PTHREAD)
|
||||||
|
target_link_libraries(pcre2-8-static Threads::Threads)
|
||||||
|
endif()
|
||||||
|
set(TARGETS ${TARGETS} pcre2-8-static)
|
||||||
|
add_library(pcre2-posix-static STATIC ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES})
|
||||||
|
set_target_properties(
|
||||||
|
pcre2-posix-static
|
||||||
|
PROPERTIES
|
||||||
|
COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
|
||||||
|
MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_POSIX_MACHO_COMPATIBILITY_VERSION}"
|
||||||
|
MACHO_CURRENT_VERSION "${LIBPCRE2_POSIX_MACHO_CURRENT_VERSION}"
|
||||||
|
VERSION ${LIBPCRE2_POSIX_VERSION}
|
||||||
|
SOVERSION ${LIBPCRE2_POSIX_SOVERSION}
|
||||||
|
)
|
||||||
|
target_link_libraries(pcre2-posix-static pcre2-8-static)
|
||||||
|
target_include_directories(pcre2-posix-static PUBLIC ${PROJECT_SOURCE_DIR}/src)
|
||||||
|
set(TARGETS ${TARGETS} pcre2-posix-static)
|
||||||
|
|
||||||
|
if(MSVC)
|
||||||
|
set_target_properties(pcre2-8-static PROPERTIES OUTPUT_NAME pcre2-8-static)
|
||||||
|
set_target_properties(pcre2-posix-static PROPERTIES OUTPUT_NAME pcre2-posix-static)
|
||||||
|
else()
|
||||||
|
set_target_properties(pcre2-8-static PROPERTIES OUTPUT_NAME pcre2-8)
|
||||||
|
set_target_properties(pcre2-posix-static PROPERTIES OUTPUT_NAME pcre2-posix)
|
||||||
|
endif()
|
||||||
|
if(PCRE2_STATIC_PIC)
|
||||||
|
set_target_properties(pcre2-8-static pcre2-posix-static PROPERTIES POSITION_INDEPENDENT_CODE 1)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(BUILD_SHARED_LIBS)
|
||||||
|
add_library(pcre2-8-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
|
||||||
|
target_include_directories(pcre2-8-shared PUBLIC ${PROJECT_BINARY_DIR})
|
||||||
|
set_target_properties(
|
||||||
|
pcre2-8-shared
|
||||||
|
PROPERTIES
|
||||||
|
COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
|
||||||
|
MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_8_MACHO_COMPATIBILITY_VERSION}"
|
||||||
|
MACHO_CURRENT_VERSION "${LIBPCRE2_8_MACHO_CURRENT_VERSION}"
|
||||||
|
VERSION ${LIBPCRE2_8_VERSION}
|
||||||
|
SOVERSION ${LIBPCRE2_8_SOVERSION}
|
||||||
|
OUTPUT_NAME pcre2-8
|
||||||
|
)
|
||||||
|
if(REQUIRE_PTHREAD)
|
||||||
|
target_link_libraries(pcre2-8-shared Threads::Threads)
|
||||||
|
endif()
|
||||||
|
set(TARGETS ${TARGETS} pcre2-8-shared)
|
||||||
|
set(DLL_PDB_FILES $<TARGET_PDB_FILE_DIR:pcre2-8-shared>/pcre2-8.pdb ${DLL_PDB_FILES})
|
||||||
|
set(DLL_PDB_DEBUG_FILES $<TARGET_PDB_FILE_DIR:pcre2-8-shared>/pcre2-8d.pdb ${DLL_PDB_DEBUG_FILES})
|
||||||
|
|
||||||
|
add_library(pcre2-posix-shared SHARED ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES})
|
||||||
|
target_include_directories(pcre2-posix-shared PUBLIC ${PROJECT_SOURCE_DIR}/src)
|
||||||
|
set_target_properties(
|
||||||
|
pcre2-posix-shared
|
||||||
|
PROPERTIES
|
||||||
|
COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
|
||||||
|
MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_POSIX_MACHO_COMPATIBILITY_VERSION}"
|
||||||
|
MACHO_CURRENT_VERSION "${LIBPCRE2_POSIX_MACHO_CURRENT_VERSION}"
|
||||||
|
VERSION ${LIBPCRE2_POSIX_VERSION}
|
||||||
|
SOVERSION ${LIBPCRE2_POSIX_SOVERSION}
|
||||||
|
OUTPUT_NAME pcre2-posix
|
||||||
|
)
|
||||||
|
set(PCRE2POSIX_CFLAG "-DPCRE2POSIX_SHARED")
|
||||||
|
target_compile_definitions(pcre2-posix-shared PUBLIC ${PCRE2POSIX_CFLAG})
|
||||||
|
target_link_libraries(pcre2-posix-shared pcre2-8-shared)
|
||||||
|
set(TARGETS ${TARGETS} pcre2-posix-shared)
|
||||||
|
set(DLL_PDB_FILES $<TARGET_PDB_FILE_DIR:pcre2-posix-shared>/pcre2-posix.pdb ${DLL_PDB_FILES})
|
||||||
|
set(DLL_PDB_DEBUG_FILES $<TARGET_PDB_FILE_DIR:pcre2-posix-shared>/pcre2-posixd.pdb ${DLL_PDB_DEBUG_FILES})
|
||||||
|
|
||||||
|
if(MINGW)
|
||||||
|
if(NON_STANDARD_LIB_PREFIX)
|
||||||
|
set_target_properties(pcre2-8-shared pcre2-posix-shared PROPERTIES PREFIX "")
|
||||||
|
endif()
|
||||||
|
if(NON_STANDARD_LIB_SUFFIX)
|
||||||
|
set_target_properties(pcre2-8-shared pcre2-posix-shared PROPERTIES SUFFIX "-0.dll")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(BUILD_STATIC_LIBS)
|
||||||
|
add_library(pcre2-8 ALIAS pcre2-8-static)
|
||||||
|
add_library(pcre2-posix ALIAS pcre2-posix-static)
|
||||||
|
else()
|
||||||
|
add_library(pcre2-8 ALIAS pcre2-8-shared)
|
||||||
|
add_library(pcre2-posix ALIAS pcre2-posix-shared)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# 16-bit library
#
# Builds the static and/or shared 16-bit libraries and the "pcre2-16" alias
# (the static target when BUILD_STATIC_LIBS is on, otherwise the shared one).
# The Mach-O version properties use the 16-bit library's own values, i.e. the
# LIBPCRE2_16_MACHO_* variables produced by parse_lib_version(LIBPCRE2_16),
# matching the LIBPCRE2_16_VERSION / LIBPCRE2_16_SOVERSION used next to them.

if(PCRE2_BUILD_PCRE2_16)
  if(BUILD_STATIC_LIBS)
    add_library(pcre2-16-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
    target_include_directories(pcre2-16-static PUBLIC ${PROJECT_BINARY_DIR})
    set_target_properties(
      pcre2-16-static
      PROPERTIES
        UNITY_BUILD OFF
        COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16
        MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_16_MACHO_COMPATIBILITY_VERSION}"
        MACHO_CURRENT_VERSION "${LIBPCRE2_16_MACHO_CURRENT_VERSION}"
        VERSION ${LIBPCRE2_16_VERSION}
        SOVERSION ${LIBPCRE2_16_SOVERSION}
    )
    target_compile_definitions(pcre2-16-static PUBLIC PCRE2_STATIC)
    if(REQUIRE_PTHREAD)
      target_link_libraries(pcre2-16-static Threads::Threads)
    endif()
    set(TARGETS ${TARGETS} pcre2-16-static)

    # With MSVC the static library keeps the "-static" suffix in its file name.
    if(MSVC)
      set_target_properties(pcre2-16-static PROPERTIES OUTPUT_NAME pcre2-16-static)
    else()
      set_target_properties(pcre2-16-static PROPERTIES OUTPUT_NAME pcre2-16)
    endif()
    if(PCRE2_STATIC_PIC)
      set_target_properties(pcre2-16-static PROPERTIES POSITION_INDEPENDENT_CODE 1)
    endif()
  endif()

  if(BUILD_SHARED_LIBS)
    add_library(pcre2-16-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
    target_include_directories(pcre2-16-shared PUBLIC ${PROJECT_BINARY_DIR})
    set_target_properties(
      pcre2-16-shared
      PROPERTIES
        UNITY_BUILD OFF
        COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16
        MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_16_MACHO_COMPATIBILITY_VERSION}"
        MACHO_CURRENT_VERSION "${LIBPCRE2_16_MACHO_CURRENT_VERSION}"
        VERSION ${LIBPCRE2_16_VERSION}
        SOVERSION ${LIBPCRE2_16_SOVERSION}
        OUTPUT_NAME pcre2-16
    )
    if(REQUIRE_PTHREAD)
      target_link_libraries(pcre2-16-shared Threads::Threads)
    endif()
    set(TARGETS ${TARGETS} pcre2-16-shared)
    set(DLL_PDB_FILES $<TARGET_PDB_FILE_DIR:pcre2-16-shared>/pcre2-16.pdb ${DLL_PDB_FILES})
    set(DLL_PDB_DEBUG_FILES $<TARGET_PDB_FILE_DIR:pcre2-16-shared>/pcre2-16d.pdb ${DLL_PDB_DEBUG_FILES})

    # Optional MinGW naming overrides for the DLL.
    if(MINGW)
      if(NON_STANDARD_LIB_PREFIX)
        set_target_properties(pcre2-16-shared PROPERTIES PREFIX "")
      endif()
      if(NON_STANDARD_LIB_SUFFIX)
        set_target_properties(pcre2-16-shared PROPERTIES SUFFIX "-0.dll")
      endif()
    endif()
  endif()

  if(BUILD_STATIC_LIBS)
    add_library(pcre2-16 ALIAS pcre2-16-static)
  else()
    add_library(pcre2-16 ALIAS pcre2-16-shared)
  endif()
endif()
|
||||||
|
|
||||||
|
# 32-bit library
|
||||||
|
|
||||||
|
if(PCRE2_BUILD_PCRE2_32)
|
||||||
|
if(BUILD_STATIC_LIBS)
|
||||||
|
add_library(pcre2-32-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
|
||||||
|
target_include_directories(pcre2-32-static PUBLIC ${PROJECT_BINARY_DIR})
|
||||||
|
set_target_properties(
|
||||||
|
pcre2-32-static
|
||||||
|
PROPERTIES
|
||||||
|
UNITY_BUILD OFF
|
||||||
|
COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32
|
||||||
|
MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}"
|
||||||
|
MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}"
|
||||||
|
VERSION ${LIBPCRE2_32_VERSION}
|
||||||
|
SOVERSION ${LIBPCRE2_32_SOVERSION}
|
||||||
|
)
|
||||||
|
target_compile_definitions(pcre2-32-static PUBLIC PCRE2_STATIC)
|
||||||
|
if(REQUIRE_PTHREAD)
|
||||||
|
target_link_libraries(pcre2-32-static Threads::Threads)
|
||||||
|
endif()
|
||||||
|
set(TARGETS ${TARGETS} pcre2-32-static)
|
||||||
|
|
||||||
|
if(MSVC)
|
||||||
|
set_target_properties(pcre2-32-static PROPERTIES OUTPUT_NAME pcre2-32-static)
|
||||||
|
else()
|
||||||
|
set_target_properties(pcre2-32-static PROPERTIES OUTPUT_NAME pcre2-32)
|
||||||
|
endif()
|
||||||
|
if(PCRE2_STATIC_PIC)
|
||||||
|
set_target_properties(pcre2-32-static PROPERTIES POSITION_INDEPENDENT_CODE 1)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(BUILD_SHARED_LIBS)
|
||||||
|
add_library(pcre2-32-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
|
||||||
|
target_include_directories(pcre2-32-shared PUBLIC ${PROJECT_BINARY_DIR})
|
||||||
|
set_target_properties(
|
||||||
|
pcre2-32-shared
|
||||||
|
PROPERTIES
|
||||||
|
UNITY_BUILD OFF
|
||||||
|
COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32
|
||||||
|
MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}"
|
||||||
|
MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}"
|
||||||
|
VERSION ${LIBPCRE2_32_VERSION}
|
||||||
|
SOVERSION ${LIBPCRE2_32_SOVERSION}
|
||||||
|
OUTPUT_NAME pcre2-32
|
||||||
|
)
|
||||||
|
if(REQUIRE_PTHREAD)
|
||||||
|
target_link_libraries(pcre2-32-shared Threads::Threads)
|
||||||
|
endif()
|
||||||
|
set(TARGETS ${TARGETS} pcre2-32-shared)
|
||||||
|
set(DLL_PDB_FILES $<TARGET_PDB_FILE_DIR:pcre2-32-shared>/pcre2-32.pdb ${DLL_PDB_FILES})
|
||||||
|
set(DLL_PDB_DEBUG_FILES $<TARGET_PDB_FILE_DIR:pcre2-32-shared>/pcre2-32d.pdb ${DLL_PDB_DEBUG_FILES})
|
||||||
|
|
||||||
|
if(MINGW)
|
||||||
|
if(NON_STANDARD_LIB_PREFIX)
|
||||||
|
set_target_properties(pcre2-32-shared PROPERTIES PREFIX "")
|
||||||
|
endif()
|
||||||
|
if(NON_STANDARD_LIB_SUFFIX)
|
||||||
|
set_target_properties(pcre2-32-shared PROPERTIES SUFFIX "-0.dll")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(BUILD_STATIC_LIBS)
|
||||||
|
add_library(pcre2-32 ALIAS pcre2-32-static)
|
||||||
|
else()
|
||||||
|
add_library(pcre2-32 ALIAS pcre2-32-shared)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# Generate pkg-config files

# Variables substituted into the templates configured below.
set(PACKAGE_VERSION "${PCRE2_MAJOR}.${PCRE2_MINOR}")
set(prefix ${CMAKE_INSTALL_PREFIX})
set(exec_prefix "\${prefix}")
set(libdir "\${exec_prefix}/${CMAKE_INSTALL_LIBDIR}")
set(includedir "\${prefix}/include")
if(WIN32 AND (CMAKE_BUILD_TYPE MATCHES Debug))
  set(LIB_POSTFIX ${CMAKE_DEBUG_POSTFIX})
endif()

# The POSIX wrapper's .pc file goes with the 8-bit library and is listed first.
if(PCRE2_BUILD_PCRE2_8)
  configure_file(libpcre2-posix.pc.in libpcre2-posix.pc @ONLY)
  list(APPEND pkg_config_files "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-posix.pc")
endif()

# One .pc file per enabled width, plus an enable_pcre2_<width> yes/no value.
foreach(_pcre2_pc_width 8 16 32)
  if(PCRE2_BUILD_PCRE2_${_pcre2_pc_width})
    configure_file(libpcre2-${_pcre2_pc_width}.pc.in libpcre2-${_pcre2_pc_width}.pc @ONLY)
    list(APPEND pkg_config_files "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-${_pcre2_pc_width}.pc")
    set(enable_pcre2_${_pcre2_pc_width} "yes")
  else()
    set(enable_pcre2_${_pcre2_pc_width} "no")
  endif()
endforeach()

# pcre2-config is configured last and written with LF line endings.
configure_file(pcre2-config.in pcre2-config @ONLY NEWLINE_STYLE LF)
|
||||||
|
|
||||||
|
# Executables

# pcre2grep is compiled for 8-bit code units and linked against pcre2-posix
# plus whatever PCRE2GREP_LIBS holds.
if(PCRE2_BUILD_PCRE2GREP)
  add_executable(pcre2grep src/pcre2grep.c)
  set_target_properties(pcre2grep PROPERTIES COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
  list(APPEND TARGETS pcre2grep)
  target_link_libraries(pcre2grep pcre2-posix ${PCRE2GREP_LIBS})
endif()
|
||||||
|
|
||||||
|
# Testing
|
||||||
|
|
||||||
|
if(PCRE2_BUILD_TESTS)
|
||||||
|
enable_testing()
|
||||||
|
|
||||||
|
set(PCRE2TEST_SOURCES src/pcre2test.c)
|
||||||
|
|
||||||
|
if(MSVC)
|
||||||
|
# This is needed to avoid a stack overflow error in the standard tests. The
|
||||||
|
# flag should be indicated with a forward-slash instead of a hyphen, but
|
||||||
|
# then CMake treats it as a file path.
|
||||||
|
set(PCRE2TEST_LINKER_FLAGS -STACK:2500000)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
add_executable(pcre2test ${PCRE2TEST_SOURCES})
|
||||||
|
set(TARGETS ${TARGETS} pcre2test)
|
||||||
|
if(PCRE2_BUILD_PCRE2_8)
|
||||||
|
list(APPEND PCRE2TEST_LIBS pcre2-posix pcre2-8)
|
||||||
|
endif()
|
||||||
|
if(PCRE2_BUILD_PCRE2_16)
|
||||||
|
list(APPEND PCRE2TEST_LIBS pcre2-16)
|
||||||
|
endif()
|
||||||
|
if(PCRE2_BUILD_PCRE2_32)
|
||||||
|
list(APPEND PCRE2TEST_LIBS pcre2-32)
|
||||||
|
endif()
|
||||||
|
target_link_libraries(pcre2test ${PCRE2TEST_LIBS} ${PCRE2TEST_LINKER_FLAGS})
|
||||||
|
|
||||||
|
if(PCRE2_BUILD_PCRE2_8)
|
||||||
|
add_executable(pcre2posix_test src/pcre2posix_test.c)
|
||||||
|
target_link_libraries(pcre2posix_test pcre2-posix pcre2-8)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(PCRE2_SUPPORT_JIT)
|
||||||
|
add_executable(pcre2_jit_test src/pcre2_jit_test.c)
|
||||||
|
set(PCRE2_JIT_TEST_LIBS)
|
||||||
|
if(PCRE2_BUILD_PCRE2_8)
|
||||||
|
list(APPEND PCRE2_JIT_TEST_LIBS pcre2-8)
|
||||||
|
endif()
|
||||||
|
if(PCRE2_BUILD_PCRE2_16)
|
||||||
|
list(APPEND PCRE2_JIT_TEST_LIBS pcre2-16)
|
||||||
|
endif()
|
||||||
|
if(PCRE2_BUILD_PCRE2_32)
|
||||||
|
list(APPEND PCRE2_JIT_TEST_LIBS pcre2-32)
|
||||||
|
endif()
|
||||||
|
target_link_libraries(pcre2_jit_test ${PCRE2_JIT_TEST_LIBS})
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# =================================================
|
||||||
|
# Write out a CTest configuration file
|
||||||
|
#
|
||||||
|
file(
|
||||||
|
WRITE
|
||||||
|
${PROJECT_BINARY_DIR}/CTestCustom.ctest
|
||||||
|
"# This is a generated file.
|
||||||
|
MESSAGE(\"When testing is complete, review test output in the
|
||||||
|
\\\"${PROJECT_BINARY_DIR}/Testing/Temporary\\\" folder.\")
|
||||||
|
MESSAGE(\" \")
|
||||||
|
"
|
||||||
|
)
|
||||||
|
|
||||||
|
file(
|
||||||
|
WRITE
|
||||||
|
${PROJECT_BINARY_DIR}/pcre2_test.sh
|
||||||
|
"#! /bin/sh
|
||||||
|
# This is a generated file.
|
||||||
|
srcdir=${PROJECT_SOURCE_DIR}
|
||||||
|
pcre2test=${PROJECT_BINARY_DIR}/pcre2test
|
||||||
|
test -z \"$CMAKE_CONFIG_TYPE\" || pcre2test=${PROJECT_BINARY_DIR}/$CMAKE_CONFIG_TYPE/pcre2test
|
||||||
|
. ${PROJECT_SOURCE_DIR}/RunTest
|
||||||
|
if test \"$?\" != \"0\"; then exit 1; fi
|
||||||
|
# End
|
||||||
|
"
|
||||||
|
)
|
||||||
|
|
||||||
|
if(UNIX)
|
||||||
|
add_test(pcre2_test sh ${PROJECT_BINARY_DIR}/pcre2_test.sh)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(PCRE2_BUILD_PCRE2GREP)
|
||||||
|
file(
|
||||||
|
WRITE
|
||||||
|
${PROJECT_BINARY_DIR}/pcre2_grep_test.sh
|
||||||
|
"#! /bin/sh
|
||||||
|
# This is a generated file.
|
||||||
|
srcdir=${PROJECT_SOURCE_DIR}
|
||||||
|
pcre2grep=${PROJECT_BINARY_DIR}/pcre2grep
|
||||||
|
test -z \"$CMAKE_CONFIG_TYPE\" || pcre2grep=${PROJECT_BINARY_DIR}/$CMAKE_CONFIG_TYPE/pcre2grep
|
||||||
|
pcre2test=${PROJECT_BINARY_DIR}/pcre2test
|
||||||
|
test -z \"$CMAKE_CONFIG_TYPE\" || pcre2test=${PROJECT_BINARY_DIR}/$CMAKE_CONFIG_TYPE/pcre2test
|
||||||
|
. ${PROJECT_SOURCE_DIR}/RunGrepTest
|
||||||
|
if test \"$?\" != \"0\"; then exit 1; fi
|
||||||
|
# End
|
||||||
|
"
|
||||||
|
)
|
||||||
|
|
||||||
|
if(UNIX)
|
||||||
|
add_test(pcre2_grep_test sh ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(WIN32)
|
||||||
|
# Provide environment for executing the bat file version of RunTest
|
||||||
|
file(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} winsrc)
|
||||||
|
file(TO_NATIVE_PATH ${PROJECT_BINARY_DIR} winbin)
|
||||||
|
|
||||||
|
file(
|
||||||
|
WRITE
|
||||||
|
${PROJECT_BINARY_DIR}/pcre2_test.bat
|
||||||
|
"\@REM This is a generated file.
|
||||||
|
\@echo off
|
||||||
|
setlocal
|
||||||
|
SET srcdir=\"${winsrc}\"
|
||||||
|
SET pcre2test=\"${winbin}\\pcre2test.exe\"
|
||||||
|
if not [%CMAKE_CONFIG_TYPE%]==[] SET pcre2test=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcre2test.exe\"
|
||||||
|
call %srcdir%\\RunTest.bat
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo RunTest.bat tests successfully completed
|
||||||
|
"
|
||||||
|
)
|
||||||
|
|
||||||
|
add_test(NAME pcre2_test_bat COMMAND pcre2_test.bat)
|
||||||
|
set_tests_properties(pcre2_test_bat PROPERTIES PASS_REGULAR_EXPRESSION "RunTest\\.bat tests successfully completed")
|
||||||
|
|
||||||
|
if(PCRE2_BUILD_PCRE2GREP)
|
||||||
|
file(
|
||||||
|
WRITE
|
||||||
|
${PROJECT_BINARY_DIR}/pcre2_grep_test.bat
|
||||||
|
"\@REM This is a generated file.
|
||||||
|
\@echo off
|
||||||
|
setlocal
|
||||||
|
SET srcdir=\"${winsrc}\"
|
||||||
|
SET pcre2test=\"${winbin}\\pcre2test.exe\"
|
||||||
|
if not [%CMAKE_CONFIG_TYPE%]==[] SET pcre2test=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcre2test.exe\"
|
||||||
|
SET pcre2grep=\"${winbin}\\pcre2grep.exe\"
|
||||||
|
if not [%CMAKE_CONFIG_TYPE%]==[] SET pcre2grep=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcre2grep.exe\"
|
||||||
|
call %srcdir%\\RunGrepTest.bat
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo RunGrepTest.bat tests successfully completed
|
||||||
|
"
|
||||||
|
)
|
||||||
|
|
||||||
|
add_test(NAME pcre2_grep_test_bat COMMAND pcre2_grep_test.bat)
|
||||||
|
set_tests_properties(
|
||||||
|
pcre2_grep_test_bat
|
||||||
|
PROPERTIES PASS_REGULAR_EXPRESSION "RunGrepTest\\.bat tests successfully completed"
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if("$ENV{OSTYPE}" STREQUAL "msys")
|
||||||
|
# Both the sh and bat file versions of RunTest are run if make test is used
|
||||||
|
# in msys
|
||||||
|
add_test(pcre2_test_sh sh.exe ${PROJECT_BINARY_DIR}/pcre2_test.sh)
|
||||||
|
if(PCRE2_BUILD_PCRE2GREP)
|
||||||
|
add_test(pcre2_grep_test sh.exe ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# Changed to accommodate testing whichever location was just built
|
||||||
|
|
||||||
|
if(PCRE2_SUPPORT_JIT)
|
||||||
|
add_test(pcre2_jit_test pcre2_jit_test)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(PCRE2_BUILD_PCRE2_8)
|
||||||
|
add_test(pcre2posix_test pcre2posix_test)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# Installation
|
||||||
|
|
||||||
|
set(CMAKE_INSTALL_ALWAYS 1)
|
||||||
|
|
||||||
|
install(
|
||||||
|
TARGETS ${TARGETS}
|
||||||
|
RUNTIME DESTINATION bin
|
||||||
|
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||||
|
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||||
|
)
|
||||||
|
install(FILES ${pkg_config_files} DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
|
||||||
|
install(
|
||||||
|
FILES "${CMAKE_CURRENT_BINARY_DIR}/pcre2-config"
|
||||||
|
DESTINATION bin
|
||||||
|
# Set 0755 permissions
|
||||||
|
PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE
|
||||||
|
)
|
||||||
|
|
||||||
|
install(FILES ${PCRE2_HEADERS} ${PCRE2POSIX_HEADERS} DESTINATION include)
|
||||||
|
|
||||||
|
# CMake config files.
|
||||||
|
set(PCRE2_CONFIG_IN ${CMAKE_CURRENT_SOURCE_DIR}/cmake/pcre2-config.cmake.in)
|
||||||
|
set(PCRE2_CONFIG_OUT ${CMAKE_CURRENT_BINARY_DIR}/cmake/pcre2-config.cmake)
|
||||||
|
configure_file(${PCRE2_CONFIG_IN} ${PCRE2_CONFIG_OUT} @ONLY)
|
||||||
|
set(PCRE2_CONFIG_VERSION_IN ${CMAKE_CURRENT_SOURCE_DIR}/cmake/pcre2-config-version.cmake.in)
|
||||||
|
set(PCRE2_CONFIG_VERSION_OUT ${CMAKE_CURRENT_BINARY_DIR}/cmake/pcre2-config-version.cmake)
|
||||||
|
configure_file(${PCRE2_CONFIG_VERSION_IN} ${PCRE2_CONFIG_VERSION_OUT} @ONLY)
|
||||||
|
install(FILES ${PCRE2_CONFIG_OUT} ${PCRE2_CONFIG_VERSION_OUT} DESTINATION "${PCRE2_INSTALL_CMAKEDIR}")
|
||||||
|
|
||||||
|
file(GLOB html ${PROJECT_SOURCE_DIR}/doc/html/*.html ${PROJECT_SOURCE_DIR}/doc/html/*.txt)
|
||||||
|
file(
|
||||||
|
GLOB txts
|
||||||
|
${PROJECT_SOURCE_DIR}/doc/*.txt
|
||||||
|
AUTHORS.md
|
||||||
|
COPYING
|
||||||
|
ChangeLog
|
||||||
|
LICENCE.md
|
||||||
|
NEWS
|
||||||
|
README
|
||||||
|
SECURITY.md
|
||||||
|
)
|
||||||
|
file(GLOB man1 ${PROJECT_SOURCE_DIR}/doc/*.1)
|
||||||
|
file(GLOB man3 ${PROJECT_SOURCE_DIR}/doc/*.3)
|
||||||
|
|
||||||
|
install(FILES ${man1} DESTINATION ${CMAKE_INSTALL_MANDIR}/man1)
|
||||||
|
install(FILES ${man3} DESTINATION ${CMAKE_INSTALL_MANDIR}/man3)
|
||||||
|
install(FILES ${txts} DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/doc/pcre2)
|
||||||
|
install(FILES ${html} DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/doc/pcre2/html)
|
||||||
|
|
||||||
|
if(MSVC AND INSTALL_MSVC_PDB)
|
||||||
|
install(FILES ${DLL_PDB_FILES} DESTINATION bin CONFIGURATIONS RelWithDebInfo)
|
||||||
|
install(FILES ${DLL_PDB_DEBUG_FILES} DESTINATION bin CONFIGURATIONS Debug)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# Help, only for nice output
|
||||||
|
if(BUILD_STATIC_LIBS)
|
||||||
|
set(BUILD_STATIC_LIBS ON)
|
||||||
|
else()
|
||||||
|
set(BUILD_STATIC_LIBS OFF)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(PCRE2_HEAP_MATCH_RECURSE)
|
||||||
|
message(WARNING "HEAP_MATCH_RECURSE is obsolete and does nothing.")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(PCRE2_SHOW_REPORT)
|
||||||
|
message(STATUS "")
|
||||||
|
message(STATUS "")
|
||||||
|
message(STATUS "PCRE2-${PCRE2_MAJOR}.${PCRE2_MINOR} configuration summary:")
|
||||||
|
message(STATUS "")
|
||||||
|
message(STATUS " Install prefix .................... : ${CMAKE_INSTALL_PREFIX}")
|
||||||
|
message(STATUS " C compiler ........................ : ${CMAKE_C_COMPILER}")
|
||||||
|
|
||||||
|
if(CMAKE_C_FLAGS)
|
||||||
|
set(CFSP " ")
|
||||||
|
endif()
|
||||||
|
# Report the C compiler flags: one row per entry of CMAKE_CONFIGURATION_TYPES
# when that list is set, otherwise a single row for CMAKE_BUILD_TYPE.
if(CMAKE_CONFIGURATION_TYPES)
  foreach(config IN LISTS CMAKE_CONFIGURATION_TYPES)
    string(TOUPPER "${config}" buildtype)
    string(LENGTH " (${config})" buildtypelen)
    # Pad the label with dots so the colon lines up with the fixed-width
    # "C compiler flags .................. :" row used in the else() branch.
    math(EXPR dotslen "18 - ${buildtypelen}")
    # A long configuration name makes the padding negative, and string(REPEAT)
    # fails on a negative count. Fall back to no padding instead of aborting.
    if(dotslen LESS 0)
      set(dotslen 0)
    endif()
    string(REPEAT "." ${dotslen} dots)
    message(STATUS " C compiler flags (${config}) ${dots} : ${CMAKE_C_FLAGS}${CFSP}${CMAKE_C_FLAGS_${buildtype}}")
  endforeach()
else()
  string(TOUPPER "${CMAKE_BUILD_TYPE}" buildtype)
  message(STATUS " C compiler flags .................. : ${CMAKE_C_FLAGS}${CFSP}${CMAKE_C_FLAGS_${buildtype}}")
endif()
|
||||||
|
|
||||||
|
message(STATUS "")
|
||||||
|
if(CMAKE_CONFIGURATION_TYPES)
|
||||||
|
message(STATUS " Build configurations .............. : ${CMAKE_CONFIGURATION_TYPES}")
|
||||||
|
else()
|
||||||
|
message(STATUS " Build type ........................ : ${CMAKE_BUILD_TYPE}")
|
||||||
|
endif()
|
||||||
|
message(STATUS " Build 8 bit PCRE2 library ......... : ${PCRE2_BUILD_PCRE2_8}")
|
||||||
|
message(STATUS " Build 16 bit PCRE2 library ........ : ${PCRE2_BUILD_PCRE2_16}")
|
||||||
|
message(STATUS " Build 32 bit PCRE2 library ........ : ${PCRE2_BUILD_PCRE2_32}")
|
||||||
|
message(STATUS " Include debugging code ............ : ${PCRE2_DEBUG}")
|
||||||
|
message(STATUS " Enable JIT compiling support ...... : ${PCRE2_SUPPORT_JIT}")
|
||||||
|
message(STATUS " Use SELinux allocator in JIT ...... : ${PCRE2_SUPPORT_JIT_SEALLOC}")
|
||||||
|
message(STATUS " Enable Unicode support ............ : ${PCRE2_SUPPORT_UNICODE}")
|
||||||
|
message(STATUS " Newline char/sequence ............. : ${PCRE2_NEWLINE}")
|
||||||
|
message(STATUS " \\R matches only ANYCRLF ........... : ${PCRE2_SUPPORT_BSR_ANYCRLF}")
|
||||||
|
message(STATUS " \\C is disabled .................... : ${PCRE2_NEVER_BACKSLASH_C}")
|
||||||
|
message(STATUS " EBCDIC coding ..................... : ${PCRE2_EBCDIC}")
|
||||||
|
message(STATUS " EBCDIC coding with NL=0x25 ........ : ${PCRE2_EBCDIC_NL25}")
|
||||||
|
message(STATUS " Rebuild char tables ............... : ${PCRE2_REBUILD_CHARTABLES}")
|
||||||
|
message(STATUS " Internal link size ................ : ${PCRE2_LINK_SIZE}")
|
||||||
|
message(STATUS " Maximum variable lookbehind ....... : ${PCRE2_MAX_VARLOOKBEHIND}")
|
||||||
|
message(STATUS " Parentheses nest limit ............ : ${PCRE2_PARENS_NEST_LIMIT}")
|
||||||
|
message(STATUS " Heap limit ........................ : ${PCRE2_HEAP_LIMIT}")
|
||||||
|
message(STATUS " Match limit ....................... : ${PCRE2_MATCH_LIMIT}")
|
||||||
|
message(STATUS " Match depth limit ................. : ${PCRE2_MATCH_LIMIT_DEPTH}")
|
||||||
|
message(STATUS " Build shared libs ................. : ${BUILD_SHARED_LIBS}")
|
||||||
|
message(STATUS " Build static libs ................. : ${BUILD_STATIC_LIBS}")
|
||||||
|
message(STATUS " with PIC enabled ............... : ${PCRE2_STATIC_PIC}")
|
||||||
|
message(STATUS " Build pcre2grep ................... : ${PCRE2_BUILD_PCRE2GREP}")
|
||||||
|
message(STATUS " Enable JIT in pcre2grep ........... : ${PCRE2GREP_SUPPORT_JIT}")
|
||||||
|
message(STATUS " Enable callouts in pcre2grep ...... : ${PCRE2GREP_SUPPORT_CALLOUT}")
|
||||||
|
message(STATUS " Enable callout fork in pcre2grep .. : ${PCRE2GREP_SUPPORT_CALLOUT_FORK}")
|
||||||
|
message(STATUS " Buffer size for pcre2grep ......... : ${PCRE2GREP_BUFSIZE}")
|
||||||
|
message(STATUS " Build tests (implies pcre2test .... : ${PCRE2_BUILD_TESTS}")
|
||||||
|
message(STATUS " and pcre2grep)")
|
||||||
|
if(ZLIB_FOUND)
|
||||||
|
message(STATUS " Link pcre2grep with libz .......... : ${PCRE2_SUPPORT_LIBZ}")
|
||||||
|
else()
|
||||||
|
message(STATUS " Link pcre2grep with libz .......... : Library not found")
|
||||||
|
endif()
|
||||||
|
if(BZIP2_FOUND)
|
||||||
|
message(STATUS " Link pcre2grep with libbz2 ........ : ${PCRE2_SUPPORT_LIBBZ2}")
|
||||||
|
else()
|
||||||
|
message(STATUS " Link pcre2grep with libbz2 ........ : Library not found")
|
||||||
|
endif()
|
||||||
|
if(EDITLINE_FOUND)
|
||||||
|
message(STATUS " Link pcre2test with libeditline ... : ${PCRE2_SUPPORT_LIBEDIT}")
|
||||||
|
else()
|
||||||
|
message(STATUS " Link pcre2test with libeditline ... : Library not found")
|
||||||
|
endif()
|
||||||
|
if(READLINE_FOUND)
|
||||||
|
message(STATUS " Link pcre2test with libreadline ... : ${PCRE2_SUPPORT_LIBREADLINE}")
|
||||||
|
else()
|
||||||
|
message(STATUS " Link pcre2test with libreadline ... : Library not found")
|
||||||
|
endif()
|
||||||
|
message(STATUS " Support Valgrind .................. : ${PCRE2_SUPPORT_VALGRIND}")
|
||||||
|
if(PCRE2_DISABLE_PERCENT_ZT)
|
||||||
|
message(STATUS " Use %zu and %td ................... : OFF")
|
||||||
|
else()
|
||||||
|
message(STATUS " Use %zu and %td ................... : AUTO")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(MINGW AND BUILD_SHARED_LIBS)
|
||||||
|
message(STATUS " Non-standard dll names (prefix) ... : ${NON_STANDARD_LIB_PREFIX}")
|
||||||
|
message(STATUS " Non-standard dll names (suffix) ... : ${NON_STANDARD_LIB_SUFFIX}")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(MSVC)
|
||||||
|
message(STATUS " Install MSVC .pdb files ........... : ${INSTALL_MSVC_PDB}")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
message(STATUS "")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# end CMakeLists.txt
|
||||||
22
3rd/pcre2/cmake/COPYING-CMAKE-SCRIPTS
Normal file
22
3rd/pcre2/cmake/COPYING-CMAKE-SCRIPTS
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
|
||||||
|
1. Redistributions of source code must retain the copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
3. The name of the author may not be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||||
|
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||||
|
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||||
|
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||||
|
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
13
3rd/pcre2/cmake/FindEditline.cmake
Normal file
13
3rd/pcre2/cmake/FindEditline.cmake
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
# Modified from FindReadline.cmake (PH Feb 2012)
|
||||||
|
|
||||||
|
# Search for the editline header and library unless both locations are
# already set, in which case the package is simply reported as found.
if(NOT EDITLINE_INCLUDE_DIR OR NOT EDITLINE_LIBRARY)
  find_path(EDITLINE_INCLUDE_DIR readline.h PATH_SUFFIXES editline edit/readline)

  find_library(EDITLINE_LIBRARY NAMES edit)

  # Let the standard helper set the found state from the two variables.
  include(FindPackageHandleStandardArgs)
  find_package_handle_standard_args(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)

  mark_as_advanced(EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)
else()
  set(EDITLINE_FOUND TRUE)
endif()
|
||||||
27
3rd/pcre2/cmake/FindReadline.cmake
Normal file
27
3rd/pcre2/cmake/FindReadline.cmake
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
# from http://websvn.kde.org/trunk/KDE/kdeedu/cmake/modules/FindReadline.cmake
|
||||||
|
# http://websvn.kde.org/trunk/KDE/kdeedu/cmake/modules/COPYING-CMAKE-SCRIPTS
|
||||||
|
# --> BSD licensed
|
||||||
|
#
|
||||||
|
# GNU Readline library finder
|
||||||
|
if(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY)
|
||||||
|
set(READLINE_FOUND TRUE)
|
||||||
|
else()
|
||||||
|
find_path(READLINE_INCLUDE_DIR readline/readline.h /usr/include/readline)
|
||||||
|
|
||||||
|
# 2008-04-22 The next clause used to read like this:
|
||||||
|
#
|
||||||
|
# FIND_LIBRARY(READLINE_LIBRARY NAMES readline)
|
||||||
|
# FIND_LIBRARY(NCURSES_LIBRARY NAMES ncurses )
|
||||||
|
# include(FindPackageHandleStandardArgs)
|
||||||
|
# FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG NCURSES_LIBRARY READLINE_INCLUDE_DIR READLINE_LIBRARY )
|
||||||
|
#
|
||||||
|
# I was advised to modify it such that it will find an ncurses library if
|
||||||
|
# required, but not if one was explicitly given, that is, it allows the
|
||||||
|
# default to be overridden. PH
|
||||||
|
|
||||||
|
find_library(READLINE_LIBRARY NAMES readline)
|
||||||
|
include(FindPackageHandleStandardArgs)
|
||||||
|
find_package_handle_standard_args(Readline DEFAULT_MSG READLINE_INCLUDE_DIR READLINE_LIBRARY)
|
||||||
|
|
||||||
|
mark_as_advanced(READLINE_INCLUDE_DIR READLINE_LIBRARY)
|
||||||
|
endif()
|
||||||
14
3rd/pcre2/cmake/pcre2-config-version.cmake.in
Normal file
14
3rd/pcre2/cmake/pcre2-config-version.cmake.in
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
set(PACKAGE_VERSION_MAJOR @PCRE2_MAJOR@)
|
||||||
|
set(PACKAGE_VERSION_MINOR @PCRE2_MINOR@)
|
||||||
|
set(PACKAGE_VERSION_PATCH 0)
|
||||||
|
set(PACKAGE_VERSION @PCRE2_MAJOR@.@PCRE2_MINOR@.0)
|
||||||
|
|
||||||
|
# Check whether the requested PACKAGE_FIND_VERSION is compatible
|
||||||
|
if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION OR PACKAGE_VERSION_MAJOR GREATER PACKAGE_FIND_VERSION_MAJOR)
|
||||||
|
set(PACKAGE_VERSION_COMPATIBLE FALSE)
|
||||||
|
else()
|
||||||
|
set(PACKAGE_VERSION_COMPATIBLE TRUE)
|
||||||
|
if(PACKAGE_VERSION VERSION_EQUAL PACKAGE_FIND_VERSION)
|
||||||
|
set(PACKAGE_VERSION_EXACT TRUE)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
168
3rd/pcre2/cmake/pcre2-config.cmake.in
Normal file
168
3rd/pcre2/cmake/pcre2-config.cmake.in
Normal file
@@ -0,0 +1,168 @@
|
|||||||
|
# pcre2-config.cmake
|
||||||
|
# ----------------
|
||||||
|
#
|
||||||
|
# Finds the PCRE2 library. Specify the starting search path in PCRE2_ROOT.
|
||||||
|
#
|
||||||
|
# Static vs. shared
|
||||||
|
# -----------------
|
||||||
|
# To make use of the static library instead of the shared one, one needs
|
||||||
|
# to set the variable PCRE2_USE_STATIC_LIBS to ON before calling find_package.
|
||||||
|
# Example:
|
||||||
|
# set(PCRE2_USE_STATIC_LIBS ON)
|
||||||
|
# find_package(PCRE2 CONFIG COMPONENTS 8BIT)
|
||||||
|
#
|
||||||
|
# This will define the following variables:
|
||||||
|
#
|
||||||
|
# PCRE2_FOUND - True if the system has the PCRE2 library.
|
||||||
|
# PCRE2_VERSION - The version of the PCRE2 library which was found.
|
||||||
|
#
|
||||||
|
# and the following imported targets:
|
||||||
|
#
|
||||||
|
# PCRE2::8BIT - The 8 bit PCRE2 library.
|
||||||
|
# PCRE2::16BIT - The 16 bit PCRE2 library.
|
||||||
|
# PCRE2::32BIT - The 32 bit PCRE2 library.
|
||||||
|
# PCRE2::POSIX - The POSIX PCRE2 library.
|
||||||
|
|
||||||
|
set(PCRE2_NON_STANDARD_LIB_PREFIX @NON_STANDARD_LIB_PREFIX@)
|
||||||
|
set(PCRE2_NON_STANDARD_LIB_SUFFIX @NON_STANDARD_LIB_SUFFIX@)
|
||||||
|
set(PCRE2_8BIT_NAME pcre2-8)
|
||||||
|
set(PCRE2_16BIT_NAME pcre2-16)
|
||||||
|
set(PCRE2_32BIT_NAME pcre2-32)
|
||||||
|
set(PCRE2_POSIX_NAME pcre2-posix)
|
||||||
|
find_path(PCRE2_INCLUDE_DIR NAMES pcre2.h DOC "PCRE2 include directory")
|
||||||
|
if(PCRE2_USE_STATIC_LIBS)
|
||||||
|
if(MSVC)
|
||||||
|
set(PCRE2_8BIT_NAME pcre2-8-static)
|
||||||
|
set(PCRE2_16BIT_NAME pcre2-16-static)
|
||||||
|
set(PCRE2_32BIT_NAME pcre2-32-static)
|
||||||
|
set(PCRE2_POSIX_NAME pcre2-posix-static)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
set(PCRE2_PREFIX ${CMAKE_STATIC_LIBRARY_PREFIX})
|
||||||
|
set(PCRE2_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
|
||||||
|
else()
|
||||||
|
set(PCRE2_PREFIX ${CMAKE_SHARED_LIBRARY_PREFIX})
|
||||||
|
if(MINGW AND PCRE2_NON_STANDARD_LIB_PREFIX)
|
||||||
|
set(PCRE2_PREFIX "")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
set(PCRE2_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX})
|
||||||
|
if(MINGW AND PCRE2_NON_STANDARD_LIB_SUFFIX)
|
||||||
|
set(PCRE2_SUFFIX "-0.dll")
|
||||||
|
elseif(MSVC)
|
||||||
|
set(PCRE2_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
find_library(
|
||||||
|
PCRE2_8BIT_LIBRARY
|
||||||
|
NAMES ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}d${PCRE2_SUFFIX}
|
||||||
|
DOC "8 bit PCRE2 library"
|
||||||
|
)
|
||||||
|
find_library(
|
||||||
|
PCRE2_16BIT_LIBRARY
|
||||||
|
NAMES ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}d${PCRE2_SUFFIX}
|
||||||
|
DOC "16 bit PCRE2 library"
|
||||||
|
)
|
||||||
|
find_library(
|
||||||
|
PCRE2_32BIT_LIBRARY
|
||||||
|
NAMES ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}d${PCRE2_SUFFIX}
|
||||||
|
DOC "32 bit PCRE2 library"
|
||||||
|
)
|
||||||
|
find_library(
|
||||||
|
PCRE2_POSIX_LIBRARY
|
||||||
|
NAMES ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}d${PCRE2_SUFFIX}
|
||||||
|
DOC "8 bit POSIX PCRE2 library"
|
||||||
|
)
|
||||||
|
unset(PCRE2_NON_STANDARD_LIB_PREFIX)
|
||||||
|
unset(PCRE2_NON_STANDARD_LIB_SUFFIX)
|
||||||
|
unset(PCRE2_8BIT_NAME)
|
||||||
|
unset(PCRE2_16BIT_NAME)
|
||||||
|
unset(PCRE2_32BIT_NAME)
|
||||||
|
unset(PCRE2_POSIX_NAME)
|
||||||
|
|
||||||
|
# Set version
|
||||||
|
if(PCRE2_INCLUDE_DIR)
|
||||||
|
set(PCRE2_VERSION "@PCRE2_MAJOR@.@PCRE2_MINOR@.0")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# Which components have been found.
|
||||||
|
if(PCRE2_8BIT_LIBRARY)
|
||||||
|
set(PCRE2_8BIT_FOUND TRUE)
|
||||||
|
endif()
|
||||||
|
if(PCRE2_16BIT_LIBRARY)
|
||||||
|
set(PCRE2_16BIT_FOUND TRUE)
|
||||||
|
endif()
|
||||||
|
if(PCRE2_32BIT_LIBRARY)
|
||||||
|
set(PCRE2_32BIT_FOUND TRUE)
|
||||||
|
endif()
|
||||||
|
if(PCRE2_POSIX_LIBRARY)
|
||||||
|
set(PCRE2_POSIX_FOUND TRUE)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# Check if at least one component has been specified.
|
||||||
|
list(LENGTH PCRE2_FIND_COMPONENTS PCRE2_NCOMPONENTS)
|
||||||
|
if(PCRE2_NCOMPONENTS LESS 1)
|
||||||
|
message(FATAL_ERROR "No components have been specified. This is not allowed. Please, specify at least one component.")
|
||||||
|
endif()
|
||||||
|
unset(PCRE2_NCOMPONENTS)
|
||||||
|
|
||||||
|
# When the POSIX component has been specified, make sure that the 8BIT component is also specified.
|
||||||
|
set(PCRE2_8BIT_COMPONENT FALSE)
|
||||||
|
set(PCRE2_POSIX_COMPONENT FALSE)
|
||||||
|
foreach(component ${PCRE2_FIND_COMPONENTS})
|
||||||
|
if(component STREQUAL "8BIT")
|
||||||
|
set(PCRE2_8BIT_COMPONENT TRUE)
|
||||||
|
elseif(component STREQUAL "POSIX")
|
||||||
|
set(PCRE2_POSIX_COMPONENT TRUE)
|
||||||
|
endif()
|
||||||
|
endforeach()
|
||||||
|
|
||||||
|
if(PCRE2_POSIX_COMPONENT AND NOT PCRE2_8BIT_COMPONENT)
|
||||||
|
message(
|
||||||
|
FATAL_ERROR
|
||||||
|
"The component POSIX is specified while the 8BIT one is not. This is not allowed. Please, also specify the 8BIT component."
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
unset(PCRE2_8BIT_COMPONENT)
|
||||||
|
unset(PCRE2_POSIX_COMPONENT)
|
||||||
|
|
||||||
|
include(FindPackageHandleStandardArgs)
|
||||||
|
set(${CMAKE_FIND_PACKAGE_NAME}_CONFIG "${CMAKE_CURRENT_LIST_FILE}")
|
||||||
|
find_package_handle_standard_args(
|
||||||
|
PCRE2
|
||||||
|
FOUND_VAR PCRE2_FOUND
|
||||||
|
REQUIRED_VARS PCRE2_INCLUDE_DIR
|
||||||
|
HANDLE_COMPONENTS
|
||||||
|
VERSION_VAR PCRE2_VERSION
|
||||||
|
CONFIG_MODE
|
||||||
|
)
|
||||||
|
|
||||||
|
set(PCRE2_LIBRARIES)
|
||||||
|
# Create one imported target per requested component and collect the
# corresponding library paths in PCRE2_LIBRARIES.
if(PCRE2_FOUND)
  foreach(component ${PCRE2_FIND_COMPONENTS})
    # This file may be processed more than once where the imported target is
    # already visible (a repeated find_package call in the same directory, or
    # in a child of a directory that already ran it). add_library() fails if
    # the target exists, so create and configure it only the first time.
    if(NOT TARGET PCRE2::${component})
      if(PCRE2_USE_STATIC_LIBS)
        add_library(PCRE2::${component} STATIC IMPORTED)
        # Consumers of the static library get PCRE2_STATIC defined.
        target_compile_definitions(PCRE2::${component} INTERFACE PCRE2_STATIC)
      else()
        add_library(PCRE2::${component} SHARED IMPORTED)
      endif()

      set_target_properties(
        PCRE2::${component}
        PROPERTIES
          IMPORTED_LOCATION "${PCRE2_${component}_LIBRARY}"
          IMPORTED_IMPLIB "${PCRE2_${component}_LIBRARY}"
          INTERFACE_INCLUDE_DIRECTORIES "${PCRE2_INCLUDE_DIR}"
      )

      if(component STREQUAL "POSIX")
        # Anything linking the POSIX target also links PCRE2::8BIT.
        set_target_properties(
          PCRE2::${component}
          PROPERTIES INTERFACE_LINK_LIBRARIES "PCRE2::8BIT" LINK_LIBRARIES "PCRE2::8BIT"
        )
      endif()
    endif()

    # Always refresh the plain-variable interface, even if the target existed.
    set(PCRE2_LIBRARIES ${PCRE2_LIBRARIES} ${PCRE2_${component}_LIBRARY})
    mark_as_advanced(PCRE2_${component}_LIBRARY)
  endforeach()
endif()
|
||||||
|
|
||||||
|
mark_as_advanced(PCRE2_INCLUDE_DIR)
|
||||||
58
3rd/pcre2/config-cmake.h.in
Normal file
58
3rd/pcre2/config-cmake.h.in
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
/* config.h for CMake builds */
|
||||||
|
|
||||||
|
#cmakedefine HAVE_ASSERT_H 1
|
||||||
|
#cmakedefine HAVE_BUILTIN_ASSUME 1
|
||||||
|
#cmakedefine HAVE_BUILTIN_MUL_OVERFLOW 1
|
||||||
|
#cmakedefine HAVE_BUILTIN_UNREACHABLE 1
|
||||||
|
#cmakedefine HAVE_ATTRIBUTE_UNINITIALIZED 1
|
||||||
|
#cmakedefine HAVE_DIRENT_H 1
|
||||||
|
#cmakedefine HAVE_SYS_STAT_H 1
|
||||||
|
#cmakedefine HAVE_SYS_TYPES_H 1
|
||||||
|
#cmakedefine HAVE_UNISTD_H 1
|
||||||
|
#cmakedefine HAVE_WINDOWS_H 1
|
||||||
|
|
||||||
|
#cmakedefine HAVE_BCOPY 1
|
||||||
|
#cmakedefine HAVE_MEMFD_CREATE 1
|
||||||
|
#cmakedefine HAVE_MEMMOVE 1
|
||||||
|
#cmakedefine HAVE_SECURE_GETENV 1
|
||||||
|
#cmakedefine HAVE_STRERROR 1
|
||||||
|
|
||||||
|
#cmakedefine SUPPORT_PCRE2_8 1
|
||||||
|
#cmakedefine SUPPORT_PCRE2_16 1
|
||||||
|
#cmakedefine SUPPORT_PCRE2_32 1
|
||||||
|
#cmakedefine DISABLE_PERCENT_ZT 1
|
||||||
|
|
||||||
|
#cmakedefine SUPPORT_LIBBZ2 1
|
||||||
|
#cmakedefine SUPPORT_LIBEDIT 1
|
||||||
|
#cmakedefine SUPPORT_LIBREADLINE 1
|
||||||
|
#cmakedefine SUPPORT_LIBZ 1
|
||||||
|
|
||||||
|
#cmakedefine SUPPORT_JIT 1
|
||||||
|
#cmakedefine SLJIT_PROT_EXECUTABLE_ALLOCATOR 1
|
||||||
|
#cmakedefine SUPPORT_PCRE2GREP_JIT 1
|
||||||
|
#cmakedefine SUPPORT_PCRE2GREP_CALLOUT 1
|
||||||
|
#cmakedefine SUPPORT_PCRE2GREP_CALLOUT_FORK 1
|
||||||
|
#cmakedefine SUPPORT_UNICODE 1
|
||||||
|
#cmakedefine SUPPORT_VALGRIND 1
|
||||||
|
|
||||||
|
#cmakedefine BSR_ANYCRLF 1
|
||||||
|
#cmakedefine EBCDIC 1
|
||||||
|
#cmakedefine EBCDIC_NL25 1
|
||||||
|
#cmakedefine HEAP_MATCH_RECURSE 1
|
||||||
|
#cmakedefine NEVER_BACKSLASH_C 1
|
||||||
|
|
||||||
|
#define PCRE2_EXPORT @PCRE2_EXPORT@
|
||||||
|
#define LINK_SIZE @PCRE2_LINK_SIZE@
|
||||||
|
#define HEAP_LIMIT @PCRE2_HEAP_LIMIT@
|
||||||
|
#define MATCH_LIMIT @PCRE2_MATCH_LIMIT@
|
||||||
|
#define MATCH_LIMIT_DEPTH @PCRE2_MATCH_LIMIT_DEPTH@
|
||||||
|
#define MAX_VARLOOKBEHIND @PCRE2_MAX_VARLOOKBEHIND@
|
||||||
|
#define NEWLINE_DEFAULT @NEWLINE_DEFAULT@
|
||||||
|
#define PARENS_NEST_LIMIT @PCRE2_PARENS_NEST_LIMIT@
|
||||||
|
#define PCRE2GREP_BUFSIZE @PCRE2GREP_BUFSIZE@
|
||||||
|
#define PCRE2GREP_MAX_BUFSIZE @PCRE2GREP_MAX_BUFSIZE@
|
||||||
|
|
||||||
|
#define MAX_NAME_SIZE 128
|
||||||
|
#define MAX_NAME_COUNT 10000
|
||||||
|
|
||||||
|
/* end config.h for CMake builds */
|
||||||
1228
3rd/pcre2/configure.ac
Normal file
1228
3rd/pcre2/configure.ac
Normal file
@@ -0,0 +1,1228 @@
|
|||||||
|
dnl Process this file with autoconf to produce a configure script.
|
||||||
|
|
||||||
|
dnl NOTE FOR MAINTAINERS: Do not use minor version numbers 08 or 09 because
|
||||||
|
dnl the leading zeros may cause them to be treated as invalid octal constants
|
||||||
|
dnl if a PCRE2 user writes code that uses PCRE2_MINOR as a number. There is now
|
||||||
|
dnl a check further down that throws an error if 08 or 09 are used.
|
||||||
|
|
||||||
|
dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might
|
||||||
|
dnl be defined as -RC2, for example. For real releases, it should be empty.
|
||||||
|
|
||||||
|
m4_define(pcre2_major, [10])
|
||||||
|
m4_define(pcre2_minor, [45])
|
||||||
|
m4_define(pcre2_prerelease, [])
|
||||||
|
m4_define(pcre2_date, [2025-02-05])
|
||||||
|
|
||||||
|
# Libtool shared library interface versions (current:revision:age)
|
||||||
|
m4_define(libpcre2_8_version, [14:0:14])
|
||||||
|
m4_define(libpcre2_16_version, [14:0:14])
|
||||||
|
m4_define(libpcre2_32_version, [14:0:14])
|
||||||
|
m4_define(libpcre2_posix_version, [3:6:0])
|
||||||
|
|
||||||
|
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||||
|
# 50 lines of this file. Please update that if the variables above are moved.
|
||||||
|
|
||||||
|
AC_PREREQ([2.60])
|
||||||
|
AC_INIT([PCRE2],pcre2_major.pcre2_minor[]pcre2_prerelease,[],[pcre2])
|
||||||
|
AC_CONFIG_SRCDIR([src/pcre2.h.in])
|
||||||
|
AM_INIT_AUTOMAKE([dist-bzip2 dist-zip foreign])
|
||||||
|
ifelse(pcre2_prerelease, [-DEV],
|
||||||
|
[dnl For development builds, ./configure is not checked in to Git, so we are
|
||||||
|
dnl happy to have it regenerated as needed.
|
||||||
|
AM_MAINTAINER_MODE([enable])],
|
||||||
|
[dnl For a release build (or RC), the ./configure script we ship in the
|
||||||
|
dnl tarball (and check in to the Git tag) should not be regenerated
|
||||||
|
dnl implicitly. This is important if users want to check out a release tag
|
||||||
|
dnl using Git.
|
||||||
|
AM_MAINTAINER_MODE])
|
||||||
|
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
||||||
|
AC_CONFIG_HEADERS(src/config.h)
|
||||||
|
|
||||||
|
# This was added at the suggestion of libtoolize (03-Jan-10)
|
||||||
|
AC_CONFIG_MACRO_DIR([m4])
|
||||||
|
|
||||||
|
# The default CFLAGS in Autoconf are "-g -O2" for gcc and just "-g" for any
|
||||||
|
# other compiler. There doesn't seem to be a standard way of getting rid of the
|
||||||
|
# -g (which I don't think is needed for a production library). This fudge seems
|
||||||
|
# to achieve the necessary. First, we remember the externally set values of
|
||||||
|
# CFLAGS. Then call the AC_PROG_CC macro to find the compiler - if CFLAGS is
|
||||||
|
# not set, it will be set to Autoconf's defaults. Afterwards, if the original
|
||||||
|
# values were not set, remove the -g from the Autoconf defaults.
|
||||||
|
|
||||||
|
remember_set_CFLAGS="$CFLAGS"
|
||||||
|
|
||||||
|
m4_version_prereq(2.70, [AC_PROG_CC], [AC_PROG_CC_C99])
|
||||||
|
AM_PROG_CC_C_O
|
||||||
|
AC_USE_SYSTEM_EXTENSIONS
|
||||||
|
|
||||||
|
if test "x$remember_set_CFLAGS" = "x"
|
||||||
|
then
|
||||||
|
if test "$CFLAGS" = "-g -O2"
|
||||||
|
then
|
||||||
|
CFLAGS="-O2"
|
||||||
|
elif test "$CFLAGS" = "-g"
|
||||||
|
then
|
||||||
|
CFLAGS=""
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# This is a new thing required to stop a warning from automake 1.12
|
||||||
|
m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
|
||||||
|
|
||||||
|
# Check for a 64-bit integer type
|
||||||
|
AC_TYPE_INT64_T
|
||||||
|
|
||||||
|
AC_PROG_INSTALL
|
||||||
|
LT_INIT([win32-dll])
|
||||||
|
AC_PROG_LN_S
|
||||||
|
|
||||||
|
AC_SYS_LARGEFILE
|
||||||
|
|
||||||
|
# Check for GCC visibility feature
|
||||||
|
|
||||||
|
PCRE2_VISIBILITY
|
||||||
|
|
||||||
|
# Check for Clang __attribute__((uninitialized)) feature
|
||||||
|
|
||||||
|
AC_MSG_CHECKING([for __attribute__((uninitialized))])
|
||||||
|
AC_LANG_PUSH([C])
|
||||||
|
tmp_CFLAGS=$CFLAGS
|
||||||
|
if test $WORKING_WERROR -eq 1; then
|
||||||
|
CFLAGS="$CFLAGS -Werror"
|
||||||
|
fi
|
||||||
|
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,
|
||||||
|
[[char buf[128] __attribute__((uninitialized));(void)buf]])],
|
||||||
|
[pcre2_cc_cv_attribute_uninitialized=yes],
|
||||||
|
[pcre2_cc_cv_attribute_uninitialized=no])
|
||||||
|
AC_MSG_RESULT([$pcre2_cc_cv_attribute_uninitialized])
|
||||||
|
if test "$pcre2_cc_cv_attribute_uninitialized" = yes; then
|
||||||
|
AC_DEFINE([HAVE_ATTRIBUTE_UNINITIALIZED], 1, [Define this if your compiler
|
||||||
|
supports __attribute__((uninitialized))])
|
||||||
|
fi
|
||||||
|
CFLAGS=$tmp_CFLAGS
|
||||||
|
AC_LANG_POP([C])
|
||||||
|
|
||||||
|
# Check for the assume() builtin
|
||||||
|
|
||||||
|
AC_MSG_CHECKING([for __assume()])
|
||||||
|
AC_LANG_PUSH([C])
|
||||||
|
AC_LINK_IFELSE([AC_LANG_PROGRAM([[]], [[__assume(1)]])],
|
||||||
|
[pcre2_cc_cv_builtin_assume=yes],
|
||||||
|
[pcre2_cc_cv_builtin_assume=no])
|
||||||
|
AC_MSG_RESULT([$pcre2_cc_cv_builtin_assume])
|
||||||
|
if test "$pcre2_cc_cv_builtin_assume" = yes; then
|
||||||
|
AC_DEFINE([HAVE_BUILTIN_ASSUME], 1,
|
||||||
|
[Define this if your compiler provides __assume()])
|
||||||
|
fi
|
||||||
|
AC_LANG_POP([C])
|
||||||
|
|
||||||
|
# Check for the mul_overflow() builtin
|
||||||
|
|
||||||
|
AC_MSG_CHECKING([for __builtin_mul_overflow()])
|
||||||
|
AC_LANG_PUSH([C])
|
||||||
|
AC_LINK_IFELSE([AC_LANG_PROGRAM([[
|
||||||
|
#ifdef HAVE_SYS_TYPES_H
|
||||||
|
#include <sys/types.h>
|
||||||
|
#endif
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
|
int a, b;
|
||||||
|
size_t m;
|
||||||
|
]], [[__builtin_mul_overflow(a, b, &m)]])],
|
||||||
|
[pcre2_cc_cv_builtin_mul_overflow=yes],
|
||||||
|
[pcre2_cc_cv_builtin_mul_overflow=no])
|
||||||
|
AC_MSG_RESULT([$pcre2_cc_cv_builtin_mul_overflow])
|
||||||
|
if test "$pcre2_cc_cv_builtin_mul_overflow" = yes; then
|
||||||
|
AC_DEFINE([HAVE_BUILTIN_MUL_OVERFLOW], 1,
|
||||||
|
[Define this if your compiler provides __builtin_mul_overflow()])
|
||||||
|
fi
|
||||||
|
AC_LANG_POP([C])
|
||||||
|
|
||||||
|
# Check for the unreachable() builtin
|
||||||
|
|
||||||
|
AC_MSG_CHECKING([for __builtin_unreachable()])
|
||||||
|
AC_LANG_PUSH([C])
|
||||||
|
AC_LINK_IFELSE([AC_LANG_PROGRAM([[int r;]], [[if (r) __builtin_unreachable()]])],
|
||||||
|
[pcre2_cc_cv_builtin_unreachable=yes],
|
||||||
|
[pcre2_cc_cv_builtin_unreachable=no])
|
||||||
|
AC_MSG_RESULT([$pcre2_cc_cv_builtin_unreachable])
|
||||||
|
if test "$pcre2_cc_cv_builtin_unreachable" = yes; then
|
||||||
|
AC_DEFINE([HAVE_BUILTIN_UNREACHABLE], 1,
|
||||||
|
[Define this if your compiler provides __builtin_unreachable()])
|
||||||
|
fi
|
||||||
|
AC_LANG_POP([C])
|
||||||
|
|
||||||
|
# Versioning
|
||||||
|
|
||||||
|
PCRE2_MAJOR="pcre2_major"
|
||||||
|
PCRE2_MINOR="pcre2_minor"
|
||||||
|
PCRE2_PRERELEASE="pcre2_prerelease"
|
||||||
|
PCRE2_DATE="pcre2_date"
|
||||||
|
|
||||||
|
# Refuse minor versions 08 and 09: with the leading zero they would be invalid
# octal constants if PCRE2_MINOR is used as a number. Two test commands joined
# with || are used because the -o operator of test is not portable.
if test "$PCRE2_MINOR" = "08" || test "$PCRE2_MINOR" = "09"
then
  echo "***"
  echo "*** Minor version number $PCRE2_MINOR must not be used. ***"
  echo "*** Use only 00 to 07 or 10 onwards, to avoid octal issues. ***"
  echo "***"
  exit 1
fi
|
||||||
|
|
||||||
|
AC_SUBST(PCRE2_MAJOR)
|
||||||
|
AC_SUBST(PCRE2_MINOR)
|
||||||
|
AC_SUBST(PCRE2_PRERELEASE)
|
||||||
|
AC_SUBST(PCRE2_DATE)
|
||||||
|
|
||||||
|
# Set a more sensible default value for $(htmldir).
|
||||||
|
if test "x$htmldir" = 'x${docdir}'
|
||||||
|
then
|
||||||
|
htmldir='${docdir}/html'
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Force an error for PCRE1 size options
|
||||||
|
AC_ARG_ENABLE(pcre8,,,enable_pcre8=no)
|
||||||
|
AC_ARG_ENABLE(pcre16,,,enable_pcre16=no)
|
||||||
|
AC_ARG_ENABLE(pcre32,,,enable_pcre32=no)
|
||||||
|
|
||||||
|
if test "$enable_pcre8$enable_pcre16$enable_pcre32" != "nonono"
|
||||||
|
then
|
||||||
|
echo "** ERROR: Use --[[en|dis]]able-pcre2-[[8|16|32]], not --[[en|dis]]able-pcre[[8|16|32]]"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Handle --disable-pcre2-8 (enabled by default)
|
||||||
|
AC_ARG_ENABLE(pcre2-8,
|
||||||
|
AS_HELP_STRING([--disable-pcre2-8],
|
||||||
|
[disable 8 bit character support]),
|
||||||
|
, enable_pcre2_8=unset)
|
||||||
|
AC_SUBST(enable_pcre2_8)
|
||||||
|
|
||||||
|
# Handle --enable-pcre2-16 (disabled by default)
|
||||||
|
AC_ARG_ENABLE(pcre2-16,
|
||||||
|
AS_HELP_STRING([--enable-pcre2-16],
|
||||||
|
[enable 16 bit character support]),
|
||||||
|
, enable_pcre2_16=unset)
|
||||||
|
AC_SUBST(enable_pcre2_16)
|
||||||
|
|
||||||
|
# Handle --enable-pcre2-32 (disabled by default)
|
||||||
|
AC_ARG_ENABLE(pcre2-32,
|
||||||
|
AS_HELP_STRING([--enable-pcre2-32],
|
||||||
|
[enable 32 bit character support]),
|
||||||
|
, enable_pcre2_32=unset)
|
||||||
|
AC_SUBST(enable_pcre2_32)
|
||||||
|
|
||||||
|
# Handle --enable-debug (disabled by default)
|
||||||
|
AC_ARG_ENABLE(debug,
|
||||||
|
AS_HELP_STRING([--enable-debug],
|
||||||
|
[enable debugging code]),
|
||||||
|
, enable_debug=no)
|
||||||
|
|
||||||
|
# Handle --enable-jit (disabled by default)
|
||||||
|
AC_ARG_ENABLE(jit,
|
||||||
|
AS_HELP_STRING([--enable-jit],
|
||||||
|
[enable Just-In-Time compiling support]),
|
||||||
|
, enable_jit=no)
|
||||||
|
|
||||||
|
# This code enables JIT if the hardware supports it.
|
||||||
|
if test "$enable_jit" = "auto"; then
|
||||||
|
AC_LANG(C)
|
||||||
|
SAVE_CPPFLAGS=$CPPFLAGS
|
||||||
|
CPPFLAGS=-I$srcdir
|
||||||
|
AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
|
||||||
|
#define SLJIT_CONFIG_AUTO 1
|
||||||
|
#include "deps/sljit/sljit_src/sljitConfigCPU.h"
|
||||||
|
#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
|
||||||
|
#error unsupported
|
||||||
|
#endif]])], enable_jit=yes, enable_jit=no)
|
||||||
|
CPPFLAGS=$SAVE_CPPFLAGS
|
||||||
|
echo checking for JIT support on this hardware... $enable_jit
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Handle --enable-jit-sealloc (disabled by default and only experimental)
|
||||||
|
case $host_os in
|
||||||
|
linux* | netbsd*)
|
||||||
|
AC_ARG_ENABLE(jit-sealloc,
|
||||||
|
AS_HELP_STRING([--enable-jit-sealloc],
|
||||||
|
[enable SELinux compatible execmem allocator in JIT (experimental)]),
|
||||||
|
,enable_jit_sealloc=no)
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
enable_jit_sealloc=unsupported
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
# Handle --disable-pcre2grep-jit (enabled by default)
|
||||||
|
AC_ARG_ENABLE(pcre2grep-jit,
|
||||||
|
AS_HELP_STRING([--disable-pcre2grep-jit],
|
||||||
|
[disable JIT support in pcre2grep]),
|
||||||
|
, enable_pcre2grep_jit=yes)
|
||||||
|
|
||||||
|
# Handle --disable-pcre2grep-callout (enabled by default)
|
||||||
|
AC_ARG_ENABLE(pcre2grep-callout,
|
||||||
|
AS_HELP_STRING([--disable-pcre2grep-callout],
|
||||||
|
[disable callout script support in pcre2grep]),
|
||||||
|
, enable_pcre2grep_callout=yes)
|
||||||
|
|
||||||
|
# Handle --disable-pcre2grep-callout-fork (enabled by default)
|
||||||
|
AC_ARG_ENABLE(pcre2grep-callout-fork,
|
||||||
|
AS_HELP_STRING([--disable-pcre2grep-callout-fork],
|
||||||
|
[disable callout script fork support in pcre2grep]),
|
||||||
|
, enable_pcre2grep_callout_fork=yes)
|
||||||
|
|
||||||
|
# Handle --enable-rebuild-chartables
|
||||||
|
AC_ARG_ENABLE(rebuild-chartables,
|
||||||
|
AS_HELP_STRING([--enable-rebuild-chartables],
|
||||||
|
[rebuild character tables in current locale]),
|
||||||
|
, enable_rebuild_chartables=no)
|
||||||
|
|
||||||
|
# Handle --disable-unicode (enabled by default)
|
||||||
|
AC_ARG_ENABLE(unicode,
|
||||||
|
AS_HELP_STRING([--disable-unicode],
|
||||||
|
[disable Unicode support]),
|
||||||
|
, enable_unicode=unset)
|
||||||
|
|
||||||
|
# Handle newline options
|
||||||
|
ac_pcre2_newline=lf
|
||||||
|
AC_ARG_ENABLE(newline-is-cr,
|
||||||
|
AS_HELP_STRING([--enable-newline-is-cr],
|
||||||
|
[use CR as newline character]),
|
||||||
|
ac_pcre2_newline=cr)
|
||||||
|
AC_ARG_ENABLE(newline-is-lf,
|
||||||
|
AS_HELP_STRING([--enable-newline-is-lf],
|
||||||
|
[use LF as newline character (default)]),
|
||||||
|
ac_pcre2_newline=lf)
|
||||||
|
AC_ARG_ENABLE(newline-is-crlf,
|
||||||
|
AS_HELP_STRING([--enable-newline-is-crlf],
|
||||||
|
[use CRLF as newline sequence]),
|
||||||
|
ac_pcre2_newline=crlf)
|
||||||
|
AC_ARG_ENABLE(newline-is-anycrlf,
|
||||||
|
AS_HELP_STRING([--enable-newline-is-anycrlf],
|
||||||
|
[use CR, LF, or CRLF as newline sequence]),
|
||||||
|
ac_pcre2_newline=anycrlf)
|
||||||
|
AC_ARG_ENABLE(newline-is-any,
|
||||||
|
AS_HELP_STRING([--enable-newline-is-any],
|
||||||
|
[use any valid Unicode newline sequence]),
|
||||||
|
ac_pcre2_newline=any)
|
||||||
|
AC_ARG_ENABLE(newline-is-nul,
|
||||||
|
AS_HELP_STRING([--enable-newline-is-nul],
|
||||||
|
[use NUL (binary zero) as newline character]),
|
||||||
|
ac_pcre2_newline=nul)
|
||||||
|
enable_newline="$ac_pcre2_newline"
|
||||||
|
|
||||||
|
# Handle --enable-bsr-anycrlf
|
||||||
|
AC_ARG_ENABLE(bsr-anycrlf,
|
||||||
|
AS_HELP_STRING([--enable-bsr-anycrlf],
|
||||||
|
[\R matches only CR, LF, CRLF by default]),
|
||||||
|
, enable_bsr_anycrlf=no)
|
||||||
|
|
||||||
|
# Handle --enable-never-backslash-C
|
||||||
|
AC_ARG_ENABLE(never-backslash-C,
|
||||||
|
AS_HELP_STRING([--enable-never-backslash-C],
|
||||||
|
[use of \C causes an error]),
|
||||||
|
, enable_never_backslash_C=no)
|
||||||
|
|
||||||
|
# Handle --enable-ebcdic
|
||||||
|
AC_ARG_ENABLE(ebcdic,
|
||||||
|
AS_HELP_STRING([--enable-ebcdic],
|
||||||
|
[assume EBCDIC coding rather than ASCII; incompatible with --enable-unicode; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]),
|
||||||
|
, enable_ebcdic=no)
|
||||||
|
|
||||||
|
# Handle --enable-ebcdic-nl25
|
||||||
|
AC_ARG_ENABLE(ebcdic-nl25,
|
||||||
|
AS_HELP_STRING([--enable-ebcdic-nl25],
|
||||||
|
[set EBCDIC code for NL to 0x25 instead of 0x15; it implies --enable-ebcdic]),
|
||||||
|
, enable_ebcdic_nl25=no)
|
||||||
|
|
||||||
|
# Handle --enable-pcre2grep-libz
|
||||||
|
AC_ARG_ENABLE(pcre2grep-libz,
|
||||||
|
AS_HELP_STRING([--enable-pcre2grep-libz],
|
||||||
|
[link pcre2grep with libz to handle .gz files]),
|
||||||
|
, enable_pcre2grep_libz=no)
|
||||||
|
|
||||||
|
# Handle --enable-pcre2grep-libbz2
|
||||||
|
AC_ARG_ENABLE(pcre2grep-libbz2,
|
||||||
|
AS_HELP_STRING([--enable-pcre2grep-libbz2],
|
||||||
|
[link pcre2grep with libbz2 to handle .bz2 files]),
|
||||||
|
, enable_pcre2grep_libbz2=no)
|
||||||
|
|
||||||
|
# Handle --with-pcre2grep-bufsize=N
|
||||||
|
AC_ARG_WITH(pcre2grep-bufsize,
|
||||||
|
AS_HELP_STRING([--with-pcre2grep-bufsize=N],
|
||||||
|
[pcre2grep initial buffer size (default=20480, minimum=8192)]),
|
||||||
|
, with_pcre2grep_bufsize=20480)
|
||||||
|
|
||||||
|
# Handle --with-pcre2grep-max-bufsize=N
|
||||||
|
AC_ARG_WITH(pcre2grep-max-bufsize,
|
||||||
|
AS_HELP_STRING([--with-pcre2grep-max-bufsize=N],
|
||||||
|
[pcre2grep maximum buffer size (default=1048576, minimum=8192)]),
|
||||||
|
, with_pcre2grep_max_bufsize=1048576)
|
||||||
|
|
||||||
|
# Handle --enable-pcre2test-libedit
|
||||||
|
AC_ARG_ENABLE(pcre2test-libedit,
|
||||||
|
AS_HELP_STRING([--enable-pcre2test-libedit],
|
||||||
|
[link pcre2test with libedit]),
|
||||||
|
, enable_pcre2test_libedit=no)
|
||||||
|
|
||||||
|
# Handle --enable-pcre2test-libreadline
|
||||||
|
AC_ARG_ENABLE(pcre2test-libreadline,
|
||||||
|
AS_HELP_STRING([--enable-pcre2test-libreadline],
|
||||||
|
[link pcre2test with libreadline]),
|
||||||
|
, enable_pcre2test_libreadline=no)
|
||||||
|
|
||||||
|
# Handle --with-link-size=N
|
||||||
|
AC_ARG_WITH(link-size,
|
||||||
|
AS_HELP_STRING([--with-link-size=N],
|
||||||
|
[internal link size (2, 3, or 4 allowed; default=2)]),
|
||||||
|
, with_link_size=2)
|
||||||
|
|
||||||
|
# Handle --with-max-varlookbehind=N
|
||||||
|
AC_ARG_WITH(max-varlookbehind,
|
||||||
|
AS_HELP_STRING([--with-max-varlookbehind=N],
|
||||||
|
[maximum length of variable lookbehind (default=255)]),
|
||||||
|
, with_max_varlookbehind=255)
|
||||||
|
|
||||||
|
# Handle --with-parens-nest-limit=N
|
||||||
|
AC_ARG_WITH(parens-nest-limit,
|
||||||
|
AS_HELP_STRING([--with-parens-nest-limit=N],
|
||||||
|
[nested parentheses limit (default=250)]),
|
||||||
|
, with_parens_nest_limit=250)
|
||||||
|
|
||||||
|
# Handle --with-heap-limit
|
||||||
|
AC_ARG_WITH(heap-limit,
|
||||||
|
AS_HELP_STRING([--with-heap-limit=N],
|
||||||
|
[default limit on heap memory (kibibytes, default=20000000)]),
|
||||||
|
, with_heap_limit=20000000)
|
||||||
|
|
||||||
|
# Handle --with-match-limit=N
|
||||||
|
AC_ARG_WITH(match-limit,
|
||||||
|
AS_HELP_STRING([--with-match-limit=N],
|
||||||
|
[default limit on internal looping (default=10000000)]),
|
||||||
|
, with_match_limit=10000000)
|
||||||
|
|
||||||
|
# Handle --with-match-limit-depth=N
|
||||||
|
# Recognize old synonym --with-match-limit-recursion
|
||||||
|
#
|
||||||
|
# Note: In config.h, the default is to define MATCH_LIMIT_DEPTH symbolically as
|
||||||
|
# MATCH_LIMIT, which in turn is defined to be some numeric value (e.g.
|
||||||
|
# 10000000). MATCH_LIMIT_DEPTH can otherwise be set to some different numeric
|
||||||
|
# value (or even the same numeric value as MATCH_LIMIT, though no longer
|
||||||
|
# defined in terms of the latter).
|
||||||
|
#
|
||||||
|
AC_ARG_WITH(match-limit-depth,
|
||||||
|
AS_HELP_STRING([--with-match-limit-depth=N],
|
||||||
|
[default limit on match tree depth (default=MATCH_LIMIT)]),
|
||||||
|
, with_match_limit_depth=MATCH_LIMIT)
|
||||||
|
|
||||||
|
AC_ARG_WITH(match-limit-recursion,,
|
||||||
|
, with_match_limit_recursion=UNSET)
|
||||||
|
|
||||||
|
# Handle --enable-valgrind
|
||||||
|
AC_ARG_ENABLE(valgrind,
|
||||||
|
AS_HELP_STRING([--enable-valgrind],
|
||||||
|
[enable valgrind support]),
|
||||||
|
, enable_valgrind=no)
|
||||||
|
|
||||||
|
# Enable code coverage reports using gcov
|
||||||
|
AC_ARG_ENABLE(coverage,
|
||||||
|
AS_HELP_STRING([--enable-coverage],
|
||||||
|
[enable code coverage reports using gcov]),
|
||||||
|
, enable_coverage=no)
|
||||||
|
|
||||||
|
# Handle --enable-fuzz-support
|
||||||
|
AC_ARG_ENABLE(fuzz_support,
|
||||||
|
AS_HELP_STRING([--enable-fuzz-support],
|
||||||
|
[enable fuzzer support]),
|
||||||
|
, enable_fuzz_support=no)
|
||||||
|
|
||||||
|
# Handle --enable-diff-fuzz-support
|
||||||
|
AC_ARG_ENABLE(diff_fuzz_support,
|
||||||
|
AS_HELP_STRING([--enable-diff-fuzz-support],
|
||||||
|
[enable differential fuzzer support]),
|
||||||
|
, enable_diff_fuzz_support=no)
|
||||||
|
|
||||||
|
# Handle --disable-stack-for-recursion
|
||||||
|
# This option became obsolete at release 10.30.
|
||||||
|
AC_ARG_ENABLE(stack-for-recursion,,
|
||||||
|
, enable_stack_for_recursion=yes)
|
||||||
|
|
||||||
|
# Original code
|
||||||
|
# AC_ARG_ENABLE(stack-for-recursion,
|
||||||
|
# AS_HELP_STRING([--disable-stack-for-recursion],
|
||||||
|
# [don't use stack recursion when matching]),
|
||||||
|
# , enable_stack_for_recursion=yes)
|
||||||
|
|
||||||
|
# Handle --disable-percent-zt (set as "auto" by default)
|
||||||
|
AC_ARG_ENABLE(percent-zt,
|
||||||
|
AS_HELP_STRING([--disable-percent-zt],
|
||||||
|
[disable the use of z and t formatting modifiers]),
|
||||||
|
, enable_percent_zt=auto)
|
||||||
|
|
||||||
|
# Set the default value for pcre2-8
|
||||||
|
if test "x$enable_pcre2_8" = "xunset"
|
||||||
|
then
|
||||||
|
enable_pcre2_8=yes
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Set the default value for pcre2-16
|
||||||
|
if test "x$enable_pcre2_16" = "xunset"
|
||||||
|
then
|
||||||
|
enable_pcre2_16=no
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Set the default value for pcre2-32
|
||||||
|
if test "x$enable_pcre2_32" = "xunset"
|
||||||
|
then
|
||||||
|
enable_pcre2_32=no
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Make sure at least one library is selected
|
||||||
|
if test "x$enable_pcre2_8$enable_pcre2_16$enable_pcre2_32" = "xnonono"
|
||||||
|
then
|
||||||
|
AC_MSG_ERROR([At least one of the 8, 16 or 32 bit libraries must be enabled])
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Unicode is enabled by default.
|
||||||
|
if test "x$enable_unicode" = "xunset"
|
||||||
|
then
|
||||||
|
enable_unicode=yes
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Convert the newline identifier into the appropriate integer value. These must
|
||||||
|
# agree with the PCRE2_NEWLINE_xxx values in pcre2.h.
|
||||||
|
|
||||||
|
case "$enable_newline" in
|
||||||
|
cr) ac_pcre2_newline_value=1 ;;
|
||||||
|
lf) ac_pcre2_newline_value=2 ;;
|
||||||
|
crlf) ac_pcre2_newline_value=3 ;;
|
||||||
|
any) ac_pcre2_newline_value=4 ;;
|
||||||
|
anycrlf) ac_pcre2_newline_value=5 ;;
|
||||||
|
nul) ac_pcre2_newline_value=6 ;;
|
||||||
|
*)
|
||||||
|
AC_MSG_ERROR([invalid argument "$enable_newline" to --enable-newline option])
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
# --enable-ebcdic-nl25 implies --enable-ebcdic
|
||||||
|
if test "x$enable_ebcdic_nl25" = "xyes"; then
|
||||||
|
enable_ebcdic=yes
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Make sure that if enable_ebcdic is set, rebuild_chartables is also enabled.
|
||||||
|
# Also check that UTF support is not requested, because PCRE2 cannot handle
|
||||||
|
# EBCDIC and UTF in the same build. To do so it would need to use different
|
||||||
|
# character constants depending on the mode. Also, EBCDIC cannot be used with
|
||||||
|
# 16-bit and 32-bit libraries.
|
||||||
|
#
|
||||||
|
if test "x$enable_ebcdic" = "xyes"; then
|
||||||
|
enable_rebuild_chartables=yes
|
||||||
|
if test "x$enable_unicode" = "xyes"; then
|
||||||
|
AC_MSG_ERROR([support for EBCDIC and Unicode cannot be enabled at the same time])
|
||||||
|
fi
|
||||||
|
if test "x$enable_pcre2_16" = "xyes" -o "x$enable_pcre2_32" = "xyes"; then
|
||||||
|
AC_MSG_ERROR([EBCDIC support is available only for the 8-bit library])
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check argument to --with-link-size
|
||||||
|
case "$with_link_size" in
|
||||||
|
2|3|4) ;;
|
||||||
|
*)
|
||||||
|
AC_MSG_ERROR([invalid argument "$with_link_size" to --with-link-size option])
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
AH_TOP([
|
||||||
|
/* PCRE2 is written in Standard C, but there are a few non-standard things it
|
||||||
|
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||||
|
systems.
|
||||||
|
|
||||||
|
In environments that support the GNU autotools, config.h.in is converted into
|
||||||
|
config.h by the "configure" script. In environments that use CMake,
|
||||||
|
config-cmake.in is converted into config.h. If you are going to build PCRE2 "by
|
||||||
|
hand" without using "configure" or CMake, you should copy the distributed
|
||||||
|
config.h.generic to config.h, and edit the macro definitions to be the way you
|
||||||
|
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
|
||||||
|
so that config.h is included at the start of every source.
|
||||||
|
|
||||||
|
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||||
|
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
|
||||||
|
but if you do, default values will be taken from config.h for non-boolean
|
||||||
|
macros that are not defined on the command line.
|
||||||
|
|
||||||
|
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be
|
||||||
|
defined (conventionally to 1) for TRUE, and not defined at all for FALSE. All
|
||||||
|
such macros are listed as a commented #undef in config.h.generic. Macros such
|
||||||
|
as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||||
|
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
|
||||||
|
|
||||||
|
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||||
|
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
|
||||||
|
sure both macros are undefined; an emulation function will then be used. */])
|
||||||
|
|
||||||
|
# Checks for header files.
|
||||||
|
AC_CHECK_HEADERS(assert.h limits.h sys/types.h sys/stat.h dirent.h)
|
||||||
|
AC_CHECK_HEADERS([windows.h], [HAVE_WINDOWS_H=1])
|
||||||
|
AC_CHECK_HEADERS([sys/wait.h], [HAVE_SYS_WAIT_H=1])
|
||||||
|
|
||||||
|
# Conditional compilation
|
||||||
|
AM_CONDITIONAL(WITH_PCRE2_8, test "x$enable_pcre2_8" = "xyes")
|
||||||
|
AM_CONDITIONAL(WITH_PCRE2_16, test "x$enable_pcre2_16" = "xyes")
|
||||||
|
AM_CONDITIONAL(WITH_PCRE2_32, test "x$enable_pcre2_32" = "xyes")
|
||||||
|
AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes")
|
||||||
|
AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes")
|
||||||
|
AM_CONDITIONAL(WITH_UNICODE, test "x$enable_unicode" = "xyes")
|
||||||
|
AM_CONDITIONAL(WITH_VALGRIND, test "x$enable_valgrind" = "xyes")
|
||||||
|
AM_CONDITIONAL(WITH_FUZZ_SUPPORT, test "x$enable_fuzz_support" = "xyes")
|
||||||
|
AM_CONDITIONAL(WITH_DIFF_FUZZ_SUPPORT, test "x$enable_diff_fuzz_support" = "xyes")
|
||||||
|
|
||||||
|
# Fuzzer support requires the 8-bit library. Anything other than an explicit
# "yes" counts as "not enabled", matching the "= xyes" tests used for the
# WITH_* conditionals.
if test "$enable_fuzz_support" = "yes" -a "$enable_pcre2_8" != "yes"; then
  AC_MSG_ERROR([Fuzzer support requires the 8-bit library])
fi

# Differential fuzzing requires both ordinary fuzzer support and JIT support.
# Check the two prerequisites before defining SUPPORT_DIFF_FUZZ.
if test "$enable_diff_fuzz_support" = "yes"; then
  if test "$enable_fuzz_support" != "yes"; then
    AC_MSG_ERROR([Differential fuzzing support requires fuzzing support])
  fi
  if test "$enable_jit" != "yes"; then
    AC_MSG_ERROR([Differential fuzzing support requires Just-in-Time compilation support])
  fi
  AC_DEFINE([SUPPORT_DIFF_FUZZ], [], [
    Define to any value to enable differential fuzzing support.])
fi
|
||||||
|
|
||||||
|
# Checks for typedefs, structures, and compiler characteristics.
|
||||||
|
|
||||||
|
AC_C_CONST
|
||||||
|
AC_TYPE_SIZE_T
|
||||||
|
|
||||||
|
# Checks for library functions.
|
||||||
|
|
||||||
|
AC_CHECK_FUNCS(bcopy memfd_create memmove mkostemp secure_getenv strerror)
|
||||||
|
AC_MSG_CHECKING([for realpath])
|
||||||
|
AC_LINK_IFELSE([AC_LANG_PROGRAM([[
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <limits.h>
|
||||||
|
]],[[
|
||||||
|
char buffer[PATH_MAX];
|
||||||
|
realpath(".", buffer);
|
||||||
|
]])],
|
||||||
|
[AC_MSG_RESULT([yes])
|
||||||
|
AC_DEFINE([HAVE_REALPATH], 1,
|
||||||
|
[Define to 1 if you have the `realpath' function.])
|
||||||
|
],
|
||||||
|
AC_MSG_RESULT([no]))
|
||||||
|
|
||||||
|
# Check for the availability of libz (aka zlib)
|
||||||
|
|
||||||
|
AC_CHECK_HEADERS([zlib.h], [HAVE_ZLIB_H=1])
|
||||||
|
AC_CHECK_LIB([z], [gzopen], [HAVE_LIBZ=1])
|
||||||
|
|
||||||
|
# Check for the availability of libbz2. Originally we just used AC_CHECK_LIB,
|
||||||
|
# as for libz. However, this had the following problem, diagnosed and fixed by
|
||||||
|
# a user:
|
||||||
|
#
|
||||||
|
# - libbz2 uses the Pascal calling convention (WINAPI) for the functions
|
||||||
|
# under Win32.
|
||||||
|
# - The standard autoconf AC_CHECK_LIB fails to include "bzlib.h",
|
||||||
|
# therefore missing the function definition.
|
||||||
|
# - The compiler thus generates a "C" signature for the test function.
|
||||||
|
# - The linker fails to find the "C" function.
|
||||||
|
# - PCRE2 fails to configure if asked to do so against libbz2.
|
||||||
|
#
|
||||||
|
# Solution:
|
||||||
|
#
|
||||||
|
# - Replace the AC_CHECK_LIB test with a custom test.
|
||||||
|
|
||||||
|
AC_CHECK_HEADERS([bzlib.h], [HAVE_BZLIB_H=1])
|
||||||
|
# Original test
|
||||||
|
# AC_CHECK_LIB([bz2], [BZ2_bzopen], [HAVE_LIBBZ2=1])
|
||||||
|
#
|
||||||
|
# Custom test follows
|
||||||
|
|
||||||
|
# Link a small program that calls BZ2_bzopen, including <bzlib.h> when
# HAVE_BZLIB_H is defined so that the function is used with its real
# declaration. -lbz2 is appended to LIBS only for the duration of the test and
# the previous value is restored afterwards. HAVE_LIBBZ2 is set to 1 when the
# link succeeds.
AC_MSG_CHECKING([for libbz2])
OLD_LIBS="$LIBS"
LIBS="$LIBS -lbz2"
AC_LINK_IFELSE([AC_LANG_PROGRAM([[
#ifdef HAVE_BZLIB_H
#include <bzlib.h>
#endif]],
[[return (int)BZ2_bzopen("conftest", "rb");]])],
[AC_MSG_RESULT([yes]); HAVE_LIBBZ2=1],
AC_MSG_RESULT([no]))
LIBS="$OLD_LIBS"
|
||||||
|
|
||||||
|
# Check for the availability of libreadline
|
||||||
|
|
||||||
|
if test "$enable_pcre2test_libreadline" = "yes"; then
|
||||||
|
AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_H=1])
|
||||||
|
AC_CHECK_HEADERS([readline/history.h], [HAVE_HISTORY_H=1])
|
||||||
|
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lreadline"],
|
||||||
|
[unset ac_cv_lib_readline_readline;
|
||||||
|
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-ltinfo"],
|
||||||
|
[unset ac_cv_lib_readline_readline;
|
||||||
|
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lcurses"],
|
||||||
|
[unset ac_cv_lib_readline_readline;
|
||||||
|
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lncurses"],
|
||||||
|
[unset ac_cv_lib_readline_readline;
|
||||||
|
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lncursesw"],
|
||||||
|
[unset ac_cv_lib_readline_readline;
|
||||||
|
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-ltermcap"],
|
||||||
|
[LIBREADLINE=""],
|
||||||
|
[-ltermcap])],
|
||||||
|
[-lncursesw])],
|
||||||
|
[-lncurses])],
|
||||||
|
[-lcurses])],
|
||||||
|
[-ltinfo])])
|
||||||
|
AC_SUBST(LIBREADLINE)
|
||||||
|
if test -n "$LIBREADLINE"; then
|
||||||
|
if test "$LIBREADLINE" != "-lreadline"; then
|
||||||
|
echo "-lreadline needs $LIBREADLINE"
|
||||||
|
LIBREADLINE="-lreadline $LIBREADLINE"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check for the availability of libedit. Different distributions put its
|
||||||
|
# headers in different places. Try to cover the most common ones.
|
||||||
|
|
||||||
|
if test "$enable_pcre2test_libedit" = "yes"; then
|
||||||
|
AC_CHECK_HEADERS([editline/readline.h edit/readline/readline.h readline.h], [
|
||||||
|
HAVE_LIBEDIT_HEADER=1
|
||||||
|
break
|
||||||
|
])
|
||||||
|
AC_CHECK_LIB([edit], [readline], [LIBEDIT="-ledit"])
|
||||||
|
fi
|
||||||
|
|
||||||
|
PCRE2_STATIC_CFLAG=""
|
||||||
|
if test "x$enable_shared" = "xno" ; then
|
||||||
|
AC_DEFINE([PCRE2_STATIC], [1], [
|
||||||
|
Define to any value if linking statically (TODO: make nice with Libtool)])
|
||||||
|
PCRE2_STATIC_CFLAG="-DPCRE2_STATIC"
|
||||||
|
fi
|
||||||
|
AC_SUBST(PCRE2_STATIC_CFLAG)
|
||||||
|
|
||||||
|
PCRE2POSIX_CFLAG=""
|
||||||
|
if test "x$enable_shared" = "xyes" ; then
|
||||||
|
PCRE2POSIX_CFLAG="-DPCRE2POSIX_SHARED"
|
||||||
|
fi
|
||||||
|
AC_SUBST(PCRE2POSIX_CFLAG)
|
||||||
|
|
||||||
|
# Here is where PCRE2-specific defines are handled
|
||||||
|
|
||||||
|
if test "$enable_pcre2_8" = "yes"; then
|
||||||
|
AC_DEFINE([SUPPORT_PCRE2_8], [], [
|
||||||
|
Define to any value to enable the 8 bit PCRE2 library.])
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$enable_pcre2_16" = "yes"; then
|
||||||
|
AC_DEFINE([SUPPORT_PCRE2_16], [], [
|
||||||
|
Define to any value to enable the 16 bit PCRE2 library.])
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$enable_pcre2_32" = "yes"; then
|
||||||
|
AC_DEFINE([SUPPORT_PCRE2_32], [], [
|
||||||
|
Define to any value to enable the 32 bit PCRE2 library.])
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$enable_debug" = "yes"; then
|
||||||
|
AC_DEFINE([PCRE2_DEBUG], [], [
|
||||||
|
Define to any value to include debugging code.])
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$enable_percent_zt" = "no"; then
|
||||||
|
AC_DEFINE([DISABLE_PERCENT_ZT], [], [
|
||||||
|
Define to any value to disable the use of the z and t modifiers in
|
||||||
|
formatting settings such as %zu or %td (this is rarely needed).])
|
||||||
|
else
|
||||||
|
enable_percent_zt=auto
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Unless running under Windows, JIT support requires pthreads.
|
||||||
|
|
||||||
|
if test "$enable_jit" = "yes"; then
|
||||||
|
if test "$HAVE_WINDOWS_H" != "1"; then
|
||||||
|
AX_PTHREAD([], [AC_MSG_ERROR([JIT support requires pthreads])])
|
||||||
|
CC="$PTHREAD_CC"
|
||||||
|
CFLAGS="$PTHREAD_CFLAGS $CFLAGS"
|
||||||
|
LIBS="$PTHREAD_LIBS $LIBS"
|
||||||
|
fi
|
||||||
|
AC_DEFINE([SUPPORT_JIT], [], [
|
||||||
|
Define to any value to enable support for Just-In-Time compiling.])
|
||||||
|
else
|
||||||
|
enable_pcre2grep_jit="no"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$enable_jit_sealloc" = "yes"; then
|
||||||
|
AC_DEFINE([SLJIT_PROT_EXECUTABLE_ALLOCATOR], [1], [
|
||||||
|
Define to any non-zero number to enable support for SELinux
|
||||||
|
compatible executable memory allocator in JIT. Note that this
|
||||||
|
will have no effect unless SUPPORT_JIT is also defined.])
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$enable_pcre2grep_jit" = "yes"; then
|
||||||
|
AC_DEFINE([SUPPORT_PCRE2GREP_JIT], [], [
|
||||||
|
Define to any value to enable JIT support in pcre2grep. Note that this will
|
||||||
|
have no effect unless SUPPORT_JIT is also defined.])
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$enable_pcre2grep_callout" = "yes"; then
|
||||||
|
if test "$enable_pcre2grep_callout_fork" = "yes"; then
|
||||||
|
if test "$HAVE_WINDOWS_H" != "1"; then
|
||||||
|
if test "$HAVE_SYS_WAIT_H" != "1"; then
|
||||||
|
AC_MSG_ERROR([Callout script support needs sys/wait.h.])
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT_FORK], [], [
|
||||||
|
Define to any value to enable fork support in pcre2grep callout scripts.
|
||||||
|
This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also
|
||||||
|
defined.])
|
||||||
|
fi
|
||||||
|
AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT], [], [
|
||||||
|
Define to any value to enable callout script support in pcre2grep.])
|
||||||
|
else
|
||||||
|
enable_pcre2grep_callout_fork="no"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$enable_unicode" = "yes"; then
|
||||||
|
AC_DEFINE([SUPPORT_UNICODE], [], [
|
||||||
|
Define to any value to enable support for Unicode and UTF encoding.
|
||||||
|
This will work even in an EBCDIC environment, but it is incompatible
|
||||||
|
with the EBCDIC macro. That is, PCRE2 can support *either* EBCDIC
|
||||||
|
code *or* ASCII/Unicode, but not both at once.])
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$enable_pcre2grep_libz" = "yes"; then
|
||||||
|
AC_DEFINE([SUPPORT_LIBZ], [], [
|
||||||
|
Define to any value to allow pcre2grep to be linked with libz, so that it is
|
||||||
|
able to handle .gz files.])
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$enable_pcre2grep_libbz2" = "yes"; then
|
||||||
|
AC_DEFINE([SUPPORT_LIBBZ2], [], [
|
||||||
|
Define to any value to allow pcre2grep to be linked with libbz2, so that it
|
||||||
|
is able to handle .bz2 files.])
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test $with_pcre2grep_bufsize -lt 8192 ; then
|
||||||
|
AC_MSG_WARN([$with_pcre2grep_bufsize is too small for --with-pcre2grep-bufsize; using 8192])
|
||||||
|
with_pcre2grep_bufsize="8192"
|
||||||
|
else
|
||||||
|
if test $? -gt 1 ; then
|
||||||
|
AC_MSG_ERROR([Bad value for --with-pcre2grep-bufsize])
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test $with_pcre2grep_max_bufsize -lt $with_pcre2grep_bufsize ; then
|
||||||
|
with_pcre2grep_max_bufsize="$with_pcre2grep_bufsize"
|
||||||
|
else
|
||||||
|
if test $? -gt 1 ; then
|
||||||
|
AC_MSG_ERROR([Bad value for --with-pcre2grep-max-bufsize])
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
AC_DEFINE_UNQUOTED([PCRE2GREP_BUFSIZE], [$with_pcre2grep_bufsize], [
|
||||||
|
The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by
|
||||||
|
pcre2grep to hold parts of the file it is searching. The buffer will be
|
||||||
|
expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing very
|
||||||
|
long lines. The actual amount of memory used by pcre2grep is three times this
|
||||||
|
number, because it allows for the buffering of "before" and "after" lines.])
|
||||||
|
|
||||||
|
AC_DEFINE_UNQUOTED([PCRE2GREP_MAX_BUFSIZE], [$with_pcre2grep_max_bufsize], [
|
||||||
|
The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer
|
||||||
|
used by pcre2grep to hold parts of the file it is searching. The actual
|
||||||
|
amount of memory used by pcre2grep is three times this number, because it
|
||||||
|
allows for the buffering of "before" and "after" lines.])
|
||||||
|
|
||||||
|
if test "$enable_pcre2test_libedit" = "yes"; then
|
||||||
|
AC_DEFINE([SUPPORT_LIBEDIT], [], [
|
||||||
|
Define to any value to allow pcre2test to be linked with libedit.])
|
||||||
|
LIBREADLINE="$LIBEDIT"
|
||||||
|
elif test "$enable_pcre2test_libreadline" = "yes"; then
|
||||||
|
AC_DEFINE([SUPPORT_LIBREADLINE], [], [
|
||||||
|
Define to any value to allow pcre2test to be linked with libreadline.])
|
||||||
|
fi
|
||||||
|
|
||||||
|
AC_DEFINE_UNQUOTED([NEWLINE_DEFAULT], [$ac_pcre2_newline_value], [
|
||||||
|
The value of NEWLINE_DEFAULT determines the default newline character
|
||||||
|
sequence. PCRE2 client programs can override this by selecting other values
|
||||||
|
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY),
|
||||||
|
5 (ANYCRLF), and 6 (NUL).])
|
||||||
|
|
||||||
|
if test "$enable_bsr_anycrlf" = "yes"; then
|
||||||
|
AC_DEFINE([BSR_ANYCRLF], [], [
|
||||||
|
By default, the \R escape sequence matches any Unicode line ending
|
||||||
|
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
||||||
|
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
|
||||||
|
The build-time default can be overridden by the user of PCRE2 at runtime.])
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$enable_never_backslash_C" = "yes"; then
|
||||||
|
AC_DEFINE([NEVER_BACKSLASH_C], [], [
|
||||||
|
Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns.])
|
||||||
|
fi
|
||||||
|
|
||||||
|
AC_DEFINE_UNQUOTED([LINK_SIZE], [$with_link_size], [
|
||||||
|
The value of LINK_SIZE determines the number of bytes used to store
|
||||||
|
links as offsets within the compiled regex. The default is 2, which
|
||||||
|
allows for compiled patterns up to 65535 code units long. This covers the
|
||||||
|
vast majority of cases. However, PCRE2 can also be compiled to use 3 or 4
|
||||||
|
bytes instead. This allows for longer patterns in extreme cases.])
|
||||||
|
|
||||||
|
AC_DEFINE_UNQUOTED([MAX_VARLOOKBEHIND], [$with_max_varlookbehind], [
|
||||||
|
The value of MAX_VARLOOKBEHIND specifies the default maximum length, in
|
||||||
|
characters, for a variable-length lookbehind assertion.])
|
||||||
|
|
||||||
|
AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [
|
||||||
|
The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||||
|
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||||
|
stack that is used while compiling a pattern.])
|
||||||
|
|
||||||
|
AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
|
||||||
|
The value of MATCH_LIMIT determines the default number of times the
|
||||||
|
pcre2_match() function can record a backtrack position during a single
|
||||||
|
matching attempt. The value is also used to limit a loop counter in
|
||||||
|
pcre2_dfa_match(). There is a runtime interface for setting a different
|
||||||
|
limit. The limit exists in order to catch runaway regular expressions that
|
||||||
|
take forever to determine that they do not match. The default is set very
|
||||||
|
large so that it does not accidentally catch legitimate cases.])
|
||||||
|
|
||||||
|
# --with-match-limit-recursion is an obsolete synonym for --with-match-limit-depth
|
||||||
|
|
||||||
|
if test "$with_match_limit_recursion" != "UNSET"; then
|
||||||
|
cat <<EOF
|
||||||
|
|
||||||
|
WARNING: --with-match-limit-recursion is an obsolete option. Please use
|
||||||
|
--with-match-limit-depth in future. If both are set, --with-match-limit-depth
|
||||||
|
will be used. See also --with-heap-limit.
|
||||||
|
|
||||||
|
EOF
|
||||||
|
if test "$with_match_limit_depth" = "MATCH_LIMIT"; then
|
||||||
|
with_match_limit_depth=$with_match_limit_recursion
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
AC_DEFINE_UNQUOTED([MATCH_LIMIT_DEPTH], [$with_match_limit_depth], [
|
||||||
|
The above limit applies to all backtracks, whether or not they are nested. In
|
||||||
|
some environments it is desirable to limit the nesting of backtracking (that
|
||||||
|
is, the depth of tree that is searched) more strictly, in order to restrict
|
||||||
|
the maximum amount of heap memory that is used. The value of
|
||||||
|
MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it must
|
||||||
|
be less than the value of MATCH_LIMIT. The default is to use the same value
|
||||||
|
as MATCH_LIMIT. There is a runtime method for setting a different limit. In
|
||||||
|
the case of pcre2_dfa_match(), this limit controls the depth of the internal
|
||||||
|
nested function calls that are used for pattern recursions, lookarounds, and
|
||||||
|
atomic groups.])
|
||||||
|
|
||||||
|
AC_DEFINE_UNQUOTED([HEAP_LIMIT], [$with_heap_limit], [
|
||||||
|
This limits the amount of memory that may be used while matching
|
||||||
|
a pattern. It applies to both pcre2_match() and pcre2_dfa_match(). It does
|
||||||
|
not apply to JIT matching. The value is in kibibytes (units of 1024 bytes).])
|
||||||
|
|
||||||
|
AC_DEFINE([MAX_NAME_SIZE], [128], [
|
||||||
|
This limit is parameterized just in case anybody ever wants to
|
||||||
|
change it. Care must be taken if it is increased, because it guards
|
||||||
|
against integer overflow caused by enormously large patterns.])
|
||||||
|
|
||||||
|
AC_DEFINE([MAX_NAME_COUNT], [10000], [
|
||||||
|
This limit is parameterized just in case anybody ever wants to
|
||||||
|
change it. Care must be taken if it is increased, because it guards
|
||||||
|
against integer overflow caused by enormously large patterns.])
|
||||||
|
|
||||||
|
AH_VERBATIM([PCRE2_EXP_DEFN], [
|
||||||
|
/* If you are compiling for a system other than a Unix-like system or
|
||||||
|
Win32, and it needs some magic to be inserted before the definition
|
||||||
|
of a function that is exported by the library, define this macro to
|
||||||
|
contain the relevant magic. If you do not define this macro, a suitable
|
||||||
|
__declspec value is used for Windows systems; in other environments
|
||||||
|
a compiler relevant "extern" is used with any "visibility" related
|
||||||
|
attributes from PCRE2_EXPORT included.
|
||||||
|
This macro appears at the start of every exported function that is part
|
||||||
|
of the external API. It does not appear on functions that are "external"
|
||||||
|
in the C sense, but which are internal to the library. */
|
||||||
|
#undef PCRE2_EXP_DEFN])
|
||||||
|
|
||||||
|
if test "$enable_ebcdic" = "yes"; then
|
||||||
|
AC_DEFINE_UNQUOTED([EBCDIC], [], [
|
||||||
|
If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||||
|
character codes, define this macro to any value. When EBCDIC is set, PCRE2
|
||||||
|
assumes that all input strings are in EBCDIC. If you do not define this
|
||||||
|
macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
|
||||||
|
is not possible to build a version of PCRE2 that supports both EBCDIC and
|
||||||
|
UTF-8/16/32.])
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$enable_ebcdic_nl25" = "yes"; then
|
||||||
|
AC_DEFINE_UNQUOTED([EBCDIC_NL25], [], [
|
||||||
|
In an EBCDIC environment, define this macro to any value to arrange for
|
||||||
|
the NL character to be 0x25 instead of the default 0x15. NL plays the role
|
||||||
|
that LF does in an ASCII/Unicode environment.])
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$enable_valgrind" = "yes"; then
|
||||||
|
AC_DEFINE_UNQUOTED([SUPPORT_VALGRIND], [], [
|
||||||
|
Define to any value for valgrind support to find invalid memory reads.])
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Platform specific issues
|
||||||
|
NO_UNDEFINED=
|
||||||
|
EXPORT_ALL_SYMBOLS=
|
||||||
|
case $host_os in
|
||||||
|
cygwin* | mingw* )
|
||||||
|
if test X"$enable_shared" = Xyes; then
|
||||||
|
NO_UNDEFINED="-no-undefined"
|
||||||
|
EXPORT_ALL_SYMBOLS="-Wl,--export-all-symbols"
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
# The extra LDFLAGS for each particular library. The libpcre2*_version values
|
||||||
|
# are m4 variables, assigned above.
|
||||||
|
|
||||||
|
EXTRA_LIBPCRE2_8_LDFLAGS="$EXTRA_LIBPCRE2_8_LDFLAGS \
|
||||||
|
$NO_UNDEFINED -version-info libpcre2_8_version"
|
||||||
|
|
||||||
|
EXTRA_LIBPCRE2_16_LDFLAGS="$EXTRA_LIBPCRE2_16_LDFLAGS \
|
||||||
|
$NO_UNDEFINED -version-info libpcre2_16_version"
|
||||||
|
|
||||||
|
EXTRA_LIBPCRE2_32_LDFLAGS="$EXTRA_LIBPCRE2_32_LDFLAGS \
|
||||||
|
$NO_UNDEFINED -version-info libpcre2_32_version"
|
||||||
|
|
||||||
|
EXTRA_LIBPCRE2_POSIX_LDFLAGS="$EXTRA_LIBPCRE2_POSIX_LDFLAGS \
|
||||||
|
$NO_UNDEFINED -version-info libpcre2_posix_version"
|
||||||
|
|
||||||
|
AC_SUBST(EXTRA_LIBPCRE2_8_LDFLAGS)
|
||||||
|
AC_SUBST(EXTRA_LIBPCRE2_16_LDFLAGS)
|
||||||
|
AC_SUBST(EXTRA_LIBPCRE2_32_LDFLAGS)
|
||||||
|
AC_SUBST(EXTRA_LIBPCRE2_POSIX_LDFLAGS)
|
||||||
|
|
||||||
|
# When we run 'make distcheck', use these arguments. Turning off compiler
|
||||||
|
# optimization makes it run faster.
|
||||||
|
DISTCHECK_CONFIGURE_FLAGS="CFLAGS='' CXXFLAGS='' --enable-pcre2-16 --enable-pcre2-32 --enable-jit"
|
||||||
|
AC_SUBST(DISTCHECK_CONFIGURE_FLAGS)
|
||||||
|
|
||||||
|
# Check that, if --enable-pcre2grep-libz or --enable-pcre2grep-libbz2 is
|
||||||
|
# specified, the relevant library is available.
|
||||||
|
|
||||||
|
if test "$enable_pcre2grep_libz" = "yes"; then
|
||||||
|
if test "$HAVE_ZLIB_H" != "1"; then
|
||||||
|
echo "** Cannot --enable-pcre2grep-libz because zlib.h was not found"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
if test "$HAVE_LIBZ" != "1"; then
|
||||||
|
echo "** Cannot --enable-pcre2grep-libz because libz was not found"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
LIBZ="-lz"
|
||||||
|
fi
|
||||||
|
AC_SUBST(LIBZ)
|
||||||
|
|
||||||
|
if test "$enable_pcre2grep_libbz2" = "yes"; then
|
||||||
|
if test "$HAVE_BZLIB_H" != "1"; then
|
||||||
|
echo "** Cannot --enable-pcre2grep-libbz2 because bzlib.h was not found"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
if test "$HAVE_LIBBZ2" != "1"; then
|
||||||
|
echo "** Cannot --enable-pcre2grep-libbz2 because libbz2 was not found"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
LIBBZ2="-lbz2"
|
||||||
|
fi
|
||||||
|
AC_SUBST(LIBBZ2)
|
||||||
|
|
||||||
|
# Similarly for --enable-pcre2test-libedit and --enable-pcre2test-libreadline
|
||||||
|
|
||||||
|
if test "$enable_pcre2test_libedit" = "yes"; then
  # The two line-editing options are mutually exclusive. The message names
  # the options exactly as they are spelled on the configure command line.
  if test "$enable_pcre2test_libreadline" = "yes"; then
    echo "** Cannot use both --enable-pcre2test-libedit and --enable-pcre2test-libreadline"
    exit 1
  fi
  # HAVE_LIBEDIT_HEADER and LIBEDIT are presumably set by earlier header and
  # library checks that are not part of this section. An empty value is
  # treated here as not found.
  if test -z "$HAVE_LIBEDIT_HEADER"; then
    echo "** Cannot --enable-pcre2test-libedit because neither editline/readline.h,"
    echo "** edit/readline/readline.h nor a compatible header was found."
    exit 1
  fi
  if test -z "$LIBEDIT"; then
    echo "** Cannot --enable-pcre2test-libedit because libedit library was not found."
    exit 1
  fi
fi
|
||||||
|
|
||||||
|
if test "$enable_pcre2test_libreadline" = "yes"; then
  # Abort configuration when libreadline support was requested but one of its
  # headers or the library itself is missing. The messages name the option
  # exactly as it is spelled on the configure command line.
  # HAVE_READLINE_H, HAVE_HISTORY_H and LIBREADLINE are presumably set by
  # earlier checks that are not part of this section.
  if test "$HAVE_READLINE_H" != "1"; then
    echo "** Cannot --enable-pcre2test-libreadline because readline/readline.h was not found."
    exit 1
  fi
  if test "$HAVE_HISTORY_H" != "1"; then
    echo "** Cannot --enable-pcre2test-libreadline because readline/history.h was not found."
    exit 1
  fi
  if test -z "$LIBREADLINE"; then
    echo "** Cannot --enable-pcre2test-libreadline because readline library was not found."
    exit 1
  fi
fi
|
||||||
|
|
||||||
|
# Handle valgrind support
|
||||||
|
|
||||||
|
if test "$enable_valgrind" = "yes"; then
|
||||||
|
m4_ifdef([PKG_CHECK_MODULES],
|
||||||
|
[PKG_CHECK_MODULES([VALGRIND],[valgrind])],
|
||||||
|
[AC_MSG_ERROR([pkg-config not supported])])
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Handle code coverage reporting support
|
||||||
|
if test "$enable_coverage" = "yes"; then
|
||||||
|
if test "x$GCC" != "xyes"; then
|
||||||
|
AC_MSG_ERROR([Code coverage reports can only be generated when using GCC])
|
||||||
|
fi
|
||||||
|
|
||||||
|
# ccache is incompatible with gcov
|
||||||
|
AC_PATH_PROG([SHTOOL],[shtool],[false])
|
||||||
|
case `$SHTOOL path $CC` in
|
||||||
|
*ccache*) cc_ccache=yes;;
|
||||||
|
*) cc_ccache=no;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
if test "$cc_ccache" = "yes"; then
  # Refuse to continue unless ccache has been switched off explicitly.
  # An unset or empty CCACHE_DISABLE also differs from "1", so a single
  # comparison covers every case. The x prefix guards against values that
  # look like test operators, and the non-portable -o operator is avoided.
  if test "x$CCACHE_DISABLE" != "x1"; then
    AC_MSG_ERROR([must export CCACHE_DISABLE=1 to disable ccache for code coverage])
  fi
fi
|
||||||
|
|
||||||
|
AC_ARG_VAR([LCOV],[the ltp lcov program])
|
||||||
|
AC_PATH_PROG([LCOV],[lcov],[false])
|
||||||
|
if test "x$LCOV" = "xfalse"; then
|
||||||
|
AC_MSG_ERROR([lcov not found])
|
||||||
|
fi
|
||||||
|
|
||||||
|
AC_ARG_VAR([GENHTML],[the ltp genhtml program])
|
||||||
|
AC_PATH_PROG([GENHTML],[genhtml],[false])
|
||||||
|
if test "x$GENHTML" = "xfalse"; then
|
||||||
|
AC_MSG_ERROR([genhtml not found])
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Set flags needed for gcov
|
||||||
|
GCOV_CFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
|
||||||
|
GCOV_CXXFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
|
||||||
|
GCOV_LIBS="-lgcov"
|
||||||
|
AC_SUBST([GCOV_CFLAGS])
|
||||||
|
AC_SUBST([GCOV_CXXFLAGS])
|
||||||
|
AC_SUBST([GCOV_LIBS])
|
||||||
|
fi # enable_coverage
|
||||||
|
|
||||||
|
AM_CONDITIONAL([WITH_GCOV],[test "x$enable_coverage" = "xyes"])
|
||||||
|
|
||||||
|
AC_MSG_CHECKING([whether Intel CET is enabled])
|
||||||
|
AC_LANG_PUSH([C])
|
||||||
|
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,
|
||||||
|
[[#ifndef __CET__
|
||||||
|
# error CET is not enabled
|
||||||
|
#endif]])],
|
||||||
|
[pcre2_cc_cv_intel_cet_enabled=yes],
|
||||||
|
[pcre2_cc_cv_intel_cet_enabled=no])
|
||||||
|
AC_MSG_RESULT([$pcre2_cc_cv_intel_cet_enabled])
|
||||||
|
if test "$pcre2_cc_cv_intel_cet_enabled" = yes; then
|
||||||
|
CET_CFLAGS="-mshstk"
|
||||||
|
AC_SUBST([CET_CFLAGS])
|
||||||
|
fi
|
||||||
|
AC_LANG_POP([C])
|
||||||
|
|
||||||
|
# LIB_POSTFIX is used by CMakeLists.txt for Windows debug builds.
|
||||||
|
# Pass empty LIB_POSTFIX to *.pc files and pcre2-config here.
|
||||||
|
AC_SUBST(LIB_POSTFIX)
|
||||||
|
|
||||||
|
# Produce these files, in addition to config.h.
|
||||||
|
|
||||||
|
AC_CONFIG_FILES(
|
||||||
|
Makefile
|
||||||
|
libpcre2-8.pc
|
||||||
|
libpcre2-16.pc
|
||||||
|
libpcre2-32.pc
|
||||||
|
libpcre2-posix.pc
|
||||||
|
pcre2-config
|
||||||
|
src/pcre2.h
|
||||||
|
)
|
||||||
|
|
||||||
|
# Make the generated script files executable.
|
||||||
|
AC_CONFIG_COMMANDS([script-chmod], [chmod a+x pcre2-config])
|
||||||
|
|
||||||
|
# Make sure that pcre2_chartables.c is removed in case the method for
|
||||||
|
# creating it was changed by reconfiguration.
|
||||||
|
AC_CONFIG_COMMANDS([delete-old-chartables], [rm -f pcre2_chartables.c])
|
||||||
|
|
||||||
|
AC_OUTPUT
|
||||||
|
|
||||||
|
# --disable-stack-for-recursion is obsolete and has no effect.
|
||||||
|
|
||||||
|
if test "$enable_stack_for_recursion" = "no"; then
|
||||||
|
cat <<EOF
|
||||||
|
|
||||||
|
WARNING: --disable-stack-for-recursion is obsolete and has no effect.
|
||||||
|
EOF
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Print out a nice little message after configure is run displaying the
|
||||||
|
# chosen options.
|
||||||
|
|
||||||
|
ebcdic_nl_code=n/a
|
||||||
|
if test "$enable_ebcdic_nl25" = "yes"; then
|
||||||
|
ebcdic_nl_code=0x25
|
||||||
|
elif test "$enable_ebcdic" = "yes"; then
|
||||||
|
ebcdic_nl_code=0x15
|
||||||
|
fi
|
||||||
|
|
||||||
|
cat <<EOF
|
||||||
|
|
||||||
|
$PACKAGE-$VERSION configuration summary:
|
||||||
|
|
||||||
|
Install prefix ..................... : ${prefix}
|
||||||
|
C preprocessor ..................... : ${CPP}
|
||||||
|
C compiler ......................... : ${CC}
|
||||||
|
Linker ............................. : ${LD}
|
||||||
|
C preprocessor flags ............... : ${CPPFLAGS}
|
||||||
|
C compiler flags ................... : ${CFLAGS} ${VISIBILITY_CFLAGS}
|
||||||
|
Linker flags ....................... : ${LDFLAGS}
|
||||||
|
Extra libraries .................... : ${LIBS}
|
||||||
|
|
||||||
|
Build 8-bit pcre2 library .......... : ${enable_pcre2_8}
|
||||||
|
Build 16-bit pcre2 library ......... : ${enable_pcre2_16}
|
||||||
|
Build 32-bit pcre2 library ......... : ${enable_pcre2_32}
|
||||||
|
Include debugging code ............. : ${enable_debug}
|
||||||
|
Enable JIT compiling support ....... : ${enable_jit}
|
||||||
|
Use SELinux allocator in JIT ....... : ${enable_jit_sealloc}
|
||||||
|
Enable Unicode support ............. : ${enable_unicode}
|
||||||
|
Newline char/sequence .............. : ${enable_newline}
|
||||||
|
\R matches only ANYCRLF ............ : ${enable_bsr_anycrlf}
|
||||||
|
\C is disabled ..................... : ${enable_never_backslash_C}
|
||||||
|
EBCDIC coding ...................... : ${enable_ebcdic}
|
||||||
|
EBCDIC code for NL ................. : ${ebcdic_nl_code}
|
||||||
|
Rebuild char tables ................ : ${enable_rebuild_chartables}
|
||||||
|
Internal link size ................. : ${with_link_size}
|
||||||
|
Maximum variable lookbehind ........ : ${with_max_varlookbehind}
|
||||||
|
Nested parentheses limit ........... : ${with_parens_nest_limit}
|
||||||
|
Heap limit ......................... : ${with_heap_limit} kibibytes
|
||||||
|
Match limit ........................ : ${with_match_limit}
|
||||||
|
Match depth limit .................. : ${with_match_limit_depth}
|
||||||
|
Build shared libs .................. : ${enable_shared}
|
||||||
|
Build static libs .................. : ${enable_static}
|
||||||
|
Use JIT in pcre2grep ............... : ${enable_pcre2grep_jit}
|
||||||
|
Enable callouts in pcre2grep ....... : ${enable_pcre2grep_callout}
|
||||||
|
Enable fork in pcre2grep callouts .. : ${enable_pcre2grep_callout_fork}
|
||||||
|
Initial buffer size for pcre2grep .. : ${with_pcre2grep_bufsize}
|
||||||
|
Maximum buffer size for pcre2grep .. : ${with_pcre2grep_max_bufsize}
|
||||||
|
Link pcre2grep with libz ........... : ${enable_pcre2grep_libz}
|
||||||
|
Link pcre2grep with libbz2 ......... : ${enable_pcre2grep_libbz2}
|
||||||
|
Link pcre2test with libedit ........ : ${enable_pcre2test_libedit}
|
||||||
|
Link pcre2test with libreadline .... : ${enable_pcre2test_libreadline}
|
||||||
|
Valgrind support ................... : ${enable_valgrind}
|
||||||
|
Code coverage ...................... : ${enable_coverage}
|
||||||
|
Fuzzer support ..................... : ${enable_fuzz_support}
|
||||||
|
Differential fuzzer support ........ : ${enable_diff_fuzz_support}
|
||||||
|
Use %zu and %td .................... : ${enable_percent_zt}
|
||||||
|
|
||||||
|
EOF
|
||||||
|
|
||||||
|
dnl end configure.ac
|
||||||
13
3rd/pcre2/libpcre2-16.pc.in
Normal file
13
3rd/pcre2/libpcre2-16.pc.in
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
# Package Information for pkg-config
|
||||||
|
|
||||||
|
prefix=@prefix@
|
||||||
|
exec_prefix=@exec_prefix@
|
||||||
|
libdir=@libdir@
|
||||||
|
includedir=@includedir@
|
||||||
|
|
||||||
|
Name: libpcre2-16
|
||||||
|
Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 16 bit character support
|
||||||
|
Version: @PACKAGE_VERSION@
|
||||||
|
Libs: -L${libdir} -lpcre2-16@LIB_POSTFIX@
|
||||||
|
Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@
|
||||||
|
Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@
|
||||||
13
3rd/pcre2/libpcre2-32.pc.in
Normal file
13
3rd/pcre2/libpcre2-32.pc.in
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
# Package Information for pkg-config
|
||||||
|
|
||||||
|
prefix=@prefix@
|
||||||
|
exec_prefix=@exec_prefix@
|
||||||
|
libdir=@libdir@
|
||||||
|
includedir=@includedir@
|
||||||
|
|
||||||
|
Name: libpcre2-32
|
||||||
|
Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 32 bit character support
|
||||||
|
Version: @PACKAGE_VERSION@
|
||||||
|
Libs: -L${libdir} -lpcre2-32@LIB_POSTFIX@
|
||||||
|
Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@
|
||||||
|
Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@
|
||||||
13
3rd/pcre2/libpcre2-8.pc.in
Normal file
13
3rd/pcre2/libpcre2-8.pc.in
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
# Package Information for pkg-config
|
||||||
|
|
||||||
|
prefix=@prefix@
|
||||||
|
exec_prefix=@exec_prefix@
|
||||||
|
libdir=@libdir@
|
||||||
|
includedir=@includedir@
|
||||||
|
|
||||||
|
Name: libpcre2-8
|
||||||
|
Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 8 bit character support
|
||||||
|
Version: @PACKAGE_VERSION@
|
||||||
|
Libs: -L${libdir} -lpcre2-8@LIB_POSTFIX@
|
||||||
|
Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@
|
||||||
|
Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@
|
||||||
13
3rd/pcre2/libpcre2-posix.pc.in
Normal file
13
3rd/pcre2/libpcre2-posix.pc.in
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
# Package Information for pkg-config
|
||||||
|
|
||||||
|
prefix=@prefix@
|
||||||
|
exec_prefix=@exec_prefix@
|
||||||
|
libdir=@libdir@
|
||||||
|
includedir=@includedir@
|
||||||
|
|
||||||
|
Name: libpcre2-posix
|
||||||
|
Description: Posix compatible interface to libpcre2-8
|
||||||
|
Version: @PACKAGE_VERSION@
|
||||||
|
Libs: -L${libdir} -lpcre2-posix@LIB_POSTFIX@
|
||||||
|
Cflags: -I${includedir} @PCRE2POSIX_CFLAG@
|
||||||
|
Requires.private: libpcre2-8
|
||||||
121
3rd/pcre2/pcre2-config.in
Normal file
121
3rd/pcre2/pcre2-config.in
Normal file
@@ -0,0 +1,121 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
prefix=@prefix@
|
||||||
|
exec_prefix=@exec_prefix@
|
||||||
|
exec_prefix_set=no
|
||||||
|
|
||||||
|
cflags="[--cflags]"
|
||||||
|
libs=
|
||||||
|
|
||||||
|
if test @enable_pcre2_16@ = yes ; then
|
||||||
|
libs="[--libs16] $libs"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test @enable_pcre2_32@ = yes ; then
|
||||||
|
libs="[--libs32] $libs"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test @enable_pcre2_8@ = yes ; then
|
||||||
|
libs="[--libs8] [--libs-posix] $libs"
|
||||||
|
cflags="$cflags [--cflags-posix]"
|
||||||
|
fi
|
||||||
|
|
||||||
|
usage="Usage: pcre2-config [--prefix] [--exec-prefix] [--version] $libs $cflags"
|
||||||
|
|
||||||
|
if test $# -eq 0; then
|
||||||
|
echo "${usage}" 1>&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
libR=
|
||||||
|
case `uname -s` in
|
||||||
|
*SunOS*)
|
||||||
|
libR=" -R@libdir@"
|
||||||
|
;;
|
||||||
|
*BSD*)
|
||||||
|
libR=" -Wl,-R@libdir@"
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
libS=
|
||||||
|
if test @libdir@ != /usr/lib ; then
|
||||||
|
libS=-L@libdir@
|
||||||
|
fi
|
||||||
|
|
||||||
|
while test $# -gt 0; do
|
||||||
|
case "$1" in
|
||||||
|
-*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
|
||||||
|
*) optarg= ;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
case $1 in
|
||||||
|
--prefix=*)
|
||||||
|
prefix=$optarg
|
||||||
|
if test $exec_prefix_set = no ; then
|
||||||
|
exec_prefix=$optarg
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
--prefix)
|
||||||
|
echo $prefix
|
||||||
|
;;
|
||||||
|
--exec-prefix=*)
|
||||||
|
exec_prefix=$optarg
|
||||||
|
exec_prefix_set=yes
|
||||||
|
;;
|
||||||
|
--exec-prefix)
|
||||||
|
echo $exec_prefix
|
||||||
|
;;
|
||||||
|
--version)
|
||||||
|
echo @PACKAGE_VERSION@
|
||||||
|
;;
|
||||||
|
--cflags)
|
||||||
|
if test @includedir@ != /usr/include ; then
|
||||||
|
includes=-I@includedir@
|
||||||
|
fi
|
||||||
|
echo $includes @PCRE2_STATIC_CFLAG@
|
||||||
|
;;
|
||||||
|
--cflags-posix)
|
||||||
|
if test @enable_pcre2_8@ = yes ; then
|
||||||
|
if test @includedir@ != /usr/include ; then
|
||||||
|
includes=-I@includedir@
|
||||||
|
fi
|
||||||
|
echo $includes @PCRE2POSIX_CFLAG@
|
||||||
|
else
|
||||||
|
echo "${usage}" 1>&2
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
--libs-posix)
|
||||||
|
if test @enable_pcre2_8@ = yes ; then
|
||||||
|
echo $libS$libR -lpcre2-posix@LIB_POSTFIX@ -lpcre2-8@LIB_POSTFIX@
|
||||||
|
else
|
||||||
|
echo "${usage}" 1>&2
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
--libs8)
|
||||||
|
if test @enable_pcre2_8@ = yes ; then
|
||||||
|
echo $libS$libR -lpcre2-8@LIB_POSTFIX@
|
||||||
|
else
|
||||||
|
echo "${usage}" 1>&2
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
--libs16)
|
||||||
|
if test @enable_pcre2_16@ = yes ; then
|
||||||
|
echo $libS$libR -lpcre2-16@LIB_POSTFIX@
|
||||||
|
else
|
||||||
|
echo "${usage}" 1>&2
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
--libs32)
|
||||||
|
if test @enable_pcre2_32@ = yes ; then
|
||||||
|
echo $libS$libR -lpcre2-32@LIB_POSTFIX@
|
||||||
|
else
|
||||||
|
echo "${usage}" 1>&2
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
echo "${usage}" 1>&2
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
shift
|
||||||
|
done
|
||||||
483
3rd/pcre2/src/config.h.generic
Normal file
483
3rd/pcre2/src/config.h.generic
Normal file
@@ -0,0 +1,483 @@
|
|||||||
|
/* src/config.h. Generated from config.h.in by configure. */
|
||||||
|
/* src/config.h.in. Generated from configure.ac by autoheader. */
|
||||||
|
|
||||||
|
/* PCRE2 is written in Standard C, but there are a few non-standard things it
|
||||||
|
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||||
|
systems.
|
||||||
|
|
||||||
|
In environments that support the GNU autotools, config.h.in is converted into
|
||||||
|
config.h by the "configure" script. In environments that use CMake,
|
||||||
|
config-cmake.in is converted into config.h. If you are going to build PCRE2 "by
|
||||||
|
hand" without using "configure" or CMake, you should copy the distributed
|
||||||
|
config.h.generic to config.h, and edit the macro definitions to be the way you
|
||||||
|
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
|
||||||
|
so that config.h is included at the start of every source.
|
||||||
|
|
||||||
|
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||||
|
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
|
||||||
|
but if you do, default values will be taken from config.h for non-boolean
|
||||||
|
macros that are not defined on the command line.
|
||||||
|
|
||||||
|
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be
|
||||||
|
defined (conventionally to 1) for TRUE, and not defined at all for FALSE. All
|
||||||
|
such macros are listed as a commented #undef in config.h.generic. Macros such
|
||||||
|
as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||||
|
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
|
||||||
|
|
||||||
|
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||||
|
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
|
||||||
|
sure both macros are undefined; an emulation function will then be used. */
|
||||||
|
|
||||||
|
/* By default, the \R escape sequence matches any Unicode line ending
|
||||||
|
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
||||||
|
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
|
||||||
|
The build-time default can be overridden by the user of PCRE2 at runtime.
|
||||||
|
*/
|
||||||
|
/* #undef BSR_ANYCRLF */
|
||||||
|
|
||||||
|
/* Define to any value to disable the use of the z and t modifiers in
|
||||||
|
formatting settings such as %zu or %td (this is rarely needed). */
|
||||||
|
/* #undef DISABLE_PERCENT_ZT */
|
||||||
|
|
||||||
|
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||||
|
character codes, define this macro to any value. When EBCDIC is set, PCRE2
|
||||||
|
assumes that all input strings are in EBCDIC. If you do not define this
|
||||||
|
macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
|
||||||
|
is not possible to build a version of PCRE2 that supports both EBCDIC and
|
||||||
|
UTF-8/16/32. */
|
||||||
|
/* #undef EBCDIC */
|
||||||
|
|
||||||
|
/* In an EBCDIC environment, define this macro to any value to arrange for the
|
||||||
|
NL character to be 0x25 instead of the default 0x15. NL plays the role that
|
||||||
|
LF does in an ASCII/Unicode environment. */
|
||||||
|
/* #undef EBCDIC_NL25 */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <assert.h> header file. */
|
||||||
|
/* #undef HAVE_ASSERT_H */
|
||||||
|
|
||||||
|
/* Define this if your compiler supports __attribute__((uninitialized)) */
|
||||||
|
/* #undef HAVE_ATTRIBUTE_UNINITIALIZED */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `bcopy' function. */
|
||||||
|
/* #undef HAVE_BCOPY */
|
||||||
|
|
||||||
|
/* Define this if your compiler provides __assume() */
|
||||||
|
/* #undef HAVE_BUILTIN_ASSUME */
|
||||||
|
|
||||||
|
/* Define this if your compiler provides __builtin_mul_overflow() */
|
||||||
|
/* #undef HAVE_BUILTIN_MUL_OVERFLOW */
|
||||||
|
|
||||||
|
/* Define this if your compiler provides __builtin_unreachable() */
|
||||||
|
/* #undef HAVE_BUILTIN_UNREACHABLE */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <bzlib.h> header file. */
|
||||||
|
/* #undef HAVE_BZLIB_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <dirent.h> header file. */
|
||||||
|
/* #undef HAVE_DIRENT_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||||
|
/* #undef HAVE_DLFCN_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <editline/readline.h> header file. */
|
||||||
|
/* #undef HAVE_EDITLINE_READLINE_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <edit/readline/readline.h> header file. */
|
||||||
|
/* #undef HAVE_EDIT_READLINE_READLINE_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||||
|
/* #undef HAVE_INTTYPES_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <limits.h> header file. */
|
||||||
|
/* #undef HAVE_LIMITS_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `memfd_create' function. */
|
||||||
|
/* #undef HAVE_MEMFD_CREATE */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `memmove' function. */
|
||||||
|
/* #undef HAVE_MEMMOVE */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <minix/config.h> header file. */
|
||||||
|
/* #undef HAVE_MINIX_CONFIG_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `mkostemp' function. */
|
||||||
|
/* #undef HAVE_MKOSTEMP */
|
||||||
|
|
||||||
|
/* Define if you have POSIX threads libraries and header files. */
|
||||||
|
/* #undef HAVE_PTHREAD */
|
||||||
|
|
||||||
|
/* Have PTHREAD_PRIO_INHERIT. */
|
||||||
|
/* #undef HAVE_PTHREAD_PRIO_INHERIT */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <readline.h> header file. */
|
||||||
|
/* #undef HAVE_READLINE_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <readline/history.h> header file. */
|
||||||
|
/* #undef HAVE_READLINE_HISTORY_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <readline/readline.h> header file. */
|
||||||
|
/* #undef HAVE_READLINE_READLINE_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `realpath' function. */
|
||||||
|
/* #undef HAVE_REALPATH */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `secure_getenv' function. */
|
||||||
|
/* #undef HAVE_SECURE_GETENV */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <stdint.h> header file. */
|
||||||
|
/* #undef HAVE_STDINT_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <stdio.h> header file. */
|
||||||
|
/* #undef HAVE_STDIO_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||||
|
/* #undef HAVE_STDLIB_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `strerror' function. */
|
||||||
|
/* #undef HAVE_STRERROR */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <strings.h> header file. */
|
||||||
|
/* #undef HAVE_STRINGS_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <string.h> header file. */
|
||||||
|
/* #undef HAVE_STRING_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||||
|
/* #undef HAVE_SYS_STAT_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||||
|
/* #undef HAVE_SYS_TYPES_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <sys/wait.h> header file. */
|
||||||
|
/* #undef HAVE_SYS_WAIT_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <unistd.h> header file. */
|
||||||
|
/* #undef HAVE_UNISTD_H */
|
||||||
|
|
||||||
|
/* Define to 1 if the compiler supports GCC compatible visibility
|
||||||
|
declarations. */
|
||||||
|
/* #undef HAVE_VISIBILITY */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <wchar.h> header file. */
|
||||||
|
/* #undef HAVE_WCHAR_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <windows.h> header file. */
|
||||||
|
/* #undef HAVE_WINDOWS_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <zlib.h> header file. */
|
||||||
|
/* #undef HAVE_ZLIB_H */
|
||||||
|
|
||||||
|
/* This limits the amount of memory that may be used while matching a pattern.
|
||||||
|
It applies to both pcre2_match() and pcre2_dfa_match(). It does not apply
|
||||||
|
to JIT matching. The value is in kibibytes (units of 1024 bytes). */
|
||||||
|
#ifndef HEAP_LIMIT
|
||||||
|
#define HEAP_LIMIT 20000000
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* The value of LINK_SIZE determines the number of bytes used to store links
|
||||||
|
as offsets within the compiled regex. The default is 2, which allows for
|
||||||
|
compiled patterns up to 65535 code units long. This covers the vast
|
||||||
|
majority of cases. However, PCRE2 can also be compiled to use 3 or 4 bytes
|
||||||
|
instead. This allows for longer patterns in extreme cases. */
|
||||||
|
#ifndef LINK_SIZE
|
||||||
|
#define LINK_SIZE 2
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Define to the sub-directory where libtool stores uninstalled libraries. */
|
||||||
|
/* This is ignored unless you are using libtool. */
|
||||||
|
#ifndef LT_OBJDIR
|
||||||
|
#define LT_OBJDIR ".libs/"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* The value of MATCH_LIMIT determines the default number of times the
|
||||||
|
pcre2_match() function can record a backtrack position during a single
|
||||||
|
matching attempt. The value is also used to limit a loop counter in
|
||||||
|
pcre2_dfa_match(). There is a runtime interface for setting a different
|
||||||
|
limit. The limit exists in order to catch runaway regular expressions that
|
||||||
|
take forever to determine that they do not match. The default is set very
|
||||||
|
large so that it does not accidentally catch legitimate cases. */
|
||||||
|
#ifndef MATCH_LIMIT
|
||||||
|
#define MATCH_LIMIT 10000000
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* The above limit applies to all backtracks, whether or not they are nested.
|
||||||
|
In some environments it is desirable to limit the nesting of backtracking
|
||||||
|
(that is, the depth of tree that is searched) more strictly, in order to
|
||||||
|
restrict the maximum amount of heap memory that is used. The value of
|
||||||
|
MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it
|
||||||
|
must be less than the value of MATCH_LIMIT. The default is to use the same
|
||||||
|
value as MATCH_LIMIT. There is a runtime method for setting a different
|
||||||
|
limit. In the case of pcre2_dfa_match(), this limit controls the depth of
|
||||||
|
the internal nested function calls that are used for pattern recursions,
|
||||||
|
lookarounds, and atomic groups. */
|
||||||
|
#ifndef MATCH_LIMIT_DEPTH
|
||||||
|
#define MATCH_LIMIT_DEPTH MATCH_LIMIT
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||||
|
Care must be taken if it is increased, because it guards against integer
|
||||||
|
overflow caused by enormously large patterns. */
|
||||||
|
#ifndef MAX_NAME_COUNT
|
||||||
|
#define MAX_NAME_COUNT 10000
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||||
|
Care must be taken if it is increased, because it guards against integer
|
||||||
|
overflow caused by enormously large patterns. */
|
||||||
|
#ifndef MAX_NAME_SIZE
|
||||||
|
#define MAX_NAME_SIZE 128
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* The value of MAX_VARLOOKBEHIND specifies the default maximum length, in
|
||||||
|
characters, for a variable-length lookbehind assertion. */
|
||||||
|
#ifndef MAX_VARLOOKBEHIND
|
||||||
|
#define MAX_VARLOOKBEHIND 255
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */
|
||||||
|
/* #undef NEVER_BACKSLASH_C */
|
||||||
|
|
||||||
|
/* The value of NEWLINE_DEFAULT determines the default newline character
|
||||||
|
sequence. PCRE2 client programs can override this by selecting other values
|
||||||
|
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), 5
|
||||||
|
(ANYCRLF), and 6 (NUL). */
|
||||||
|
#ifndef NEWLINE_DEFAULT
|
||||||
|
#define NEWLINE_DEFAULT 2
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Name of package */
|
||||||
|
#define PACKAGE "pcre2"
|
||||||
|
|
||||||
|
/* Define to the address where bug reports for this package should be sent. */
|
||||||
|
#define PACKAGE_BUGREPORT ""
|
||||||
|
|
||||||
|
/* Define to the full name of this package. */
|
||||||
|
#define PACKAGE_NAME "PCRE2"
|
||||||
|
|
||||||
|
/* Define to the full name and version of this package. */
|
||||||
|
#define PACKAGE_STRING "PCRE2 10.45"
|
||||||
|
|
||||||
|
/* Define to the one symbol short name of this package. */
|
||||||
|
#define PACKAGE_TARNAME "pcre2"
|
||||||
|
|
||||||
|
/* Define to the home page for this package. */
|
||||||
|
#define PACKAGE_URL ""
|
||||||
|
|
||||||
|
/* Define to the version of this package. */
|
||||||
|
#define PACKAGE_VERSION "10.45"
|
||||||
|
|
||||||
|
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||||
|
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||||
|
stack that is used while compiling a pattern. */
|
||||||
|
#ifndef PARENS_NEST_LIMIT
|
||||||
|
#define PARENS_NEST_LIMIT 250
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by
|
||||||
|
pcre2grep to hold parts of the file it is searching. The buffer will be
|
||||||
|
expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing
|
||||||
|
very long lines. The actual amount of memory used by pcre2grep is three
|
||||||
|
times this number, because it allows for the buffering of "before" and
|
||||||
|
"after" lines. */
|
||||||
|
#ifndef PCRE2GREP_BUFSIZE
|
||||||
|
#define PCRE2GREP_BUFSIZE 20480
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer
|
||||||
|
used by pcre2grep to hold parts of the file it is searching. The actual
|
||||||
|
amount of memory used by pcre2grep is three times this number, because it
|
||||||
|
allows for the buffering of "before" and "after" lines. */
|
||||||
|
#ifndef PCRE2GREP_MAX_BUFSIZE
|
||||||
|
#define PCRE2GREP_MAX_BUFSIZE 1048576
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Define to any value to include debugging code. */
|
||||||
|
/* #undef PCRE2_DEBUG */
|
||||||
|
|
||||||
|
/* to make a symbol visible */
|
||||||
|
#define PCRE2_EXPORT
|
||||||
|
|
||||||
|
/* If you are compiling for a system other than a Unix-like system or
|
||||||
|
Win32, and it needs some magic to be inserted before the definition
|
||||||
|
of a function that is exported by the library, define this macro to
|
||||||
|
contain the relevant magic. If you do not define this macro, a suitable
|
||||||
|
__declspec value is used for Windows systems; in other environments
|
||||||
|
a compiler relevant "extern" is used with any "visibility" related
|
||||||
|
attributes from PCRE2_EXPORT included.
|
||||||
|
This macro appears at the start of every exported function that is part
|
||||||
|
of the external API. It does not appear on functions that are "external"
|
||||||
|
in the C sense, but which are internal to the library. */
|
||||||
|
/* #undef PCRE2_EXP_DEFN */
|
||||||
|
|
||||||
|
/* Define to any value if linking statically (TODO: make nice with Libtool) */
|
||||||
|
/* #undef PCRE2_STATIC */
|
||||||
|
|
||||||
|
/* Define to necessary symbol if this constant uses a non-standard name on
|
||||||
|
your system. */
|
||||||
|
/* #undef PTHREAD_CREATE_JOINABLE */
|
||||||
|
|
||||||
|
/* Define to any non-zero number to enable support for SELinux compatible
|
||||||
|
executable memory allocator in JIT. Note that this will have no effect
|
||||||
|
unless SUPPORT_JIT is also defined. */
|
||||||
|
/* #undef SLJIT_PROT_EXECUTABLE_ALLOCATOR */
|
||||||
|
|
||||||
|
/* Define to 1 if all of the C90 standard headers exist (not just the ones
|
||||||
|
required in a freestanding environment). This macro is provided for
|
||||||
|
backward compatibility; new code need not use it. */
|
||||||
|
/* #undef STDC_HEADERS */
|
||||||
|
|
||||||
|
/* Define to any value to enable differential fuzzing support. */
|
||||||
|
/* #undef SUPPORT_DIFF_FUZZ */
|
||||||
|
|
||||||
|
/* Define to any value to enable support for Just-In-Time compiling. */
|
||||||
|
/* #undef SUPPORT_JIT */
|
||||||
|
|
||||||
|
/* Define to any value to allow pcre2grep to be linked with libbz2, so that it
|
||||||
|
is able to handle .bz2 files. */
|
||||||
|
/* #undef SUPPORT_LIBBZ2 */
|
||||||
|
|
||||||
|
/* Define to any value to allow pcre2test to be linked with libedit. */
|
||||||
|
/* #undef SUPPORT_LIBEDIT */
|
||||||
|
|
||||||
|
/* Define to any value to allow pcre2test to be linked with libreadline. */
|
||||||
|
/* #undef SUPPORT_LIBREADLINE */
|
||||||
|
|
||||||
|
/* Define to any value to allow pcre2grep to be linked with libz, so that it
|
||||||
|
is able to handle .gz files. */
|
||||||
|
/* #undef SUPPORT_LIBZ */
|
||||||
|
|
||||||
|
/* Define to any value to enable callout script support in pcre2grep. */
|
||||||
|
/* #undef SUPPORT_PCRE2GREP_CALLOUT */
|
||||||
|
|
||||||
|
/* Define to any value to enable fork support in pcre2grep callout scripts.
|
||||||
|
This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also defined.
|
||||||
|
*/
|
||||||
|
/* #undef SUPPORT_PCRE2GREP_CALLOUT_FORK */
|
||||||
|
|
||||||
|
/* Define to any value to enable JIT support in pcre2grep. Note that this will
|
||||||
|
have no effect unless SUPPORT_JIT is also defined. */
|
||||||
|
/* #undef SUPPORT_PCRE2GREP_JIT */
|
||||||
|
|
||||||
|
/* Define to any value to enable the 16 bit PCRE2 library. */
|
||||||
|
/* #undef SUPPORT_PCRE2_16 */
|
||||||
|
|
||||||
|
/* Define to any value to enable the 32 bit PCRE2 library. */
|
||||||
|
/* #undef SUPPORT_PCRE2_32 */
|
||||||
|
|
||||||
|
/* Define to any value to enable the 8 bit PCRE2 library. */
|
||||||
|
/* #undef SUPPORT_PCRE2_8 */
|
||||||
|
|
||||||
|
/* Define to any value to enable support for Unicode and UTF encoding. This
|
||||||
|
will work even in an EBCDIC environment, but it is incompatible with the
|
||||||
|
EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or*
|
||||||
|
ASCII/Unicode, but not both at once. */
|
||||||
|
/* #undef SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
/* Define to any value for valgrind support to find invalid memory reads. */
|
||||||
|
/* #undef SUPPORT_VALGRIND */
|
||||||
|
|
||||||
|
/* Enable extensions on AIX 3, Interix. */
|
||||||
|
#ifndef _ALL_SOURCE
|
||||||
|
# define _ALL_SOURCE 1
|
||||||
|
#endif
|
||||||
|
/* Enable general extensions on macOS. */
|
||||||
|
#ifndef _DARWIN_C_SOURCE
|
||||||
|
# define _DARWIN_C_SOURCE 1
|
||||||
|
#endif
|
||||||
|
/* Enable general extensions on Solaris. */
|
||||||
|
#ifndef __EXTENSIONS__
|
||||||
|
# define __EXTENSIONS__ 1
|
||||||
|
#endif
|
||||||
|
/* Enable GNU extensions on systems that have them. */
|
||||||
|
#ifndef _GNU_SOURCE
|
||||||
|
# define _GNU_SOURCE 1
|
||||||
|
#endif
|
||||||
|
/* Enable X/Open compliant socket functions that do not require linking
|
||||||
|
with -lxnet on HP-UX 11.11. */
|
||||||
|
#ifndef _HPUX_ALT_XOPEN_SOCKET_API
|
||||||
|
# define _HPUX_ALT_XOPEN_SOCKET_API 1
|
||||||
|
#endif
|
||||||
|
/* Identify the host operating system as Minix.
|
||||||
|
This macro does not affect the system headers' behavior.
|
||||||
|
A future release of Autoconf may stop defining this macro. */
|
||||||
|
#ifndef _MINIX
|
||||||
|
/* # undef _MINIX */
|
||||||
|
#endif
|
||||||
|
/* Enable general extensions on NetBSD.
|
||||||
|
Enable NetBSD compatibility extensions on Minix. */
|
||||||
|
#ifndef _NETBSD_SOURCE
|
||||||
|
# define _NETBSD_SOURCE 1
|
||||||
|
#endif
|
||||||
|
/* Enable OpenBSD compatibility extensions on NetBSD.
|
||||||
|
Oddly enough, this does nothing on OpenBSD. */
|
||||||
|
#ifndef _OPENBSD_SOURCE
|
||||||
|
# define _OPENBSD_SOURCE 1
|
||||||
|
#endif
|
||||||
|
/* Define to 1 if needed for POSIX-compatible behavior. */
|
||||||
|
#ifndef _POSIX_SOURCE
|
||||||
|
/* # undef _POSIX_SOURCE */
|
||||||
|
#endif
|
||||||
|
/* Define to 2 if needed for POSIX-compatible behavior. */
|
||||||
|
#ifndef _POSIX_1_SOURCE
|
||||||
|
/* # undef _POSIX_1_SOURCE */
|
||||||
|
#endif
|
||||||
|
/* Enable POSIX-compatible threading on Solaris. */
|
||||||
|
#ifndef _POSIX_PTHREAD_SEMANTICS
|
||||||
|
# define _POSIX_PTHREAD_SEMANTICS 1
|
||||||
|
#endif
|
||||||
|
/* Enable extensions specified by ISO/IEC TS 18661-5:2014. */
|
||||||
|
#ifndef __STDC_WANT_IEC_60559_ATTRIBS_EXT__
|
||||||
|
# define __STDC_WANT_IEC_60559_ATTRIBS_EXT__ 1
|
||||||
|
#endif
|
||||||
|
/* Enable extensions specified by ISO/IEC TS 18661-1:2014. */
|
||||||
|
#ifndef __STDC_WANT_IEC_60559_BFP_EXT__
|
||||||
|
# define __STDC_WANT_IEC_60559_BFP_EXT__ 1
|
||||||
|
#endif
|
||||||
|
/* Enable extensions specified by ISO/IEC TS 18661-2:2015. */
|
||||||
|
#ifndef __STDC_WANT_IEC_60559_DFP_EXT__
|
||||||
|
# define __STDC_WANT_IEC_60559_DFP_EXT__ 1
|
||||||
|
#endif
|
||||||
|
/* Enable extensions specified by ISO/IEC TS 18661-4:2015. */
|
||||||
|
#ifndef __STDC_WANT_IEC_60559_FUNCS_EXT__
|
||||||
|
# define __STDC_WANT_IEC_60559_FUNCS_EXT__ 1
|
||||||
|
#endif
|
||||||
|
/* Enable extensions specified by ISO/IEC TS 18661-3:2015. */
|
||||||
|
#ifndef __STDC_WANT_IEC_60559_TYPES_EXT__
|
||||||
|
# define __STDC_WANT_IEC_60559_TYPES_EXT__ 1
|
||||||
|
#endif
|
||||||
|
/* Enable extensions specified by ISO/IEC TR 24731-2:2010. */
|
||||||
|
#ifndef __STDC_WANT_LIB_EXT2__
|
||||||
|
# define __STDC_WANT_LIB_EXT2__ 1
|
||||||
|
#endif
|
||||||
|
/* Enable extensions specified by ISO/IEC 24747:2009. */
|
||||||
|
#ifndef __STDC_WANT_MATH_SPEC_FUNCS__
|
||||||
|
# define __STDC_WANT_MATH_SPEC_FUNCS__ 1
|
||||||
|
#endif
|
||||||
|
/* Enable extensions on HP NonStop. */
|
||||||
|
#ifndef _TANDEM_SOURCE
|
||||||
|
# define _TANDEM_SOURCE 1
|
||||||
|
#endif
|
||||||
|
/* Enable X/Open extensions. Define to 500 only if necessary
|
||||||
|
to make mbstate_t available. */
|
||||||
|
#ifndef _XOPEN_SOURCE
|
||||||
|
/* # undef _XOPEN_SOURCE */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Version number of package */
|
||||||
|
#define VERSION "10.45"
|
||||||
|
|
||||||
|
/* Number of bits in a file offset, on hosts where this is settable. */
|
||||||
|
/* #undef _FILE_OFFSET_BITS */
|
||||||
|
|
||||||
|
/* Define for large files, on AIX-style hosts. */
|
||||||
|
/* #undef _LARGE_FILES */
|
||||||
|
|
||||||
|
/* Define to empty if `const' does not conform to ANSI C. */
|
||||||
|
/* #undef const */
|
||||||
|
|
||||||
|
/* Define to the type of a signed integer type of width exactly 64 bits if
|
||||||
|
such a type exists and the standard includes do not define it. */
|
||||||
|
/* #undef int64_t */
|
||||||
|
|
||||||
|
/* Define to `unsigned int' if <sys/types.h> does not define. */
|
||||||
|
/* #undef size_t */
|
||||||
460
3rd/pcre2/src/config.h.in
Normal file
460
3rd/pcre2/src/config.h.in
Normal file
@@ -0,0 +1,460 @@
|
|||||||
|
/* src/config.h.in. Generated from configure.ac by autoheader. */
|
||||||
|
|
||||||
|
|
||||||
|
/* PCRE2 is written in Standard C, but there are a few non-standard things it
|
||||||
|
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||||
|
systems.
|
||||||
|
|
||||||
|
In environments that support the GNU autotools, config.h.in is converted into
|
||||||
|
config.h by the "configure" script. In environments that use CMake,
|
||||||
|
config-cmake.in is converted into config.h. If you are going to build PCRE2 "by
|
||||||
|
hand" without using "configure" or CMake, you should copy the distributed
|
||||||
|
config.h.generic to config.h, and edit the macro definitions to be the way you
|
||||||
|
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
|
||||||
|
so that config.h is included at the start of every source.
|
||||||
|
|
||||||
|
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||||
|
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
|
||||||
|
but if you do, default values will be taken from config.h for non-boolean
|
||||||
|
macros that are not defined on the command line.
|
||||||
|
|
||||||
|
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be
|
||||||
|
defined (conventionally to 1) for TRUE, or not defined at all for FALSE. All
|
||||||
|
such macros are listed as a commented #undef in config.h.generic. Macros such
|
||||||
|
as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||||
|
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
|
||||||
|
|
||||||
|
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||||
|
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
|
||||||
|
sure both macros are undefined; an emulation function will then be used. */
|
||||||
|
|
||||||
|
/* By default, the \R escape sequence matches any Unicode line ending
|
||||||
|
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
||||||
|
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
|
||||||
|
The build-time default can be overridden by the user of PCRE2 at runtime.
|
||||||
|
*/
|
||||||
|
#undef BSR_ANYCRLF
|
||||||
|
|
||||||
|
/* Define to any value to disable the use of the z and t modifiers in
|
||||||
|
formatting settings such as %zu or %td (this is rarely needed). */
|
||||||
|
#undef DISABLE_PERCENT_ZT
|
||||||
|
|
||||||
|
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||||
|
character codes, define this macro to any value. When EBCDIC is set, PCRE2
|
||||||
|
assumes that all input strings are in EBCDIC. If you do not define this
|
||||||
|
macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
|
||||||
|
is not possible to build a version of PCRE2 that supports both EBCDIC and
|
||||||
|
UTF-8/16/32. */
|
||||||
|
#undef EBCDIC
|
||||||
|
|
||||||
|
/* In an EBCDIC environment, define this macro to any value to arrange for the
|
||||||
|
NL character to be 0x25 instead of the default 0x15. NL plays the role that
|
||||||
|
LF does in an ASCII/Unicode environment. */
|
||||||
|
#undef EBCDIC_NL25
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <assert.h> header file. */
|
||||||
|
#undef HAVE_ASSERT_H
|
||||||
|
|
||||||
|
/* Define this if your compiler supports __attribute__((uninitialized)) */
|
||||||
|
#undef HAVE_ATTRIBUTE_UNINITIALIZED
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `bcopy' function. */
|
||||||
|
#undef HAVE_BCOPY
|
||||||
|
|
||||||
|
/* Define this if your compiler provides __assume() */
|
||||||
|
#undef HAVE_BUILTIN_ASSUME
|
||||||
|
|
||||||
|
/* Define this if your compiler provides __builtin_mul_overflow() */
|
||||||
|
#undef HAVE_BUILTIN_MUL_OVERFLOW
|
||||||
|
|
||||||
|
/* Define this if your compiler provides __builtin_unreachable() */
|
||||||
|
#undef HAVE_BUILTIN_UNREACHABLE
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <bzlib.h> header file. */
|
||||||
|
#undef HAVE_BZLIB_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <dirent.h> header file. */
|
||||||
|
#undef HAVE_DIRENT_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||||
|
#undef HAVE_DLFCN_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <editline/readline.h> header file. */
|
||||||
|
#undef HAVE_EDITLINE_READLINE_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <edit/readline/readline.h> header file. */
|
||||||
|
#undef HAVE_EDIT_READLINE_READLINE_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||||
|
#undef HAVE_INTTYPES_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <limits.h> header file. */
|
||||||
|
#undef HAVE_LIMITS_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `memfd_create' function. */
|
||||||
|
#undef HAVE_MEMFD_CREATE
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `memmove' function. */
|
||||||
|
#undef HAVE_MEMMOVE
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <minix/config.h> header file. */
|
||||||
|
#undef HAVE_MINIX_CONFIG_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `mkostemp' function. */
|
||||||
|
#undef HAVE_MKOSTEMP
|
||||||
|
|
||||||
|
/* Define if you have POSIX threads libraries and header files. */
|
||||||
|
#undef HAVE_PTHREAD
|
||||||
|
|
||||||
|
/* Have PTHREAD_PRIO_INHERIT. */
|
||||||
|
#undef HAVE_PTHREAD_PRIO_INHERIT
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <readline.h> header file. */
|
||||||
|
#undef HAVE_READLINE_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <readline/history.h> header file. */
|
||||||
|
#undef HAVE_READLINE_HISTORY_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <readline/readline.h> header file. */
|
||||||
|
#undef HAVE_READLINE_READLINE_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `realpath' function. */
|
||||||
|
#undef HAVE_REALPATH
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `secure_getenv' function. */
|
||||||
|
#undef HAVE_SECURE_GETENV
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <stdint.h> header file. */
|
||||||
|
#undef HAVE_STDINT_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <stdio.h> header file. */
|
||||||
|
#undef HAVE_STDIO_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||||
|
#undef HAVE_STDLIB_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `strerror' function. */
|
||||||
|
#undef HAVE_STRERROR
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <strings.h> header file. */
|
||||||
|
#undef HAVE_STRINGS_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <string.h> header file. */
|
||||||
|
#undef HAVE_STRING_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||||
|
#undef HAVE_SYS_STAT_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||||
|
#undef HAVE_SYS_TYPES_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <sys/wait.h> header file. */
|
||||||
|
#undef HAVE_SYS_WAIT_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <unistd.h> header file. */
|
||||||
|
#undef HAVE_UNISTD_H
|
||||||
|
|
||||||
|
/* Define to 1 if the compiler supports GCC compatible visibility
|
||||||
|
declarations. */
|
||||||
|
#undef HAVE_VISIBILITY
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <wchar.h> header file. */
|
||||||
|
#undef HAVE_WCHAR_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <windows.h> header file. */
|
||||||
|
#undef HAVE_WINDOWS_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <zlib.h> header file. */
|
||||||
|
#undef HAVE_ZLIB_H
|
||||||
|
|
||||||
|
/* This limits the amount of memory that may be used while matching a pattern.
|
||||||
|
It applies to both pcre2_match() and pcre2_dfa_match(). It does not apply
|
||||||
|
to JIT matching. The value is in kibibytes (units of 1024 bytes). */
|
||||||
|
#undef HEAP_LIMIT
|
||||||
|
|
||||||
|
/* The value of LINK_SIZE determines the number of bytes used to store links
|
||||||
|
as offsets within the compiled regex. The default is 2, which allows for
|
||||||
|
compiled patterns up to 65535 code units long. This covers the vast
|
||||||
|
majority of cases. However, PCRE2 can also be compiled to use 3 or 4 bytes
|
||||||
|
instead. This allows for longer patterns in extreme cases. */
|
||||||
|
#undef LINK_SIZE
|
||||||
|
|
||||||
|
/* Define to the sub-directory where libtool stores uninstalled libraries. */
|
||||||
|
#undef LT_OBJDIR
|
||||||
|
|
||||||
|
/* The value of MATCH_LIMIT determines the default number of times the
|
||||||
|
pcre2_match() function can record a backtrack position during a single
|
||||||
|
matching attempt. The value is also used to limit a loop counter in
|
||||||
|
pcre2_dfa_match(). There is a runtime interface for setting a different
|
||||||
|
limit. The limit exists in order to catch runaway regular expressions that
|
||||||
|
take forever to determine that they do not match. The default is set very
|
||||||
|
large so that it does not accidentally catch legitimate cases. */
|
||||||
|
#undef MATCH_LIMIT
|
||||||
|
|
||||||
|
/* The above limit applies to all backtracks, whether or not they are nested.
|
||||||
|
In some environments it is desirable to limit the nesting of backtracking
|
||||||
|
(that is, the depth of tree that is searched) more strictly, in order to
|
||||||
|
restrict the maximum amount of heap memory that is used. The value of
|
||||||
|
MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it
|
||||||
|
must be less than the value of MATCH_LIMIT. The default is to use the same
|
||||||
|
value as MATCH_LIMIT. There is a runtime method for setting a different
|
||||||
|
limit. In the case of pcre2_dfa_match(), this limit controls the depth of
|
||||||
|
the internal nested function calls that are used for pattern recursions,
|
||||||
|
lookarounds, and atomic groups. */
|
||||||
|
#undef MATCH_LIMIT_DEPTH
|
||||||
|
|
||||||
|
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||||
|
Care must be taken if it is increased, because it guards against integer
|
||||||
|
overflow caused by enormously large patterns. */
|
||||||
|
#undef MAX_NAME_COUNT
|
||||||
|
|
||||||
|
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||||
|
Care must be taken if it is increased, because it guards against integer
|
||||||
|
overflow caused by enormously large patterns. */
|
||||||
|
#undef MAX_NAME_SIZE
|
||||||
|
|
||||||
|
/* The value of MAX_VARLOOKBEHIND specifies the default maximum length, in
|
||||||
|
characters, for a variable-length lookbehind assertion. */
|
||||||
|
#undef MAX_VARLOOKBEHIND
|
||||||
|
|
||||||
|
/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */
|
||||||
|
#undef NEVER_BACKSLASH_C
|
||||||
|
|
||||||
|
/* The value of NEWLINE_DEFAULT determines the default newline character
|
||||||
|
sequence. PCRE2 client programs can override this by selecting other values
|
||||||
|
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), 5
|
||||||
|
(ANYCRLF), and 6 (NUL). */
|
||||||
|
#undef NEWLINE_DEFAULT
|
||||||
|
|
||||||
|
/* Name of package */
|
||||||
|
#undef PACKAGE
|
||||||
|
|
||||||
|
/* Define to the address where bug reports for this package should be sent. */
|
||||||
|
#undef PACKAGE_BUGREPORT
|
||||||
|
|
||||||
|
/* Define to the full name of this package. */
|
||||||
|
#undef PACKAGE_NAME
|
||||||
|
|
||||||
|
/* Define to the full name and version of this package. */
|
||||||
|
#undef PACKAGE_STRING
|
||||||
|
|
||||||
|
/* Define to the one symbol short name of this package. */
|
||||||
|
#undef PACKAGE_TARNAME
|
||||||
|
|
||||||
|
/* Define to the home page for this package. */
|
||||||
|
#undef PACKAGE_URL
|
||||||
|
|
||||||
|
/* Define to the version of this package. */
|
||||||
|
#undef PACKAGE_VERSION
|
||||||
|
|
||||||
|
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||||
|
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||||
|
stack that is used while compiling a pattern. */
|
||||||
|
#undef PARENS_NEST_LIMIT
|
||||||
|
|
||||||
|
/* The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by
|
||||||
|
pcre2grep to hold parts of the file it is searching. The buffer will be
|
||||||
|
expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing
|
||||||
|
very long lines. The actual amount of memory used by pcre2grep is three
|
||||||
|
times this number, because it allows for the buffering of "before" and
|
||||||
|
"after" lines. */
|
||||||
|
#undef PCRE2GREP_BUFSIZE
|
||||||
|
|
||||||
|
/* The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer
|
||||||
|
used by pcre2grep to hold parts of the file it is searching. The actual
|
||||||
|
amount of memory used by pcre2grep is three times this number, because it
|
||||||
|
allows for the buffering of "before" and "after" lines. */
|
||||||
|
#undef PCRE2GREP_MAX_BUFSIZE
|
||||||
|
|
||||||
|
/* Define to any value to include debugging code. */
|
||||||
|
#undef PCRE2_DEBUG
|
||||||
|
|
||||||
|
/* to make a symbol visible */
|
||||||
|
#undef PCRE2_EXPORT
|
||||||
|
|
||||||
|
|
||||||
|
/* If you are compiling for a system other than a Unix-like system or
|
||||||
|
Win32, and it needs some magic to be inserted before the definition
|
||||||
|
of a function that is exported by the library, define this macro to
|
||||||
|
contain the relevant magic. If you do not define this macro, a suitable
|
||||||
|
__declspec value is used for Windows systems; in other environments
|
||||||
|
a compiler relevant "extern" is used with any "visibility" related
|
||||||
|
attributes from PCRE2_EXPORT included.
|
||||||
|
This macro appears at the start of every exported function that is part
|
||||||
|
of the external API. It does not appear on functions that are "external"
|
||||||
|
in the C sense, but which are internal to the library. */
|
||||||
|
#undef PCRE2_EXP_DEFN
|
||||||
|
|
||||||
|
/* Define to any value if linking statically (TODO: make nice with Libtool) */
|
||||||
|
#undef PCRE2_STATIC
|
||||||
|
|
||||||
|
/* Define to necessary symbol if this constant uses a non-standard name on
|
||||||
|
your system. */
|
||||||
|
#undef PTHREAD_CREATE_JOINABLE
|
||||||
|
|
||||||
|
/* Define to any non-zero number to enable support for SELinux compatible
|
||||||
|
executable memory allocator in JIT. Note that this will have no effect
|
||||||
|
unless SUPPORT_JIT is also defined. */
|
||||||
|
#undef SLJIT_PROT_EXECUTABLE_ALLOCATOR
|
||||||
|
|
||||||
|
/* Define to 1 if all of the C90 standard headers exist (not just the ones
|
||||||
|
required in a freestanding environment). This macro is provided for
|
||||||
|
backward compatibility; new code need not use it. */
|
||||||
|
#undef STDC_HEADERS
|
||||||
|
|
||||||
|
/* Define to any value to enable differential fuzzing support. */
|
||||||
|
#undef SUPPORT_DIFF_FUZZ
|
||||||
|
|
||||||
|
/* Define to any value to enable support for Just-In-Time compiling. */
|
||||||
|
#undef SUPPORT_JIT
|
||||||
|
|
||||||
|
/* Define to any value to allow pcre2grep to be linked with libbz2, so that it
|
||||||
|
is able to handle .bz2 files. */
|
||||||
|
#undef SUPPORT_LIBBZ2
|
||||||
|
|
||||||
|
/* Define to any value to allow pcre2test to be linked with libedit. */
|
||||||
|
#undef SUPPORT_LIBEDIT
|
||||||
|
|
||||||
|
/* Define to any value to allow pcre2test to be linked with libreadline. */
|
||||||
|
#undef SUPPORT_LIBREADLINE
|
||||||
|
|
||||||
|
/* Define to any value to allow pcre2grep to be linked with libz, so that it
|
||||||
|
is able to handle .gz files. */
|
||||||
|
#undef SUPPORT_LIBZ
|
||||||
|
|
||||||
|
/* Define to any value to enable callout script support in pcre2grep. */
|
||||||
|
#undef SUPPORT_PCRE2GREP_CALLOUT
|
||||||
|
|
||||||
|
/* Define to any value to enable fork support in pcre2grep callout scripts.
|
||||||
|
This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also defined.
|
||||||
|
*/
|
||||||
|
#undef SUPPORT_PCRE2GREP_CALLOUT_FORK
|
||||||
|
|
||||||
|
/* Define to any value to enable JIT support in pcre2grep. Note that this will
|
||||||
|
have no effect unless SUPPORT_JIT is also defined. */
|
||||||
|
#undef SUPPORT_PCRE2GREP_JIT
|
||||||
|
|
||||||
|
/* Define to any value to enable the 16 bit PCRE2 library. */
|
||||||
|
#undef SUPPORT_PCRE2_16
|
||||||
|
|
||||||
|
/* Define to any value to enable the 32 bit PCRE2 library. */
|
||||||
|
#undef SUPPORT_PCRE2_32
|
||||||
|
|
||||||
|
/* Define to any value to enable the 8 bit PCRE2 library. */
|
||||||
|
#undef SUPPORT_PCRE2_8
|
||||||
|
|
||||||
|
/* Define to any value to enable support for Unicode and UTF encoding. This
|
||||||
|
will work even in an EBCDIC environment, but it is incompatible with the
|
||||||
|
EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or*
|
||||||
|
ASCII/Unicode, but not both at once. */
|
||||||
|
#undef SUPPORT_UNICODE
|
||||||
|
|
||||||
|
/* Define to any value for valgrind support to find invalid memory reads. */
|
||||||
|
#undef SUPPORT_VALGRIND
|
||||||
|
|
||||||
|
/* Enable extensions on AIX 3, Interix. */
|
||||||
|
#ifndef _ALL_SOURCE
|
||||||
|
# undef _ALL_SOURCE
|
||||||
|
#endif
|
||||||
|
/* Enable general extensions on macOS. */
|
||||||
|
#ifndef _DARWIN_C_SOURCE
|
||||||
|
# undef _DARWIN_C_SOURCE
|
||||||
|
#endif
|
||||||
|
/* Enable general extensions on Solaris. */
|
||||||
|
#ifndef __EXTENSIONS__
|
||||||
|
# undef __EXTENSIONS__
|
||||||
|
#endif
|
||||||
|
/* Enable GNU extensions on systems that have them. */
|
||||||
|
#ifndef _GNU_SOURCE
|
||||||
|
# undef _GNU_SOURCE
|
||||||
|
#endif
|
||||||
|
/* Enable X/Open compliant socket functions that do not require linking
|
||||||
|
with -lxnet on HP-UX 11.11. */
|
||||||
|
#ifndef _HPUX_ALT_XOPEN_SOCKET_API
|
||||||
|
# undef _HPUX_ALT_XOPEN_SOCKET_API
|
||||||
|
#endif
|
||||||
|
/* Identify the host operating system as Minix.
|
||||||
|
This macro does not affect the system headers' behavior.
|
||||||
|
A future release of Autoconf may stop defining this macro. */
|
||||||
|
#ifndef _MINIX
|
||||||
|
# undef _MINIX
|
||||||
|
#endif
|
||||||
|
/* Enable general extensions on NetBSD.
|
||||||
|
Enable NetBSD compatibility extensions on Minix. */
|
||||||
|
#ifndef _NETBSD_SOURCE
|
||||||
|
# undef _NETBSD_SOURCE
|
||||||
|
#endif
|
||||||
|
/* Enable OpenBSD compatibility extensions on NetBSD.
|
||||||
|
Oddly enough, this does nothing on OpenBSD. */
|
||||||
|
#ifndef _OPENBSD_SOURCE
|
||||||
|
# undef _OPENBSD_SOURCE
|
||||||
|
#endif
|
||||||
|
/* Define to 1 if needed for POSIX-compatible behavior. */
|
||||||
|
#ifndef _POSIX_SOURCE
|
||||||
|
# undef _POSIX_SOURCE
|
||||||
|
#endif
|
||||||
|
/* Define to 2 if needed for POSIX-compatible behavior. */
|
||||||
|
#ifndef _POSIX_1_SOURCE
|
||||||
|
# undef _POSIX_1_SOURCE
|
||||||
|
#endif
|
||||||
|
/* Enable POSIX-compatible threading on Solaris. */
|
||||||
|
#ifndef _POSIX_PTHREAD_SEMANTICS
|
||||||
|
# undef _POSIX_PTHREAD_SEMANTICS
|
||||||
|
#endif
|
||||||
|
/* Enable extensions specified by ISO/IEC TS 18661-5:2014. */
|
||||||
|
#ifndef __STDC_WANT_IEC_60559_ATTRIBS_EXT__
|
||||||
|
# undef __STDC_WANT_IEC_60559_ATTRIBS_EXT__
|
||||||
|
#endif
|
||||||
|
/* Enable extensions specified by ISO/IEC TS 18661-1:2014. */
|
||||||
|
#ifndef __STDC_WANT_IEC_60559_BFP_EXT__
|
||||||
|
# undef __STDC_WANT_IEC_60559_BFP_EXT__
|
||||||
|
#endif
|
||||||
|
/* Enable extensions specified by ISO/IEC TS 18661-2:2015. */
|
||||||
|
#ifndef __STDC_WANT_IEC_60559_DFP_EXT__
|
||||||
|
# undef __STDC_WANT_IEC_60559_DFP_EXT__
|
||||||
|
#endif
|
||||||
|
/* Enable extensions specified by ISO/IEC TS 18661-4:2015. */
|
||||||
|
#ifndef __STDC_WANT_IEC_60559_FUNCS_EXT__
|
||||||
|
# undef __STDC_WANT_IEC_60559_FUNCS_EXT__
|
||||||
|
#endif
|
||||||
|
/* Enable extensions specified by ISO/IEC TS 18661-3:2015. */
|
||||||
|
#ifndef __STDC_WANT_IEC_60559_TYPES_EXT__
|
||||||
|
# undef __STDC_WANT_IEC_60559_TYPES_EXT__
|
||||||
|
#endif
|
||||||
|
/* Enable extensions specified by ISO/IEC TR 24731-2:2010. */
|
||||||
|
#ifndef __STDC_WANT_LIB_EXT2__
|
||||||
|
# undef __STDC_WANT_LIB_EXT2__
|
||||||
|
#endif
|
||||||
|
/* Enable extensions specified by ISO/IEC 24747:2009. */
|
||||||
|
#ifndef __STDC_WANT_MATH_SPEC_FUNCS__
|
||||||
|
# undef __STDC_WANT_MATH_SPEC_FUNCS__
|
||||||
|
#endif
|
||||||
|
/* Enable extensions on HP NonStop. */
|
||||||
|
#ifndef _TANDEM_SOURCE
|
||||||
|
# undef _TANDEM_SOURCE
|
||||||
|
#endif
|
||||||
|
/* Enable X/Open extensions. Define to 500 only if necessary
|
||||||
|
to make mbstate_t available. */
|
||||||
|
#ifndef _XOPEN_SOURCE
|
||||||
|
# undef _XOPEN_SOURCE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/* Version number of package */
|
||||||
|
#undef VERSION
|
||||||
|
|
||||||
|
/* Number of bits in a file offset, on hosts where this is settable. */
|
||||||
|
#undef _FILE_OFFSET_BITS
|
||||||
|
|
||||||
|
/* Define for large files, on AIX-style hosts. */
|
||||||
|
#undef _LARGE_FILES
|
||||||
|
|
||||||
|
/* Define to empty if `const' does not conform to ANSI C. */
|
||||||
|
#undef const
|
||||||
|
|
||||||
|
/* Define to the type of a signed integer type of width exactly 64 bits if
|
||||||
|
such a type exists and the standard includes do not define it. */
|
||||||
|
#undef int64_t
|
||||||
|
|
||||||
|
/* Define to `unsigned int' if <sys/types.h> does not define. */
|
||||||
|
#undef size_t
|
||||||
1069
3rd/pcre2/src/pcre2.h.generic
Normal file
1069
3rd/pcre2/src/pcre2.h.generic
Normal file
@@ -0,0 +1,1069 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This is the public header file for the PCRE library, second API, to be
|
||||||
|
#included by applications that call PCRE2 functions.
|
||||||
|
|
||||||
|
Copyright (c) 2016-2024 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef PCRE2_H_IDEMPOTENT_GUARD
|
||||||
|
#define PCRE2_H_IDEMPOTENT_GUARD
|
||||||
|
|
||||||
|
/* The current PCRE version information. */
|
||||||
|
|
||||||
|
#define PCRE2_MAJOR 10
|
||||||
|
#define PCRE2_MINOR 45
|
||||||
|
#define PCRE2_PRERELEASE
|
||||||
|
#define PCRE2_DATE 2025-02-05
|
||||||
|
|
||||||
|
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||||
|
imported have to be identified as such. When building PCRE2, the appropriate
|
||||||
|
export setting is defined in pcre2_internal.h, which includes this file. So we
|
||||||
|
don't change existing definitions of PCRE2_EXP_DECL. */
|
||||||
|
|
||||||
|
#if defined(_WIN32) && !defined(PCRE2_STATIC)
|
||||||
|
# ifndef PCRE2_EXP_DECL
|
||||||
|
# define PCRE2_EXP_DECL extern __declspec(dllimport)
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* By default, we use the standard "extern" declarations. */
|
||||||
|
|
||||||
|
#ifndef PCRE2_EXP_DECL
|
||||||
|
# ifdef __cplusplus
|
||||||
|
# define PCRE2_EXP_DECL extern "C"
|
||||||
|
# else
|
||||||
|
# define PCRE2_EXP_DECL extern
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* When compiling with the MSVC compiler, it is sometimes necessary to include
|
||||||
|
a "calling convention" before exported function names. (This is secondhand
|
||||||
|
information; I know nothing about MSVC myself). For example, something like
|
||||||
|
|
||||||
|
void __cdecl function(....)
|
||||||
|
|
||||||
|
might be needed. In order to make this easy, all the exported functions have
|
||||||
|
PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not
|
||||||
|
set, we ensure here that it has no effect. */
|
||||||
|
|
||||||
|
#ifndef PCRE2_CALL_CONVENTION
|
||||||
|
#define PCRE2_CALL_CONVENTION
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Have to include limits.h, stdlib.h, and inttypes.h to ensure that size_t and
|
||||||
|
uint8_t, UCHAR_MAX, etc are defined. Some systems that do have inttypes.h do
|
||||||
|
not have stdint.h, which is why we use inttypes.h, which according to the C
|
||||||
|
standard is a superset of stdint.h. If inttypes.h is not available the build
|
||||||
|
will break and the relevant values must be provided by some other means. */
|
||||||
|
|
||||||
|
#include <limits.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <inttypes.h>
|
||||||
|
|
||||||
|
/* Allow for C++ users compiling this directly. */
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* The following option bits can be passed to pcre2_compile(), pcre2_match(),
|
||||||
|
or pcre2_dfa_match(). PCRE2_NO_UTF_CHECK affects only the function to which it
|
||||||
|
is passed. Put these bits at the most significant end of the options word so
|
||||||
|
others can be added next to them */
|
||||||
|
|
||||||
|
#define PCRE2_ANCHORED 0x80000000u
|
||||||
|
#define PCRE2_NO_UTF_CHECK 0x40000000u
|
||||||
|
#define PCRE2_ENDANCHORED 0x20000000u
|
||||||
|
|
||||||
|
/* The following option bits can be passed only to pcre2_compile(). However,
|
||||||
|
they may affect compilation, JIT compilation, and/or interpretive execution.
|
||||||
|
The following tags indicate which:
|
||||||
|
|
||||||
|
C alters what is compiled by pcre2_compile()
|
||||||
|
J alters what is compiled by pcre2_jit_compile()
|
||||||
|
M is inspected during pcre2_match() execution
|
||||||
|
D is inspected during pcre2_dfa_match() execution
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define PCRE2_ALLOW_EMPTY_CLASS 0x00000001u /* C */
|
||||||
|
#define PCRE2_ALT_BSUX 0x00000002u /* C */
|
||||||
|
#define PCRE2_AUTO_CALLOUT 0x00000004u /* C */
|
||||||
|
#define PCRE2_CASELESS 0x00000008u /* C */
|
||||||
|
#define PCRE2_DOLLAR_ENDONLY 0x00000010u /* J M D */
|
||||||
|
#define PCRE2_DOTALL 0x00000020u /* C */
|
||||||
|
#define PCRE2_DUPNAMES 0x00000040u /* C */
|
||||||
|
#define PCRE2_EXTENDED 0x00000080u /* C */
|
||||||
|
#define PCRE2_FIRSTLINE 0x00000100u /* J M D */
|
||||||
|
#define PCRE2_MATCH_UNSET_BACKREF 0x00000200u /* C J M */
|
||||||
|
#define PCRE2_MULTILINE 0x00000400u /* C */
|
||||||
|
#define PCRE2_NEVER_UCP 0x00000800u /* C */
|
||||||
|
#define PCRE2_NEVER_UTF 0x00001000u /* C */
|
||||||
|
#define PCRE2_NO_AUTO_CAPTURE 0x00002000u /* C */
|
||||||
|
#define PCRE2_NO_AUTO_POSSESS 0x00004000u /* C */
|
||||||
|
#define PCRE2_NO_DOTSTAR_ANCHOR 0x00008000u /* C */
|
||||||
|
#define PCRE2_NO_START_OPTIMIZE 0x00010000u /* J M D */
|
||||||
|
#define PCRE2_UCP 0x00020000u /* C J M D */
|
||||||
|
#define PCRE2_UNGREEDY 0x00040000u /* C */
|
||||||
|
#define PCRE2_UTF 0x00080000u /* C J M D */
|
||||||
|
#define PCRE2_NEVER_BACKSLASH_C 0x00100000u /* C */
|
||||||
|
#define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */
|
||||||
|
#define PCRE2_ALT_VERBNAMES 0x00400000u /* C */
|
||||||
|
#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */
|
||||||
|
#define PCRE2_EXTENDED_MORE 0x01000000u /* C */
|
||||||
|
#define PCRE2_LITERAL 0x02000000u /* C */
|
||||||
|
#define PCRE2_MATCH_INVALID_UTF 0x04000000u /* J M D */
|
||||||
|
#define PCRE2_ALT_EXTENDED_CLASS 0x08000000u /* C */
|
||||||
|
|
||||||
|
/* An additional compile options word is available in the compile context. */
|
||||||
|
|
||||||
|
#define PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES 0x00000001u /* C */
|
||||||
|
#define PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL 0x00000002u /* C */
|
||||||
|
#define PCRE2_EXTRA_MATCH_WORD 0x00000004u /* C */
|
||||||
|
#define PCRE2_EXTRA_MATCH_LINE 0x00000008u /* C */
|
||||||
|
#define PCRE2_EXTRA_ESCAPED_CR_IS_LF 0x00000010u /* C */
|
||||||
|
#define PCRE2_EXTRA_ALT_BSUX 0x00000020u /* C */
|
||||||
|
#define PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK 0x00000040u /* C */
|
||||||
|
#define PCRE2_EXTRA_CASELESS_RESTRICT 0x00000080u /* C */
|
||||||
|
#define PCRE2_EXTRA_ASCII_BSD 0x00000100u /* C */
|
||||||
|
#define PCRE2_EXTRA_ASCII_BSS 0x00000200u /* C */
|
||||||
|
#define PCRE2_EXTRA_ASCII_BSW 0x00000400u /* C */
|
||||||
|
#define PCRE2_EXTRA_ASCII_POSIX 0x00000800u /* C */
|
||||||
|
#define PCRE2_EXTRA_ASCII_DIGIT 0x00001000u /* C */
|
||||||
|
#define PCRE2_EXTRA_PYTHON_OCTAL 0x00002000u /* C */
|
||||||
|
#define PCRE2_EXTRA_NO_BS0 0x00004000u /* C */
|
||||||
|
#define PCRE2_EXTRA_NEVER_CALLOUT 0x00008000u /* C */
|
||||||
|
#define PCRE2_EXTRA_TURKISH_CASING 0x00010000u /* C */
|
||||||
|
|
||||||
|
/* These are for pcre2_jit_compile(). */
|
||||||
|
|
||||||
|
#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */
|
||||||
|
#define PCRE2_JIT_PARTIAL_SOFT 0x00000002u
|
||||||
|
#define PCRE2_JIT_PARTIAL_HARD 0x00000004u
|
||||||
|
#define PCRE2_JIT_INVALID_UTF 0x00000100u
|
||||||
|
#define PCRE2_JIT_TEST_ALLOC 0x00000200u
|
||||||
|
|
||||||
|
/* These are for pcre2_match(), pcre2_dfa_match(), pcre2_jit_match(), and
|
||||||
|
pcre2_substitute(). Some are allowed only for one of the functions, and in
|
||||||
|
these cases it is noted below. Note that PCRE2_ANCHORED, PCRE2_ENDANCHORED and
|
||||||
|
PCRE2_NO_UTF_CHECK can also be passed to these functions (though
|
||||||
|
pcre2_jit_match() ignores the last of these since it bypasses all sanity checks). */
|
||||||
|
|
||||||
|
#define PCRE2_NOTBOL 0x00000001u
|
||||||
|
#define PCRE2_NOTEOL 0x00000002u
|
||||||
|
#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */
|
||||||
|
#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. */
|
||||||
|
#define PCRE2_PARTIAL_SOFT 0x00000010u
|
||||||
|
#define PCRE2_PARTIAL_HARD 0x00000020u
|
||||||
|
#define PCRE2_DFA_RESTART 0x00000040u /* pcre2_dfa_match() only */
|
||||||
|
#define PCRE2_DFA_SHORTEST 0x00000080u /* pcre2_dfa_match() only */
|
||||||
|
#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u /* pcre2_substitute() only */
|
||||||
|
#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u /* pcre2_substitute() only */
|
||||||
|
#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u /* pcre2_substitute() only */
|
||||||
|
#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u /* pcre2_substitute() only */
|
||||||
|
#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u /* pcre2_substitute() only */
|
||||||
|
#define PCRE2_NO_JIT 0x00002000u /* not for pcre2_dfa_match() */
|
||||||
|
#define PCRE2_COPY_MATCHED_SUBJECT 0x00004000u
|
||||||
|
#define PCRE2_SUBSTITUTE_LITERAL 0x00008000u /* pcre2_substitute() only */
|
||||||
|
#define PCRE2_SUBSTITUTE_MATCHED 0x00010000u /* pcre2_substitute() only */
|
||||||
|
#define PCRE2_SUBSTITUTE_REPLACEMENT_ONLY 0x00020000u /* pcre2_substitute() only */
|
||||||
|
#define PCRE2_DISABLE_RECURSELOOP_CHECK 0x00040000u /* not for pcre2_dfa_match() or pcre2_jit_match() */
|
||||||
|
|
||||||
|
/* Options for pcre2_pattern_convert(). */
|
||||||
|
|
||||||
|
#define PCRE2_CONVERT_UTF 0x00000001u
|
||||||
|
#define PCRE2_CONVERT_NO_UTF_CHECK 0x00000002u
|
||||||
|
#define PCRE2_CONVERT_POSIX_BASIC 0x00000004u
|
||||||
|
#define PCRE2_CONVERT_POSIX_EXTENDED 0x00000008u
|
||||||
|
#define PCRE2_CONVERT_GLOB 0x00000010u
|
||||||
|
#define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000030u
|
||||||
|
#define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000050u
|
||||||
|
|
||||||
|
/* Newline and \R settings, for use in compile contexts. The newline values
|
||||||
|
must be kept in step with values set in config.h and both sets must all be
|
||||||
|
greater than zero. */
|
||||||
|
|
||||||
|
#define PCRE2_NEWLINE_CR 1
|
||||||
|
#define PCRE2_NEWLINE_LF 2
|
||||||
|
#define PCRE2_NEWLINE_CRLF 3
|
||||||
|
#define PCRE2_NEWLINE_ANY 4
|
||||||
|
#define PCRE2_NEWLINE_ANYCRLF 5
|
||||||
|
#define PCRE2_NEWLINE_NUL 6
|
||||||
|
|
||||||
|
#define PCRE2_BSR_UNICODE 1
|
||||||
|
#define PCRE2_BSR_ANYCRLF 2
|
||||||
|
|
||||||
|
/* Error codes for pcre2_compile(). Some of these are also used by
|
||||||
|
pcre2_pattern_convert(). */
|
||||||
|
|
||||||
|
#define PCRE2_ERROR_END_BACKSLASH 101
|
||||||
|
#define PCRE2_ERROR_END_BACKSLASH_C 102
|
||||||
|
#define PCRE2_ERROR_UNKNOWN_ESCAPE 103
|
||||||
|
#define PCRE2_ERROR_QUANTIFIER_OUT_OF_ORDER 104
|
||||||
|
#define PCRE2_ERROR_QUANTIFIER_TOO_BIG 105
|
||||||
|
#define PCRE2_ERROR_MISSING_SQUARE_BRACKET 106
|
||||||
|
#define PCRE2_ERROR_ESCAPE_INVALID_IN_CLASS 107
|
||||||
|
#define PCRE2_ERROR_CLASS_RANGE_ORDER 108
|
||||||
|
#define PCRE2_ERROR_QUANTIFIER_INVALID 109
|
||||||
|
#define PCRE2_ERROR_INTERNAL_UNEXPECTED_REPEAT 110
|
||||||
|
#define PCRE2_ERROR_INVALID_AFTER_PARENS_QUERY 111
|
||||||
|
#define PCRE2_ERROR_POSIX_CLASS_NOT_IN_CLASS 112
|
||||||
|
#define PCRE2_ERROR_POSIX_NO_SUPPORT_COLLATING 113
|
||||||
|
#define PCRE2_ERROR_MISSING_CLOSING_PARENTHESIS 114
|
||||||
|
#define PCRE2_ERROR_BAD_SUBPATTERN_REFERENCE 115
|
||||||
|
#define PCRE2_ERROR_NULL_PATTERN 116
|
||||||
|
#define PCRE2_ERROR_BAD_OPTIONS 117
|
||||||
|
#define PCRE2_ERROR_MISSING_COMMENT_CLOSING 118
|
||||||
|
#define PCRE2_ERROR_PARENTHESES_NEST_TOO_DEEP 119
|
||||||
|
#define PCRE2_ERROR_PATTERN_TOO_LARGE 120
|
||||||
|
#define PCRE2_ERROR_HEAP_FAILED 121
|
||||||
|
#define PCRE2_ERROR_UNMATCHED_CLOSING_PARENTHESIS 122
|
||||||
|
#define PCRE2_ERROR_INTERNAL_CODE_OVERFLOW 123
|
||||||
|
#define PCRE2_ERROR_MISSING_CONDITION_CLOSING 124
|
||||||
|
#define PCRE2_ERROR_LOOKBEHIND_NOT_FIXED_LENGTH 125
|
||||||
|
#define PCRE2_ERROR_ZERO_RELATIVE_REFERENCE 126
|
||||||
|
#define PCRE2_ERROR_TOO_MANY_CONDITION_BRANCHES 127
|
||||||
|
#define PCRE2_ERROR_CONDITION_ASSERTION_EXPECTED 128
|
||||||
|
#define PCRE2_ERROR_BAD_RELATIVE_REFERENCE 129
|
||||||
|
#define PCRE2_ERROR_UNKNOWN_POSIX_CLASS 130
|
||||||
|
#define PCRE2_ERROR_INTERNAL_STUDY_ERROR 131
|
||||||
|
#define PCRE2_ERROR_UNICODE_NOT_SUPPORTED 132
|
||||||
|
#define PCRE2_ERROR_PARENTHESES_STACK_CHECK 133
|
||||||
|
#define PCRE2_ERROR_CODE_POINT_TOO_BIG 134
|
||||||
|
#define PCRE2_ERROR_LOOKBEHIND_TOO_COMPLICATED 135
|
||||||
|
#define PCRE2_ERROR_LOOKBEHIND_INVALID_BACKSLASH_C 136
|
||||||
|
#define PCRE2_ERROR_UNSUPPORTED_ESCAPE_SEQUENCE 137
|
||||||
|
#define PCRE2_ERROR_CALLOUT_NUMBER_TOO_BIG 138
|
||||||
|
#define PCRE2_ERROR_MISSING_CALLOUT_CLOSING 139
|
||||||
|
#define PCRE2_ERROR_ESCAPE_INVALID_IN_VERB 140
|
||||||
|
#define PCRE2_ERROR_UNRECOGNIZED_AFTER_QUERY_P 141
|
||||||
|
#define PCRE2_ERROR_MISSING_NAME_TERMINATOR 142
|
||||||
|
#define PCRE2_ERROR_DUPLICATE_SUBPATTERN_NAME 143
|
||||||
|
#define PCRE2_ERROR_INVALID_SUBPATTERN_NAME 144
|
||||||
|
#define PCRE2_ERROR_UNICODE_PROPERTIES_UNAVAILABLE 145
|
||||||
|
#define PCRE2_ERROR_MALFORMED_UNICODE_PROPERTY 146
|
||||||
|
#define PCRE2_ERROR_UNKNOWN_UNICODE_PROPERTY 147
|
||||||
|
#define PCRE2_ERROR_SUBPATTERN_NAME_TOO_LONG 148
|
||||||
|
#define PCRE2_ERROR_TOO_MANY_NAMED_SUBPATTERNS 149
|
||||||
|
#define PCRE2_ERROR_CLASS_INVALID_RANGE 150
|
||||||
|
#define PCRE2_ERROR_OCTAL_BYTE_TOO_BIG 151
|
||||||
|
#define PCRE2_ERROR_INTERNAL_OVERRAN_WORKSPACE 152
|
||||||
|
#define PCRE2_ERROR_INTERNAL_MISSING_SUBPATTERN 153
|
||||||
|
#define PCRE2_ERROR_DEFINE_TOO_MANY_BRANCHES 154
|
||||||
|
#define PCRE2_ERROR_BACKSLASH_O_MISSING_BRACE 155
|
||||||
|
#define PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE 156
|
||||||
|
#define PCRE2_ERROR_BACKSLASH_G_SYNTAX 157
|
||||||
|
#define PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING 158
|
||||||
|
/* Error 159 is obsolete and should now never occur */
|
||||||
|
#define PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED 159
|
||||||
|
#define PCRE2_ERROR_VERB_UNKNOWN 160
|
||||||
|
#define PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG 161
|
||||||
|
#define PCRE2_ERROR_SUBPATTERN_NAME_EXPECTED 162
|
||||||
|
#define PCRE2_ERROR_INTERNAL_PARSED_OVERFLOW 163
|
||||||
|
#define PCRE2_ERROR_INVALID_OCTAL 164
|
||||||
|
#define PCRE2_ERROR_SUBPATTERN_NAMES_MISMATCH 165
|
||||||
|
#define PCRE2_ERROR_MARK_MISSING_ARGUMENT 166
|
||||||
|
#define PCRE2_ERROR_INVALID_HEXADECIMAL 167
|
||||||
|
#define PCRE2_ERROR_BACKSLASH_C_SYNTAX 168
|
||||||
|
#define PCRE2_ERROR_BACKSLASH_K_SYNTAX 169
|
||||||
|
#define PCRE2_ERROR_INTERNAL_BAD_CODE_LOOKBEHINDS 170
|
||||||
|
#define PCRE2_ERROR_BACKSLASH_N_IN_CLASS 171
|
||||||
|
#define PCRE2_ERROR_CALLOUT_STRING_TOO_LONG 172
|
||||||
|
#define PCRE2_ERROR_UNICODE_DISALLOWED_CODE_POINT 173
|
||||||
|
#define PCRE2_ERROR_UTF_IS_DISABLED 174
|
||||||
|
#define PCRE2_ERROR_UCP_IS_DISABLED 175
|
||||||
|
#define PCRE2_ERROR_VERB_NAME_TOO_LONG 176
|
||||||
|
#define PCRE2_ERROR_BACKSLASH_U_CODE_POINT_TOO_BIG 177
|
||||||
|
#define PCRE2_ERROR_MISSING_OCTAL_OR_HEX_DIGITS 178
|
||||||
|
#define PCRE2_ERROR_VERSION_CONDITION_SYNTAX 179
|
||||||
|
#define PCRE2_ERROR_INTERNAL_BAD_CODE_AUTO_POSSESS 180
|
||||||
|
#define PCRE2_ERROR_CALLOUT_NO_STRING_DELIMITER 181
|
||||||
|
#define PCRE2_ERROR_CALLOUT_BAD_STRING_DELIMITER 182
|
||||||
|
#define PCRE2_ERROR_BACKSLASH_C_CALLER_DISABLED 183
|
||||||
|
#define PCRE2_ERROR_QUERY_BARJX_NEST_TOO_DEEP 184
|
||||||
|
#define PCRE2_ERROR_BACKSLASH_C_LIBRARY_DISABLED 185
|
||||||
|
#define PCRE2_ERROR_PATTERN_TOO_COMPLICATED 186
|
||||||
|
#define PCRE2_ERROR_LOOKBEHIND_TOO_LONG 187
|
||||||
|
#define PCRE2_ERROR_PATTERN_STRING_TOO_LONG 188
|
||||||
|
#define PCRE2_ERROR_INTERNAL_BAD_CODE 189
|
||||||
|
#define PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP 190
|
||||||
|
#define PCRE2_ERROR_NO_SURROGATES_IN_UTF16 191
|
||||||
|
#define PCRE2_ERROR_BAD_LITERAL_OPTIONS 192
|
||||||
|
#define PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE 193
|
||||||
|
#define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS 194
|
||||||
|
#define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN 195
|
||||||
|
#define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE 196
|
||||||
|
#define PCRE2_ERROR_TOO_MANY_CAPTURES 197
|
||||||
|
#define PCRE2_ERROR_MISSING_OCTAL_DIGIT 198
|
||||||
|
#define PCRE2_ERROR_BACKSLASH_K_IN_LOOKAROUND 199
|
||||||
|
#define PCRE2_ERROR_MAX_VAR_LOOKBEHIND_EXCEEDED 200
|
||||||
|
#define PCRE2_ERROR_PATTERN_COMPILED_SIZE_TOO_BIG 201
|
||||||
|
#define PCRE2_ERROR_OVERSIZE_PYTHON_OCTAL 202
|
||||||
|
#define PCRE2_ERROR_CALLOUT_CALLER_DISABLED 203
|
||||||
|
#define PCRE2_ERROR_EXTRA_CASING_REQUIRES_UNICODE 204
|
||||||
|
#define PCRE2_ERROR_TURKISH_CASING_REQUIRES_UTF 205
|
||||||
|
#define PCRE2_ERROR_EXTRA_CASING_INCOMPATIBLE 206
|
||||||
|
#define PCRE2_ERROR_ECLASS_NEST_TOO_DEEP 207
|
||||||
|
#define PCRE2_ERROR_ECLASS_INVALID_OPERATOR 208
|
||||||
|
#define PCRE2_ERROR_ECLASS_UNEXPECTED_OPERATOR 209
|
||||||
|
#define PCRE2_ERROR_ECLASS_EXPECTED_OPERAND 210
|
||||||
|
#define PCRE2_ERROR_ECLASS_MIXED_OPERATORS 211
|
||||||
|
#define PCRE2_ERROR_ECLASS_HINT_SQUARE_BRACKET 212
|
||||||
|
#define PCRE2_ERROR_PERL_ECLASS_UNEXPECTED_EXPR 213
|
||||||
|
#define PCRE2_ERROR_PERL_ECLASS_EMPTY_EXPR 214
|
||||||
|
#define PCRE2_ERROR_PERL_ECLASS_MISSING_CLOSE 215
|
||||||
|
#define PCRE2_ERROR_PERL_ECLASS_UNEXPECTED_CHAR 216
|
||||||
|
|
||||||
|
/* "Expected" matching error codes: no match and partial match. */
|
||||||
|
|
||||||
|
#define PCRE2_ERROR_NOMATCH (-1)
|
||||||
|
#define PCRE2_ERROR_PARTIAL (-2)
|
||||||
|
|
||||||
|
/* Error codes for UTF-8 validity checks */
|
||||||
|
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR1 (-3)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR2 (-4)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR3 (-5)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR4 (-6)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR5 (-7)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR6 (-8)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR7 (-9)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR8 (-10)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR9 (-11)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR10 (-12)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR11 (-13)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR12 (-14)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR13 (-15)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR14 (-16)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR15 (-17)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR16 (-18)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR17 (-19)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR18 (-20)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR19 (-21)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR20 (-22)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR21 (-23)
|
||||||
|
|
||||||
|
/* Error codes for UTF-16 validity checks */
|
||||||
|
|
||||||
|
#define PCRE2_ERROR_UTF16_ERR1 (-24)
|
||||||
|
#define PCRE2_ERROR_UTF16_ERR2 (-25)
|
||||||
|
#define PCRE2_ERROR_UTF16_ERR3 (-26)
|
||||||
|
|
||||||
|
/* Error codes for UTF-32 validity checks */
|
||||||
|
|
||||||
|
#define PCRE2_ERROR_UTF32_ERR1 (-27)
|
||||||
|
#define PCRE2_ERROR_UTF32_ERR2 (-28)
|
||||||
|
|
||||||
|
/* Miscellaneous error codes for pcre2[_dfa]_match(), substring extraction
|
||||||
|
functions, context functions, and serializing functions. They are in numerical
|
||||||
|
order. Originally they were in alphabetical order too, but now that PCRE2 is
|
||||||
|
released, the numbers must not be changed. */
|
||||||
|
|
||||||
|
#define PCRE2_ERROR_BADDATA (-29)
|
||||||
|
#define PCRE2_ERROR_MIXEDTABLES (-30) /* Name was changed */
|
||||||
|
#define PCRE2_ERROR_BADMAGIC (-31)
|
||||||
|
#define PCRE2_ERROR_BADMODE (-32)
|
||||||
|
#define PCRE2_ERROR_BADOFFSET (-33)
|
||||||
|
#define PCRE2_ERROR_BADOPTION (-34)
|
||||||
|
#define PCRE2_ERROR_BADREPLACEMENT (-35)
|
||||||
|
#define PCRE2_ERROR_BADUTFOFFSET (-36)
|
||||||
|
#define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */
|
||||||
|
#define PCRE2_ERROR_DFA_BADRESTART (-38)
|
||||||
|
#define PCRE2_ERROR_DFA_RECURSE (-39)
|
||||||
|
#define PCRE2_ERROR_DFA_UCOND (-40)
|
||||||
|
#define PCRE2_ERROR_DFA_UFUNC (-41)
|
||||||
|
#define PCRE2_ERROR_DFA_UITEM (-42)
|
||||||
|
#define PCRE2_ERROR_DFA_WSSIZE (-43)
|
||||||
|
#define PCRE2_ERROR_INTERNAL (-44)
|
||||||
|
#define PCRE2_ERROR_JIT_BADOPTION (-45)
|
||||||
|
#define PCRE2_ERROR_JIT_STACKLIMIT (-46)
|
||||||
|
#define PCRE2_ERROR_MATCHLIMIT (-47)
|
||||||
|
#define PCRE2_ERROR_NOMEMORY (-48)
|
||||||
|
#define PCRE2_ERROR_NOSUBSTRING (-49)
|
||||||
|
#define PCRE2_ERROR_NOUNIQUESUBSTRING (-50)
|
||||||
|
#define PCRE2_ERROR_NULL (-51)
|
||||||
|
#define PCRE2_ERROR_RECURSELOOP (-52)
|
||||||
|
#define PCRE2_ERROR_DEPTHLIMIT (-53)
|
||||||
|
#define PCRE2_ERROR_RECURSIONLIMIT (-53) /* Obsolete synonym */
|
||||||
|
#define PCRE2_ERROR_UNAVAILABLE (-54)
|
||||||
|
#define PCRE2_ERROR_UNSET (-55)
|
||||||
|
#define PCRE2_ERROR_BADOFFSETLIMIT (-56)
|
||||||
|
#define PCRE2_ERROR_BADREPESCAPE (-57)
|
||||||
|
#define PCRE2_ERROR_REPMISSINGBRACE (-58)
|
||||||
|
#define PCRE2_ERROR_BADSUBSTITUTION (-59)
|
||||||
|
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
|
||||||
|
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
|
||||||
|
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
|
||||||
|
#define PCRE2_ERROR_HEAPLIMIT (-63)
|
||||||
|
#define PCRE2_ERROR_CONVERT_SYNTAX (-64)
|
||||||
|
#define PCRE2_ERROR_INTERNAL_DUPMATCH (-65)
|
||||||
|
#define PCRE2_ERROR_DFA_UINVALID_UTF (-66)
|
||||||
|
#define PCRE2_ERROR_INVALIDOFFSET (-67)
|
||||||
|
#define PCRE2_ERROR_JIT_UNSUPPORTED (-68)
|
||||||
|
#define PCRE2_ERROR_REPLACECASE (-69)
|
||||||
|
#define PCRE2_ERROR_TOOLARGEREPLACE (-70)
|
||||||
|
|
||||||
|
|
||||||
|
/* Request types for pcre2_pattern_info() */
|
||||||
|
|
||||||
|
#define PCRE2_INFO_ALLOPTIONS 0
|
||||||
|
#define PCRE2_INFO_ARGOPTIONS 1
|
||||||
|
#define PCRE2_INFO_BACKREFMAX 2
|
||||||
|
#define PCRE2_INFO_BSR 3
|
||||||
|
#define PCRE2_INFO_CAPTURECOUNT 4
|
||||||
|
#define PCRE2_INFO_FIRSTCODEUNIT 5
|
||||||
|
#define PCRE2_INFO_FIRSTCODETYPE 6
|
||||||
|
#define PCRE2_INFO_FIRSTBITMAP 7
|
||||||
|
#define PCRE2_INFO_HASCRORLF 8
|
||||||
|
#define PCRE2_INFO_JCHANGED 9
|
||||||
|
#define PCRE2_INFO_JITSIZE 10
|
||||||
|
#define PCRE2_INFO_LASTCODEUNIT 11
|
||||||
|
#define PCRE2_INFO_LASTCODETYPE 12
|
||||||
|
#define PCRE2_INFO_MATCHEMPTY 13
|
||||||
|
#define PCRE2_INFO_MATCHLIMIT 14
|
||||||
|
#define PCRE2_INFO_MAXLOOKBEHIND 15
|
||||||
|
#define PCRE2_INFO_MINLENGTH 16
|
||||||
|
#define PCRE2_INFO_NAMECOUNT 17
|
||||||
|
#define PCRE2_INFO_NAMEENTRYSIZE 18
|
||||||
|
#define PCRE2_INFO_NAMETABLE 19
|
||||||
|
#define PCRE2_INFO_NEWLINE 20
|
||||||
|
#define PCRE2_INFO_DEPTHLIMIT 21
|
||||||
|
#define PCRE2_INFO_RECURSIONLIMIT 21 /* Obsolete synonym */
|
||||||
|
#define PCRE2_INFO_SIZE 22
|
||||||
|
#define PCRE2_INFO_HASBACKSLASHC 23
|
||||||
|
#define PCRE2_INFO_FRAMESIZE 24
|
||||||
|
#define PCRE2_INFO_HEAPLIMIT 25
|
||||||
|
#define PCRE2_INFO_EXTRAOPTIONS 26
|
||||||
|
|
||||||
|
/* Request types for pcre2_config(). */
|
||||||
|
|
||||||
|
#define PCRE2_CONFIG_BSR 0
|
||||||
|
#define PCRE2_CONFIG_JIT 1
|
||||||
|
#define PCRE2_CONFIG_JITTARGET 2
|
||||||
|
#define PCRE2_CONFIG_LINKSIZE 3
|
||||||
|
#define PCRE2_CONFIG_MATCHLIMIT 4
|
||||||
|
#define PCRE2_CONFIG_NEWLINE 5
|
||||||
|
#define PCRE2_CONFIG_PARENSLIMIT 6
|
||||||
|
#define PCRE2_CONFIG_DEPTHLIMIT 7
|
||||||
|
#define PCRE2_CONFIG_RECURSIONLIMIT 7 /* Obsolete synonym */
|
||||||
|
#define PCRE2_CONFIG_STACKRECURSE 8 /* Obsolete */
|
||||||
|
#define PCRE2_CONFIG_UNICODE 9
|
||||||
|
#define PCRE2_CONFIG_UNICODE_VERSION 10
|
||||||
|
#define PCRE2_CONFIG_VERSION 11
|
||||||
|
#define PCRE2_CONFIG_HEAPLIMIT 12
|
||||||
|
#define PCRE2_CONFIG_NEVER_BACKSLASH_C 13
|
||||||
|
#define PCRE2_CONFIG_COMPILED_WIDTHS 14
|
||||||
|
#define PCRE2_CONFIG_TABLES_LENGTH 15
|
||||||
|
|
||||||
|
/* Optimization directives for pcre2_set_optimize().
|
||||||
|
For binary compatibility, only add to this list; do not renumber. */
|
||||||
|
|
||||||
|
#define PCRE2_OPTIMIZATION_NONE 0
|
||||||
|
#define PCRE2_OPTIMIZATION_FULL 1
|
||||||
|
|
||||||
|
#define PCRE2_AUTO_POSSESS 64
|
||||||
|
#define PCRE2_AUTO_POSSESS_OFF 65
|
||||||
|
#define PCRE2_DOTSTAR_ANCHOR 66
|
||||||
|
#define PCRE2_DOTSTAR_ANCHOR_OFF 67
|
||||||
|
#define PCRE2_START_OPTIMIZE 68
|
||||||
|
#define PCRE2_START_OPTIMIZE_OFF 69
|
||||||
|
|
||||||
|
/* Types used in pcre2_set_substitute_case_callout().
|
||||||
|
|
||||||
|
PCRE2_SUBSTITUTE_CASE_LOWER and PCRE2_SUBSTITUTE_CASE_UPPER are passed to the
|
||||||
|
callout to indicate that the entire callout input should be
|
||||||
|
case-transformed. PCRE2_SUBSTITUTE_CASE_TITLE_FIRST is passed to indicate that
|
||||||
|
only the first character or glyph should be transformed to Unicode titlecase,
|
||||||
|
and the rest to lowercase. */
|
||||||
|
|
||||||
|
#define PCRE2_SUBSTITUTE_CASE_LOWER 1
|
||||||
|
#define PCRE2_SUBSTITUTE_CASE_UPPER 2
|
||||||
|
#define PCRE2_SUBSTITUTE_CASE_TITLE_FIRST 3
|
||||||
|
|
||||||
|
/* Types for code units in patterns and subject strings. */
|
||||||
|
|
||||||
|
/* Code unit types: fixed-width unsigned integers of 8, 16, and 32 bits. */
typedef uint8_t PCRE2_UCHAR8;
|
||||||
|
typedef uint16_t PCRE2_UCHAR16;
|
||||||
|
typedef uint32_t PCRE2_UCHAR32;
|
||||||
|
|
||||||
|
/* Pointers to const (read-only) code units of each width. */
typedef const PCRE2_UCHAR8 *PCRE2_SPTR8;
|
||||||
|
typedef const PCRE2_UCHAR16 *PCRE2_SPTR16;
|
||||||
|
typedef const PCRE2_UCHAR32 *PCRE2_SPTR32;
|
||||||
|
|
||||||
|
/* The PCRE2_SIZE type is used for all string lengths and offsets in PCRE2,
|
||||||
|
including pattern offsets for errors and subject offsets after a match. We
|
||||||
|
define special values to indicate zero-terminated strings and unset offsets in
|
||||||
|
the offset vector (ovector). */
|
||||||
|
|
||||||
|
#define PCRE2_SIZE size_t
|
||||||
|
#define PCRE2_SIZE_MAX SIZE_MAX
|
||||||
|
#define PCRE2_ZERO_TERMINATED (~(PCRE2_SIZE)0)
|
||||||
|
#define PCRE2_UNSET (~(PCRE2_SIZE)0)
|
||||||
|
|
||||||
|
/* Generic types for opaque structures and JIT callback functions. These
|
||||||
|
declarations are defined in a macro that is expanded for each width later. */
|
||||||
|
|
||||||
|
/* PCRE2_TYPES_LIST forward-declares the struct tags pcre2_real_general_context,
pcre2_real_compile_context, pcre2_real_match_context,
pcre2_real_convert_context, pcre2_real_code, pcre2_real_match_data and
pcre2_real_jit_stack without defining their contents here, and gives each a
typedef name with the "real_" part removed. It also defines pcre2_jit_callback
as a pointer to a function that takes a void * and returns a
pcre2_jit_stack *. */
#define PCRE2_TYPES_LIST \
|
||||||
|
struct pcre2_real_general_context; \
|
||||||
|
typedef struct pcre2_real_general_context pcre2_general_context; \
|
||||||
|
\
|
||||||
|
struct pcre2_real_compile_context; \
|
||||||
|
typedef struct pcre2_real_compile_context pcre2_compile_context; \
|
||||||
|
\
|
||||||
|
struct pcre2_real_match_context; \
|
||||||
|
typedef struct pcre2_real_match_context pcre2_match_context; \
|
||||||
|
\
|
||||||
|
struct pcre2_real_convert_context; \
|
||||||
|
typedef struct pcre2_real_convert_context pcre2_convert_context; \
|
||||||
|
\
|
||||||
|
struct pcre2_real_code; \
|
||||||
|
typedef struct pcre2_real_code pcre2_code; \
|
||||||
|
\
|
||||||
|
struct pcre2_real_match_data; \
|
||||||
|
typedef struct pcre2_real_match_data pcre2_match_data; \
|
||||||
|
\
|
||||||
|
struct pcre2_real_jit_stack; \
|
||||||
|
typedef struct pcre2_real_jit_stack pcre2_jit_stack; \
|
||||||
|
\
|
||||||
|
typedef pcre2_jit_stack *(*pcre2_jit_callback)(void *);
|
||||||
|
|
||||||
|
|
||||||
|
/* The structures for passing out data via callout functions. We use structures
|
||||||
|
so that new fields can be added on the end in future versions, without changing
|
||||||
|
the API of the function, thereby allowing old clients to work without
|
||||||
|
modification. Define the generic versions in a macro; the width-specific
|
||||||
|
versions are generated from this macro below. */
|
||||||
|
|
||||||
|
/* Flags for the callout_flags field. These are cleared after a callout. */
|
||||||
|
|
||||||
|
#define PCRE2_CALLOUT_STARTMATCH 0x00000001u /* Set for each bumpalong */
|
||||||
|
#define PCRE2_CALLOUT_BACKTRACK 0x00000002u /* Set after a backtrack */
|
||||||
|
|
||||||
|
/* PCRE2_STRUCTURE_LIST expands to three typedef'd structures:
pcre2_callout_block, pcre2_callout_enumerate_block and
pcre2_substitute_callout_block. Each starts with a uint32_t version field.
In pcre2_callout_block the fields are grouped by the block version that
introduced them (0, 1, 2), with later additions appended at the end.
NOTE(review): PCRE2_SPTR is not defined in this part of the header; presumably
it is mapped to a width-specific pointer type before this macro is expanded -
confirm further down the file. */
#define PCRE2_STRUCTURE_LIST \
|
||||||
|
typedef struct pcre2_callout_block { \
|
||||||
|
uint32_t version; /* Identifies version of block */ \
|
||||||
|
/* ------------------------ Version 0 ------------------------------- */ \
|
||||||
|
uint32_t callout_number; /* Number compiled into pattern */ \
|
||||||
|
uint32_t capture_top; /* Max current capture */ \
|
||||||
|
uint32_t capture_last; /* Most recently closed capture */ \
|
||||||
|
PCRE2_SIZE *offset_vector; /* The offset vector */ \
|
||||||
|
PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \
|
||||||
|
PCRE2_SPTR subject; /* The subject being matched */ \
|
||||||
|
PCRE2_SIZE subject_length; /* The length of the subject */ \
|
||||||
|
PCRE2_SIZE start_match; /* Offset to start of this match attempt */ \
|
||||||
|
PCRE2_SIZE current_position; /* Where we currently are in the subject */ \
|
||||||
|
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
|
||||||
|
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
|
||||||
|
/* ------------------- Added for Version 1 -------------------------- */ \
|
||||||
|
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
|
||||||
|
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
|
||||||
|
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
|
||||||
|
/* ------------------- Added for Version 2 -------------------------- */ \
|
||||||
|
uint32_t callout_flags; /* PCRE2_CALLOUT_STARTMATCH / PCRE2_CALLOUT_BACKTRACK bits */ \
|
||||||
|
/* ------------------------------------------------------------------ */ \
|
||||||
|
} pcre2_callout_block; \
|
||||||
|
\
|
||||||
|
typedef struct pcre2_callout_enumerate_block { \
|
||||||
|
uint32_t version; /* Identifies version of block */ \
|
||||||
|
/* ------------------------ Version 0 ------------------------------- */ \
|
||||||
|
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
|
||||||
|
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
|
||||||
|
uint32_t callout_number; /* Number compiled into pattern */ \
|
||||||
|
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
|
||||||
|
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
|
||||||
|
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
|
||||||
|
/* ------------------------------------------------------------------ */ \
|
||||||
|
} pcre2_callout_enumerate_block; \
|
||||||
|
\
|
||||||
|
typedef struct pcre2_substitute_callout_block { \
|
||||||
|
uint32_t version; /* Identifies version of block */ \
|
||||||
|
/* ------------------------ Version 0 ------------------------------- */ \
|
||||||
|
PCRE2_SPTR input; /* Pointer to input subject string */ \
|
||||||
|
PCRE2_SPTR output; /* Pointer to output buffer */ \
|
||||||
|
PCRE2_SIZE output_offsets[2]; /* Changed portion of the output */ \
|
||||||
|
PCRE2_SIZE *ovector; /* Pointer to current ovector */ \
|
||||||
|
uint32_t oveccount; /* Count of pairs set in ovector */ \
|
||||||
|
uint32_t subscount; /* Substitution number */ \
|
||||||
|
/* ------------------------------------------------------------------ */ \
|
||||||
|
} pcre2_substitute_callout_block;
|
||||||
|
|
||||||
|
|
||||||
|
/* List the generic forms of all other functions in macros, which will be
|
||||||
|
expanded for each width below. Start with functions that give general
|
||||||
|
information. */
|
||||||
|
|
||||||
|
#define PCRE2_GENERAL_INFO_FUNCTIONS \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_config(uint32_t, void *);
|
||||||
|
|
||||||
|
|
||||||
|
/* Functions for manipulating contexts. */
|
||||||
|
|
||||||
|
#define PCRE2_GENERAL_CONTEXT_FUNCTIONS \
|
||||||
|
PCRE2_EXP_DECL pcre2_general_context *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_general_context_copy(pcre2_general_context *); \
|
||||||
|
PCRE2_EXP_DECL pcre2_general_context *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_general_context_create(void *(*)(size_t, void *), \
|
||||||
|
void (*)(void *, void *), void *); \
|
||||||
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_general_context_free(pcre2_general_context *);
|
||||||
|
|
||||||
|
#define PCRE2_COMPILE_CONTEXT_FUNCTIONS \
|
||||||
|
PCRE2_EXP_DECL pcre2_compile_context *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_compile_context_copy(pcre2_compile_context *); \
|
||||||
|
PCRE2_EXP_DECL pcre2_compile_context *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_compile_context_create(pcre2_general_context *);\
|
||||||
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_compile_context_free(pcre2_compile_context *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_character_tables(pcre2_compile_context *, const uint8_t *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_max_pattern_compiled_length(pcre2_compile_context *, PCRE2_SIZE); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_max_varlookbehind(pcre2_compile_context *, uint32_t); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_newline(pcre2_compile_context *, uint32_t); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_parens_nest_limit(pcre2_compile_context *, uint32_t); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_compile_recursion_guard(pcre2_compile_context *, \
|
||||||
|
int (*)(uint32_t, void *), void *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_optimize(pcre2_compile_context *, uint32_t);
|
||||||
|
|
||||||
|
#define PCRE2_MATCH_CONTEXT_FUNCTIONS \
|
||||||
|
PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_match_context_copy(pcre2_match_context *); \
|
||||||
|
PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_match_context_create(pcre2_general_context *); \
|
||||||
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_match_context_free(pcre2_match_context *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_callout(pcre2_match_context *, \
|
||||||
|
int (*)(pcre2_callout_block *, void *), void *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_substitute_callout(pcre2_match_context *, \
|
||||||
|
int (*)(pcre2_substitute_callout_block *, void *), void *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_substitute_case_callout(pcre2_match_context *, \
|
||||||
|
PCRE2_SIZE (*)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE, int, \
|
||||||
|
void *), \
|
||||||
|
void *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_heap_limit(pcre2_match_context *, uint32_t); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_match_limit(pcre2_match_context *, uint32_t); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_offset_limit(pcre2_match_context *, PCRE2_SIZE); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_recursion_limit(pcre2_match_context *, uint32_t); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_recursion_memory_management(pcre2_match_context *, \
|
||||||
|
void *(*)(size_t, void *), void (*)(void *, void *), void *);
|
||||||
|
|
||||||
|
#define PCRE2_CONVERT_CONTEXT_FUNCTIONS \
|
||||||
|
PCRE2_EXP_DECL pcre2_convert_context *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_convert_context_copy(pcre2_convert_context *); \
|
||||||
|
PCRE2_EXP_DECL pcre2_convert_context *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_convert_context_create(pcre2_general_context *); \
|
||||||
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_convert_context_free(pcre2_convert_context *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_glob_escape(pcre2_convert_context *, uint32_t); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_glob_separator(pcre2_convert_context *, uint32_t);
|
||||||
|
|
||||||
|
|
||||||
|
/* Functions concerned with compiling a pattern to PCRE internal code. */
|
||||||
|
|
||||||
|
#define PCRE2_COMPILE_FUNCTIONS \
|
||||||
|
PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_SIZE *, \
|
||||||
|
pcre2_compile_context *); \
|
||||||
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_code_free(pcre2_code *); \
|
||||||
|
PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_code_copy(const pcre2_code *); \
|
||||||
|
PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_code_copy_with_tables(const pcre2_code *);
|
||||||
|
|
||||||
|
|
||||||
|
/* Functions that give information about a compiled pattern. */
|
||||||
|
|
||||||
|
#define PCRE2_PATTERN_INFO_FUNCTIONS \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_pattern_info(const pcre2_code *, uint32_t, void *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_callout_enumerate(const pcre2_code *, \
|
||||||
|
int (*)(pcre2_callout_enumerate_block *, void *), void *);
|
||||||
|
|
||||||
|
|
||||||
|
/* Functions for running a match and inspecting the result. */
|
||||||
|
|
||||||
|
#define PCRE2_MATCH_FUNCTIONS \
|
||||||
|
PCRE2_EXP_DECL pcre2_match_data *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_match_data_create(uint32_t, pcre2_general_context *); \
|
||||||
|
PCRE2_EXP_DECL pcre2_match_data *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_match_data_create_from_pattern(const pcre2_code *, \
|
||||||
|
pcre2_general_context *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
|
||||||
|
uint32_t, pcre2_match_data *, pcre2_match_context *, int *, PCRE2_SIZE); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
|
||||||
|
uint32_t, pcre2_match_data *, pcre2_match_context *); \
|
||||||
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_match_data_free(pcre2_match_data *); \
|
||||||
|
PCRE2_EXP_DECL PCRE2_SPTR PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_get_mark(pcre2_match_data *); \
|
||||||
|
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_get_match_data_size(pcre2_match_data *); \
|
||||||
|
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_get_match_data_heapframes_size(pcre2_match_data *); \
|
||||||
|
PCRE2_EXP_DECL uint32_t PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_get_ovector_count(pcre2_match_data *); \
|
||||||
|
PCRE2_EXP_DECL PCRE2_SIZE *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_get_ovector_pointer(pcre2_match_data *); \
|
||||||
|
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_get_startchar(pcre2_match_data *);
|
||||||
|
|
||||||
|
|
||||||
|
/* Convenience functions for handling matched substrings. */
|
||||||
|
|
||||||
|
#define PCRE2_SUBSTRING_FUNCTIONS \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_substring_copy_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR *, \
|
||||||
|
PCRE2_SIZE *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_substring_copy_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR *, \
|
||||||
|
PCRE2_SIZE *); \
|
||||||
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_substring_free(PCRE2_UCHAR *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_substring_get_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR **, \
|
||||||
|
PCRE2_SIZE *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_substring_get_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR **, \
|
||||||
|
PCRE2_SIZE *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_substring_length_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_SIZE *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_substring_length_bynumber(pcre2_match_data *, uint32_t, PCRE2_SIZE *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_substring_nametable_scan(const pcre2_code *, PCRE2_SPTR, PCRE2_SPTR *, \
|
||||||
|
PCRE2_SPTR *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_substring_number_from_name(const pcre2_code *, PCRE2_SPTR); \
|
||||||
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_substring_list_free(PCRE2_UCHAR **); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_substring_list_get(pcre2_match_data *, PCRE2_UCHAR ***, PCRE2_SIZE **);
|
||||||
|
|
||||||
|
|
||||||
|
/* Functions for serializing / deserializing compiled patterns. */
|
||||||
|
|
||||||
|
#define PCRE2_SERIALIZE_FUNCTIONS \
|
||||||
|
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_serialize_encode(const pcre2_code **, int32_t, uint8_t **, \
|
||||||
|
PCRE2_SIZE *, pcre2_general_context *); \
|
||||||
|
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_serialize_decode(pcre2_code **, int32_t, const uint8_t *, \
|
||||||
|
pcre2_general_context *); \
|
||||||
|
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_serialize_get_number_of_codes(const uint8_t *); \
|
||||||
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_serialize_free(uint8_t *);
|
||||||
|
|
||||||
|
|
||||||
|
/* Convenience function for match + substitute. */
|
||||||
|
|
||||||
|
#define PCRE2_SUBSTITUTE_FUNCTION \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_substitute(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
|
||||||
|
uint32_t, pcre2_match_data *, pcre2_match_context *, PCRE2_SPTR, \
|
||||||
|
PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE *);
|
||||||
|
|
||||||
|
|
||||||
|
/* Functions for converting pattern source strings. */
|
||||||
|
|
||||||
|
#define PCRE2_CONVERT_FUNCTIONS \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_pattern_convert(PCRE2_SPTR, PCRE2_SIZE, uint32_t, PCRE2_UCHAR **, \
|
||||||
|
PCRE2_SIZE *, pcre2_convert_context *); \
|
||||||
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_converted_pattern_free(PCRE2_UCHAR *);
|
||||||
|
|
||||||
|
|
||||||
|
/* Functions for JIT processing */
|
||||||
|
|
||||||
|
#define PCRE2_JIT_FUNCTIONS \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_jit_compile(pcre2_code *, uint32_t); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_jit_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
|
||||||
|
uint32_t, pcre2_match_data *, pcre2_match_context *); \
|
||||||
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_jit_free_unused_memory(pcre2_general_context *); \
|
||||||
|
PCRE2_EXP_DECL pcre2_jit_stack *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_jit_stack_create(size_t, size_t, pcre2_general_context *); \
|
||||||
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_jit_stack_assign(pcre2_match_context *, pcre2_jit_callback, void *); \
|
||||||
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_jit_stack_free(pcre2_jit_stack *);
|
||||||
|
|
||||||
|
|
||||||
|
/* Other miscellaneous functions. */
|
||||||
|
|
||||||
|
#define PCRE2_OTHER_FUNCTIONS \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \
|
||||||
|
PCRE2_EXP_DECL const uint8_t *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_maketables(pcre2_general_context *); \
|
||||||
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_maketables_free(pcre2_general_context *, const uint8_t *);
|
||||||
|
|
||||||
|
/* Define macros that generate width-specific names from generic versions. The
|
||||||
|
three-level macro scheme is necessary to get the macros expanded when we want
|
||||||
|
them to be. First we get the width from PCRE2_LOCAL_WIDTH, which is used for
|
||||||
|
generating three versions of everything below. After that, PCRE2_SUFFIX will be
|
||||||
|
re-defined to use PCRE2_CODE_UNIT_WIDTH, for use when macros such as
|
||||||
|
pcre2_compile are called by application code. */
|
||||||
|
|
||||||
|
/* PCRE2_JOIN pastes its two arguments together exactly as written; operands of
the ## operator are not macro-expanded before pasting. */
#define PCRE2_JOIN(a,b) a ## b
|
||||||
|
/* PCRE2_GLUE adds one level of indirection so that its arguments are fully
macro-expanded before PCRE2_JOIN pastes them. */
#define PCRE2_GLUE(a,b) PCRE2_JOIN(a,b)
|
||||||
|
/* PCRE2_SUFFIX appends the current value of PCRE2_LOCAL_WIDTH to a name stem.
Going through PCRE2_GLUE is what replaces PCRE2_LOCAL_WIDTH by its value before
the paste happens. */
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a,PCRE2_LOCAL_WIDTH)
|
||||||
|
|
||||||
|
|
||||||
|
/* Data types */
|
||||||
|
|
||||||
|
#define PCRE2_UCHAR PCRE2_SUFFIX(PCRE2_UCHAR)
|
||||||
|
#define PCRE2_SPTR PCRE2_SUFFIX(PCRE2_SPTR)
|
||||||
|
|
||||||
|
#define pcre2_code PCRE2_SUFFIX(pcre2_code_)
|
||||||
|
#define pcre2_jit_callback PCRE2_SUFFIX(pcre2_jit_callback_)
|
||||||
|
#define pcre2_jit_stack PCRE2_SUFFIX(pcre2_jit_stack_)
|
||||||
|
|
||||||
|
#define pcre2_real_code PCRE2_SUFFIX(pcre2_real_code_)
|
||||||
|
#define pcre2_real_general_context PCRE2_SUFFIX(pcre2_real_general_context_)
|
||||||
|
#define pcre2_real_compile_context PCRE2_SUFFIX(pcre2_real_compile_context_)
|
||||||
|
#define pcre2_real_convert_context PCRE2_SUFFIX(pcre2_real_convert_context_)
|
||||||
|
#define pcre2_real_match_context PCRE2_SUFFIX(pcre2_real_match_context_)
|
||||||
|
#define pcre2_real_jit_stack PCRE2_SUFFIX(pcre2_real_jit_stack_)
|
||||||
|
#define pcre2_real_match_data PCRE2_SUFFIX(pcre2_real_match_data_)
|
||||||
|
|
||||||
|
|
||||||
|
/* Data blocks */
|
||||||
|
|
||||||
|
#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_)
|
||||||
|
#define pcre2_callout_enumerate_block PCRE2_SUFFIX(pcre2_callout_enumerate_block_)
|
||||||
|
#define pcre2_substitute_callout_block PCRE2_SUFFIX(pcre2_substitute_callout_block_)
|
||||||
|
#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_)
|
||||||
|
#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_)
|
||||||
|
#define pcre2_convert_context PCRE2_SUFFIX(pcre2_convert_context_)
|
||||||
|
#define pcre2_match_context PCRE2_SUFFIX(pcre2_match_context_)
|
||||||
|
#define pcre2_match_data PCRE2_SUFFIX(pcre2_match_data_)
|
||||||
|
|
||||||
|
|
||||||
|
/* Functions: the complete list in alphabetical order */
|
||||||
|
|
||||||
|
#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_)
|
||||||
|
#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_)
|
||||||
|
#define pcre2_code_copy_with_tables PCRE2_SUFFIX(pcre2_code_copy_with_tables_)
|
||||||
|
#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_)
|
||||||
|
#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_)
|
||||||
|
#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_)
|
||||||
|
#define pcre2_compile_context_create PCRE2_SUFFIX(pcre2_compile_context_create_)
|
||||||
|
#define pcre2_compile_context_free PCRE2_SUFFIX(pcre2_compile_context_free_)
|
||||||
|
#define pcre2_config PCRE2_SUFFIX(pcre2_config_)
|
||||||
|
#define pcre2_convert_context_copy PCRE2_SUFFIX(pcre2_convert_context_copy_)
|
||||||
|
#define pcre2_convert_context_create PCRE2_SUFFIX(pcre2_convert_context_create_)
|
||||||
|
#define pcre2_convert_context_free PCRE2_SUFFIX(pcre2_convert_context_free_)
|
||||||
|
#define pcre2_converted_pattern_free PCRE2_SUFFIX(pcre2_converted_pattern_free_)
|
||||||
|
#define pcre2_dfa_match PCRE2_SUFFIX(pcre2_dfa_match_)
|
||||||
|
#define pcre2_general_context_copy PCRE2_SUFFIX(pcre2_general_context_copy_)
|
||||||
|
#define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_)
|
||||||
|
#define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_)
|
||||||
|
#define pcre2_get_error_message PCRE2_SUFFIX(pcre2_get_error_message_)
|
||||||
|
#define pcre2_get_mark PCRE2_SUFFIX(pcre2_get_mark_)
|
||||||
|
#define pcre2_get_match_data_heapframes_size PCRE2_SUFFIX(pcre2_get_match_data_heapframes_size_)
|
||||||
|
#define pcre2_get_match_data_size PCRE2_SUFFIX(pcre2_get_match_data_size_)
|
||||||
|
#define pcre2_get_ovector_pointer PCRE2_SUFFIX(pcre2_get_ovector_pointer_)
|
||||||
|
#define pcre2_get_ovector_count PCRE2_SUFFIX(pcre2_get_ovector_count_)
|
||||||
|
#define pcre2_get_startchar PCRE2_SUFFIX(pcre2_get_startchar_)
|
||||||
|
#define pcre2_jit_compile PCRE2_SUFFIX(pcre2_jit_compile_)
|
||||||
|
#define pcre2_jit_match PCRE2_SUFFIX(pcre2_jit_match_)
|
||||||
|
#define pcre2_jit_free_unused_memory PCRE2_SUFFIX(pcre2_jit_free_unused_memory_)
|
||||||
|
#define pcre2_jit_stack_assign PCRE2_SUFFIX(pcre2_jit_stack_assign_)
|
||||||
|
#define pcre2_jit_stack_create PCRE2_SUFFIX(pcre2_jit_stack_create_)
|
||||||
|
#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_)
|
||||||
|
#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_)
|
||||||
|
#define pcre2_maketables_free PCRE2_SUFFIX(pcre2_maketables_free_)
|
||||||
|
#define pcre2_match PCRE2_SUFFIX(pcre2_match_)
|
||||||
|
#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_)
|
||||||
|
#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_)
|
||||||
|
#define pcre2_match_context_free PCRE2_SUFFIX(pcre2_match_context_free_)
|
||||||
|
#define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_)
|
||||||
|
#define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_)
|
||||||
|
#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_)
|
||||||
|
#define pcre2_pattern_convert PCRE2_SUFFIX(pcre2_pattern_convert_)
|
||||||
|
#define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_)
|
||||||
|
#define pcre2_serialize_decode PCRE2_SUFFIX(pcre2_serialize_decode_)
|
||||||
|
#define pcre2_serialize_encode PCRE2_SUFFIX(pcre2_serialize_encode_)
|
||||||
|
#define pcre2_serialize_free PCRE2_SUFFIX(pcre2_serialize_free_)
|
||||||
|
#define pcre2_serialize_get_number_of_codes PCRE2_SUFFIX(pcre2_serialize_get_number_of_codes_)
|
||||||
|
#define pcre2_set_bsr PCRE2_SUFFIX(pcre2_set_bsr_)
|
||||||
|
#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_)
|
||||||
|
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
||||||
|
#define pcre2_set_compile_extra_options PCRE2_SUFFIX(pcre2_set_compile_extra_options_)
|
||||||
|
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
||||||
|
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
|
||||||
|
#define pcre2_set_glob_escape PCRE2_SUFFIX(pcre2_set_glob_escape_)
|
||||||
|
#define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_)
|
||||||
|
#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
|
||||||
|
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
||||||
|
#define pcre2_set_max_varlookbehind PCRE2_SUFFIX(pcre2_set_max_varlookbehind_)
|
||||||
|
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
|
||||||
|
#define pcre2_set_max_pattern_compiled_length PCRE2_SUFFIX(pcre2_set_max_pattern_compiled_length_)
|
||||||
|
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
||||||
|
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
|
||||||
|
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
|
||||||
|
#define pcre2_set_optimize PCRE2_SUFFIX(pcre2_set_optimize_)
|
||||||
|
#define pcre2_set_substitute_callout PCRE2_SUFFIX(pcre2_set_substitute_callout_)
|
||||||
|
#define pcre2_set_substitute_case_callout PCRE2_SUFFIX(pcre2_set_substitute_case_callout_)
|
||||||
|
#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_)
|
||||||
|
#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_)
|
||||||
|
#define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_)
|
||||||
|
#define pcre2_substring_free PCRE2_SUFFIX(pcre2_substring_free_)
|
||||||
|
#define pcre2_substring_get_byname PCRE2_SUFFIX(pcre2_substring_get_byname_)
|
||||||
|
#define pcre2_substring_get_bynumber PCRE2_SUFFIX(pcre2_substring_get_bynumber_)
|
||||||
|
#define pcre2_substring_length_byname PCRE2_SUFFIX(pcre2_substring_length_byname_)
|
||||||
|
#define pcre2_substring_length_bynumber PCRE2_SUFFIX(pcre2_substring_length_bynumber_)
|
||||||
|
#define pcre2_substring_list_get PCRE2_SUFFIX(pcre2_substring_list_get_)
|
||||||
|
#define pcre2_substring_list_free PCRE2_SUFFIX(pcre2_substring_list_free_)
|
||||||
|
#define pcre2_substring_nametable_scan PCRE2_SUFFIX(pcre2_substring_nametable_scan_)
|
||||||
|
#define pcre2_substring_number_from_name PCRE2_SUFFIX(pcre2_substring_number_from_name_)
|
||||||
|
|
||||||
|
/* Keep this old function name for backwards compatibility */
|
||||||
|
#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)
|
||||||
|
|
||||||
|
/* Keep this obsolete function for backwards compatibility: it is now a noop. */
|
||||||
|
#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)
|
||||||
|
|
||||||
|
/* Now generate all three sets of width-specific structures and function
|
||||||
|
prototypes. */
|
||||||
|
|
||||||
|
#define PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS \
|
||||||
|
PCRE2_TYPES_LIST \
|
||||||
|
PCRE2_STRUCTURE_LIST \
|
||||||
|
PCRE2_GENERAL_INFO_FUNCTIONS \
|
||||||
|
PCRE2_GENERAL_CONTEXT_FUNCTIONS \
|
||||||
|
PCRE2_COMPILE_CONTEXT_FUNCTIONS \
|
||||||
|
PCRE2_CONVERT_CONTEXT_FUNCTIONS \
|
||||||
|
PCRE2_CONVERT_FUNCTIONS \
|
||||||
|
PCRE2_MATCH_CONTEXT_FUNCTIONS \
|
||||||
|
PCRE2_COMPILE_FUNCTIONS \
|
||||||
|
PCRE2_PATTERN_INFO_FUNCTIONS \
|
||||||
|
PCRE2_MATCH_FUNCTIONS \
|
||||||
|
PCRE2_SUBSTRING_FUNCTIONS \
|
||||||
|
PCRE2_SERIALIZE_FUNCTIONS \
|
||||||
|
PCRE2_SUBSTITUTE_FUNCTION \
|
||||||
|
PCRE2_JIT_FUNCTIONS \
|
||||||
|
PCRE2_OTHER_FUNCTIONS
|
||||||
|
|
||||||
|
/* First expansion: PCRE2_SUFFIX reads PCRE2_LOCAL_WIDTH, so every generic name
in the list (for example pcre2_compile, defined as PCRE2_SUFFIX(pcre2_compile_))
is emitted with an 8 suffix. */
#define PCRE2_LOCAL_WIDTH 8
|
||||||
|
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||||
|
/* The width must be undefined before it can be given a different value. */
#undef PCRE2_LOCAL_WIDTH
|
||||||
|
|
||||||
|
/* Second expansion: the same list, now with a 16 suffix on every name. */
#define PCRE2_LOCAL_WIDTH 16
|
||||||
|
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||||
|
#undef PCRE2_LOCAL_WIDTH
|
||||||
|
|
||||||
|
/* Third expansion: the same list, now with a 32 suffix on every name. */
#define PCRE2_LOCAL_WIDTH 32
|
||||||
|
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||||
|
/* Left undefined from here on; PCRE2_SUFFIX is re-pointed at
PCRE2_CODE_UNIT_WIDTH further down. */
#undef PCRE2_LOCAL_WIDTH
|
||||||
|
|
||||||
|
/* Undefine the list macros; they are no longer needed. */
|
||||||
|
|
||||||
|
#undef PCRE2_TYPES_LIST
|
||||||
|
#undef PCRE2_STRUCTURE_LIST
|
||||||
|
#undef PCRE2_GENERAL_INFO_FUNCTIONS
|
||||||
|
#undef PCRE2_GENERAL_CONTEXT_FUNCTIONS
|
||||||
|
#undef PCRE2_COMPILE_CONTEXT_FUNCTIONS
|
||||||
|
#undef PCRE2_CONVERT_CONTEXT_FUNCTIONS
|
||||||
|
#undef PCRE2_MATCH_CONTEXT_FUNCTIONS
|
||||||
|
#undef PCRE2_COMPILE_FUNCTIONS
|
||||||
|
#undef PCRE2_PATTERN_INFO_FUNCTIONS
|
||||||
|
#undef PCRE2_MATCH_FUNCTIONS
|
||||||
|
#undef PCRE2_SUBSTRING_FUNCTIONS
|
||||||
|
#undef PCRE2_SERIALIZE_FUNCTIONS
|
||||||
|
#undef PCRE2_SUBSTITUTE_FUNCTION
|
||||||
|
#undef PCRE2_JIT_FUNCTIONS
|
||||||
|
#undef PCRE2_OTHER_FUNCTIONS
|
||||||
|
#undef PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||||
|
|
||||||
|
/* PCRE2_CODE_UNIT_WIDTH must be defined. If it is 8, 16, or 32, redefine
|
||||||
|
PCRE2_SUFFIX to use it. If it is 0, undefine the other macros and make
|
||||||
|
PCRE2_SUFFIX a no-op. Otherwise, generate an error. */
|
||||||
|
|
||||||
|
/* Drop the PCRE2_LOCAL_WIDTH-based definition used for the declarations above;
the replacement chosen below is the one seen by application code. */
#undef PCRE2_SUFFIX
|
||||||
|
#ifndef PCRE2_CODE_UNIT_WIDTH
|
||||||
|
#error PCRE2_CODE_UNIT_WIDTH must be defined before including pcre2.h.
|
||||||
|
#error Use 8, 16, or 32; or 0 for a multi-width application.
|
||||||
|
#else /* PCRE2_CODE_UNIT_WIDTH is defined */
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8 || \
|
||||||
|
PCRE2_CODE_UNIT_WIDTH == 16 || \
|
||||||
|
PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
|
/* Single-width application: generic names such as pcre2_compile now expand to
the stem plus the chosen width (pcre2_compile_8, _16 or _32). */
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a, PCRE2_CODE_UNIT_WIDTH)
|
||||||
|
#elif PCRE2_CODE_UNIT_WIDTH == 0
|
||||||
|
/* Multi-width application: the pasting helpers are removed and PCRE2_SUFFIX
returns its argument unchanged, so a generic name expands to its bare stem
(pcre2_compile becomes pcre2_compile_) rather than to a width-suffixed name. */
#undef PCRE2_JOIN
|
||||||
|
#undef PCRE2_GLUE
|
||||||
|
#define PCRE2_SUFFIX(a) a
|
||||||
|
#else
|
||||||
|
#error PCRE2_CODE_UNIT_WIDTH must be 0, 8, 16, or 32.
|
||||||
|
#endif
|
||||||
|
#endif /* PCRE2_CODE_UNIT_WIDTH is defined */
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
} /* extern "C" */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* PCRE2_H_IDEMPOTENT_GUARD */
|
||||||
|
|
||||||
|
/* End of pcre2.h */
|
||||||
1069
3rd/pcre2/src/pcre2.h.in
Normal file
1069
3rd/pcre2/src/pcre2.h.in
Normal file
@@ -0,0 +1,1069 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This is the public header file for the PCRE library, second API, to be
|
||||||
|
#included by applications that call PCRE2 functions.
|
||||||
|
|
||||||
|
Copyright (c) 2016-2024 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef PCRE2_H_IDEMPOTENT_GUARD
|
||||||
|
#define PCRE2_H_IDEMPOTENT_GUARD
|
||||||
|
|
||||||
|
/* The current PCRE version information. */
|
||||||
|
|
||||||
|
/* Each @NAME@ token below is a template placeholder, not valid C as written.
NOTE(review): presumably the build system substitutes real values when it
generates pcre2.h from this .in file -- confirm against the build scripts. */
#define PCRE2_MAJOR @PCRE2_MAJOR@
|
||||||
|
#define PCRE2_MINOR @PCRE2_MINOR@
|
||||||
|
#define PCRE2_PRERELEASE @PCRE2_PRERELEASE@
|
||||||
|
#define PCRE2_DATE @PCRE2_DATE@
|
||||||
|
|
||||||
|
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||||
|
imported have to be identified as such. When building PCRE2, the appropriate
|
||||||
|
export setting is defined in pcre2_internal.h, which includes this file. So we
|
||||||
|
don't change existing definitions of PCRE2_EXP_DECL. */
|
||||||
|
|
||||||
|
#if defined(_WIN32) && !defined(PCRE2_STATIC)
|
||||||
|
# ifndef PCRE2_EXP_DECL
|
||||||
|
# define PCRE2_EXP_DECL extern __declspec(dllimport)
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* By default, we use the standard "extern" declarations. */
|
||||||
|
|
||||||
|
#ifndef PCRE2_EXP_DECL
|
||||||
|
# ifdef __cplusplus
|
||||||
|
# define PCRE2_EXP_DECL extern "C"
|
||||||
|
# else
|
||||||
|
# define PCRE2_EXP_DECL extern
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* When compiling with the MSVC compiler, it is sometimes necessary to include
|
||||||
|
a "calling convention" before exported function names. (This is secondhand
|
||||||
|
information; I know nothing about MSVC myself). For example, something like
|
||||||
|
|
||||||
|
void __cdecl function(....)
|
||||||
|
|
||||||
|
might be needed. In order to make this easy, all the exported functions have
|
||||||
|
PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not
|
||||||
|
set, we ensure here that it has no effect. */
|
||||||
|
|
||||||
|
#ifndef PCRE2_CALL_CONVENTION
|
||||||
|
#define PCRE2_CALL_CONVENTION
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Have to include limits.h, stdlib.h, and inttypes.h to ensure that size_t and
|
||||||
|
uint8_t, UCHAR_MAX, etc. are defined. Some systems that do have inttypes.h do
|
||||||
|
not have stdint.h, which is why we use inttypes.h, which according to the C
|
||||||
|
standard is a superset of stdint.h. If inttypes.h is not available the build
|
||||||
|
will break and the relevant values must be provided by some other means. */
|
||||||
|
|
||||||
|
#include <limits.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <inttypes.h>
|
||||||
|
|
||||||
|
/* Allow for C++ users compiling this directly. */
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* The following option bits can be passed to pcre2_compile(), pcre2_match(),
|
||||||
|
or pcre2_dfa_match(). PCRE2_NO_UTF_CHECK affects only the function to which it
|
||||||
|
is passed. Put these bits at the most significant end of the options word so
|
||||||
|
others can be added next to them. */
|
||||||
|
|
||||||
|
#define PCRE2_ANCHORED 0x80000000u
|
||||||
|
#define PCRE2_NO_UTF_CHECK 0x40000000u
|
||||||
|
#define PCRE2_ENDANCHORED 0x20000000u
|
||||||
|
|
||||||
|
/* The following option bits can be passed only to pcre2_compile(). However,
|
||||||
|
they may affect compilation, JIT compilation, and/or interpretive execution.
|
||||||
|
The following tags indicate which:
|
||||||
|
|
||||||
|
C alters what is compiled by pcre2_compile()
|
||||||
|
J alters what is compiled by pcre2_jit_compile()
|
||||||
|
M is inspected during pcre2_match() execution
|
||||||
|
D is inspected during pcre2_dfa_match() execution
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define PCRE2_ALLOW_EMPTY_CLASS 0x00000001u /* C */
|
||||||
|
#define PCRE2_ALT_BSUX 0x00000002u /* C */
|
||||||
|
#define PCRE2_AUTO_CALLOUT 0x00000004u /* C */
|
||||||
|
#define PCRE2_CASELESS 0x00000008u /* C */
|
||||||
|
#define PCRE2_DOLLAR_ENDONLY 0x00000010u /* J M D */
|
||||||
|
#define PCRE2_DOTALL 0x00000020u /* C */
|
||||||
|
#define PCRE2_DUPNAMES 0x00000040u /* C */
|
||||||
|
#define PCRE2_EXTENDED 0x00000080u /* C */
|
||||||
|
#define PCRE2_FIRSTLINE 0x00000100u /* J M D */
|
||||||
|
#define PCRE2_MATCH_UNSET_BACKREF 0x00000200u /* C J M */
|
||||||
|
#define PCRE2_MULTILINE 0x00000400u /* C */
|
||||||
|
#define PCRE2_NEVER_UCP 0x00000800u /* C */
|
||||||
|
#define PCRE2_NEVER_UTF 0x00001000u /* C */
|
||||||
|
#define PCRE2_NO_AUTO_CAPTURE 0x00002000u /* C */
|
||||||
|
#define PCRE2_NO_AUTO_POSSESS 0x00004000u /* C */
|
||||||
|
#define PCRE2_NO_DOTSTAR_ANCHOR 0x00008000u /* C */
|
||||||
|
#define PCRE2_NO_START_OPTIMIZE 0x00010000u /* J M D */
|
||||||
|
#define PCRE2_UCP 0x00020000u /* C J M D */
|
||||||
|
#define PCRE2_UNGREEDY 0x00040000u /* C */
|
||||||
|
#define PCRE2_UTF 0x00080000u /* C J M D */
|
||||||
|
#define PCRE2_NEVER_BACKSLASH_C 0x00100000u /* C */
|
||||||
|
#define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */
|
||||||
|
#define PCRE2_ALT_VERBNAMES 0x00400000u /* C */
|
||||||
|
#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */
|
||||||
|
#define PCRE2_EXTENDED_MORE 0x01000000u /* C */
|
||||||
|
#define PCRE2_LITERAL 0x02000000u /* C */
|
||||||
|
#define PCRE2_MATCH_INVALID_UTF 0x04000000u /* J M D */
|
||||||
|
#define PCRE2_ALT_EXTENDED_CLASS 0x08000000u /* C */
|
||||||
|
|
||||||
|
/* An additional compile options word is available in the compile context. */
|
||||||
|
|
||||||
|
#define PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES 0x00000001u /* C */
|
||||||
|
#define PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL 0x00000002u /* C */
|
||||||
|
#define PCRE2_EXTRA_MATCH_WORD 0x00000004u /* C */
|
||||||
|
#define PCRE2_EXTRA_MATCH_LINE 0x00000008u /* C */
|
||||||
|
#define PCRE2_EXTRA_ESCAPED_CR_IS_LF 0x00000010u /* C */
|
||||||
|
#define PCRE2_EXTRA_ALT_BSUX 0x00000020u /* C */
|
||||||
|
#define PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK 0x00000040u /* C */
|
||||||
|
#define PCRE2_EXTRA_CASELESS_RESTRICT 0x00000080u /* C */
|
||||||
|
#define PCRE2_EXTRA_ASCII_BSD 0x00000100u /* C */
|
||||||
|
#define PCRE2_EXTRA_ASCII_BSS 0x00000200u /* C */
|
||||||
|
#define PCRE2_EXTRA_ASCII_BSW 0x00000400u /* C */
|
||||||
|
#define PCRE2_EXTRA_ASCII_POSIX 0x00000800u /* C */
|
||||||
|
#define PCRE2_EXTRA_ASCII_DIGIT 0x00001000u /* C */
|
||||||
|
#define PCRE2_EXTRA_PYTHON_OCTAL 0x00002000u /* C */
|
||||||
|
#define PCRE2_EXTRA_NO_BS0 0x00004000u /* C */
|
||||||
|
#define PCRE2_EXTRA_NEVER_CALLOUT 0x00008000u /* C */
|
||||||
|
#define PCRE2_EXTRA_TURKISH_CASING 0x00010000u /* C */
|
||||||
|
|
||||||
|
/* These are for pcre2_jit_compile(). */
|
||||||
|
|
||||||
|
#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */
|
||||||
|
#define PCRE2_JIT_PARTIAL_SOFT 0x00000002u
|
||||||
|
#define PCRE2_JIT_PARTIAL_HARD 0x00000004u
|
||||||
|
#define PCRE2_JIT_INVALID_UTF 0x00000100u
|
||||||
|
#define PCRE2_JIT_TEST_ALLOC 0x00000200u
|
||||||
|
|
||||||
|
/* These are for pcre2_match(), pcre2_dfa_match(), pcre2_jit_match(), and
|
||||||
|
pcre2_substitute(). Some are allowed only for one of the functions, and in
|
||||||
|
these cases it is noted below. Note that PCRE2_ANCHORED, PCRE2_ENDANCHORED and
|
||||||
|
PCRE2_NO_UTF_CHECK can also be passed to these functions (though
|
||||||
|
pcre2_jit_match() ignores the last of these since it bypasses all sanity checks). */
|
||||||
|
|
||||||
|
#define PCRE2_NOTBOL 0x00000001u
|
||||||
|
#define PCRE2_NOTEOL 0x00000002u
|
||||||
|
#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */
|
||||||
|
#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. */
|
||||||
|
#define PCRE2_PARTIAL_SOFT 0x00000010u
|
||||||
|
#define PCRE2_PARTIAL_HARD 0x00000020u
|
||||||
|
#define PCRE2_DFA_RESTART 0x00000040u /* pcre2_dfa_match() only */
|
||||||
|
#define PCRE2_DFA_SHORTEST 0x00000080u /* pcre2_dfa_match() only */
|
||||||
|
#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u /* pcre2_substitute() only */
|
||||||
|
#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u /* pcre2_substitute() only */
|
||||||
|
#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u /* pcre2_substitute() only */
|
||||||
|
#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u /* pcre2_substitute() only */
|
||||||
|
#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u /* pcre2_substitute() only */
|
||||||
|
#define PCRE2_NO_JIT 0x00002000u /* not for pcre2_dfa_match() */
|
||||||
|
#define PCRE2_COPY_MATCHED_SUBJECT 0x00004000u
|
||||||
|
#define PCRE2_SUBSTITUTE_LITERAL 0x00008000u /* pcre2_substitute() only */
|
||||||
|
#define PCRE2_SUBSTITUTE_MATCHED 0x00010000u /* pcre2_substitute() only */
|
||||||
|
#define PCRE2_SUBSTITUTE_REPLACEMENT_ONLY 0x00020000u /* pcre2_substitute() only */
|
||||||
|
#define PCRE2_DISABLE_RECURSELOOP_CHECK 0x00040000u /* not for pcre2_dfa_match() or pcre2_jit_match() */
|
||||||
|
|
||||||
|
/* Options for pcre2_pattern_convert(). */
|
||||||
|
|
||||||
|
#define PCRE2_CONVERT_UTF 0x00000001u
|
||||||
|
#define PCRE2_CONVERT_NO_UTF_CHECK 0x00000002u
|
||||||
|
#define PCRE2_CONVERT_POSIX_BASIC 0x00000004u
|
||||||
|
#define PCRE2_CONVERT_POSIX_EXTENDED 0x00000008u
|
||||||
|
#define PCRE2_CONVERT_GLOB 0x00000010u
|
||||||
|
#define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000030u /* PCRE2_CONVERT_GLOB (0x10) | 0x20 */
|
||||||
|
#define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000050u /* PCRE2_CONVERT_GLOB (0x10) | 0x40 */
|
||||||
|
|
||||||
|
/* Newline and \R settings, for use in compile contexts. The newline values
|
||||||
|
must be kept in step with values set in config.h, and all values in both sets must be
|
||||||
|
greater than zero. */
|
||||||
|
|
||||||
|
#define PCRE2_NEWLINE_CR 1
|
||||||
|
#define PCRE2_NEWLINE_LF 2
|
||||||
|
#define PCRE2_NEWLINE_CRLF 3
|
||||||
|
#define PCRE2_NEWLINE_ANY 4
|
||||||
|
#define PCRE2_NEWLINE_ANYCRLF 5
|
||||||
|
#define PCRE2_NEWLINE_NUL 6
|
||||||
|
|
||||||
|
#define PCRE2_BSR_UNICODE 1
|
||||||
|
#define PCRE2_BSR_ANYCRLF 2
|
||||||
|
|
||||||
|
/* Error codes for pcre2_compile(). Some of these are also used by
|
||||||
|
pcre2_pattern_convert(). */
|
||||||
|
|
||||||
|
#define PCRE2_ERROR_END_BACKSLASH 101
|
||||||
|
#define PCRE2_ERROR_END_BACKSLASH_C 102
|
||||||
|
#define PCRE2_ERROR_UNKNOWN_ESCAPE 103
|
||||||
|
#define PCRE2_ERROR_QUANTIFIER_OUT_OF_ORDER 104
|
||||||
|
#define PCRE2_ERROR_QUANTIFIER_TOO_BIG 105
|
||||||
|
#define PCRE2_ERROR_MISSING_SQUARE_BRACKET 106
|
||||||
|
#define PCRE2_ERROR_ESCAPE_INVALID_IN_CLASS 107
|
||||||
|
#define PCRE2_ERROR_CLASS_RANGE_ORDER 108
|
||||||
|
#define PCRE2_ERROR_QUANTIFIER_INVALID 109
|
||||||
|
#define PCRE2_ERROR_INTERNAL_UNEXPECTED_REPEAT 110
|
||||||
|
#define PCRE2_ERROR_INVALID_AFTER_PARENS_QUERY 111
|
||||||
|
#define PCRE2_ERROR_POSIX_CLASS_NOT_IN_CLASS 112
|
||||||
|
#define PCRE2_ERROR_POSIX_NO_SUPPORT_COLLATING 113
|
||||||
|
#define PCRE2_ERROR_MISSING_CLOSING_PARENTHESIS 114
|
||||||
|
#define PCRE2_ERROR_BAD_SUBPATTERN_REFERENCE 115
|
||||||
|
#define PCRE2_ERROR_NULL_PATTERN 116
|
||||||
|
#define PCRE2_ERROR_BAD_OPTIONS 117
|
||||||
|
#define PCRE2_ERROR_MISSING_COMMENT_CLOSING 118
|
||||||
|
#define PCRE2_ERROR_PARENTHESES_NEST_TOO_DEEP 119
|
||||||
|
#define PCRE2_ERROR_PATTERN_TOO_LARGE 120
|
||||||
|
#define PCRE2_ERROR_HEAP_FAILED 121
|
||||||
|
#define PCRE2_ERROR_UNMATCHED_CLOSING_PARENTHESIS 122
|
||||||
|
#define PCRE2_ERROR_INTERNAL_CODE_OVERFLOW 123
|
||||||
|
#define PCRE2_ERROR_MISSING_CONDITION_CLOSING 124
|
||||||
|
#define PCRE2_ERROR_LOOKBEHIND_NOT_FIXED_LENGTH 125
|
||||||
|
#define PCRE2_ERROR_ZERO_RELATIVE_REFERENCE 126
|
||||||
|
#define PCRE2_ERROR_TOO_MANY_CONDITION_BRANCHES 127
|
||||||
|
#define PCRE2_ERROR_CONDITION_ASSERTION_EXPECTED 128
|
||||||
|
#define PCRE2_ERROR_BAD_RELATIVE_REFERENCE 129
|
||||||
|
#define PCRE2_ERROR_UNKNOWN_POSIX_CLASS 130
|
||||||
|
#define PCRE2_ERROR_INTERNAL_STUDY_ERROR 131
|
||||||
|
#define PCRE2_ERROR_UNICODE_NOT_SUPPORTED 132
|
||||||
|
#define PCRE2_ERROR_PARENTHESES_STACK_CHECK 133
|
||||||
|
#define PCRE2_ERROR_CODE_POINT_TOO_BIG 134
|
||||||
|
#define PCRE2_ERROR_LOOKBEHIND_TOO_COMPLICATED 135
|
||||||
|
#define PCRE2_ERROR_LOOKBEHIND_INVALID_BACKSLASH_C 136
|
||||||
|
#define PCRE2_ERROR_UNSUPPORTED_ESCAPE_SEQUENCE 137
|
||||||
|
#define PCRE2_ERROR_CALLOUT_NUMBER_TOO_BIG 138
|
||||||
|
#define PCRE2_ERROR_MISSING_CALLOUT_CLOSING 139
|
||||||
|
#define PCRE2_ERROR_ESCAPE_INVALID_IN_VERB 140
|
||||||
|
#define PCRE2_ERROR_UNRECOGNIZED_AFTER_QUERY_P 141
|
||||||
|
#define PCRE2_ERROR_MISSING_NAME_TERMINATOR 142
|
||||||
|
#define PCRE2_ERROR_DUPLICATE_SUBPATTERN_NAME 143
|
||||||
|
#define PCRE2_ERROR_INVALID_SUBPATTERN_NAME 144
|
||||||
|
#define PCRE2_ERROR_UNICODE_PROPERTIES_UNAVAILABLE 145
|
||||||
|
#define PCRE2_ERROR_MALFORMED_UNICODE_PROPERTY 146
|
||||||
|
#define PCRE2_ERROR_UNKNOWN_UNICODE_PROPERTY 147
|
||||||
|
#define PCRE2_ERROR_SUBPATTERN_NAME_TOO_LONG 148
|
||||||
|
#define PCRE2_ERROR_TOO_MANY_NAMED_SUBPATTERNS 149
|
||||||
|
#define PCRE2_ERROR_CLASS_INVALID_RANGE 150
|
||||||
|
#define PCRE2_ERROR_OCTAL_BYTE_TOO_BIG 151
|
||||||
|
#define PCRE2_ERROR_INTERNAL_OVERRAN_WORKSPACE 152
|
||||||
|
#define PCRE2_ERROR_INTERNAL_MISSING_SUBPATTERN 153
|
||||||
|
#define PCRE2_ERROR_DEFINE_TOO_MANY_BRANCHES 154
|
||||||
|
#define PCRE2_ERROR_BACKSLASH_O_MISSING_BRACE 155
|
||||||
|
#define PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE 156
|
||||||
|
#define PCRE2_ERROR_BACKSLASH_G_SYNTAX 157
|
||||||
|
#define PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING 158
|
||||||
|
/* Error 159 is obsolete and should now never occur */
|
||||||
|
#define PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED 159
|
||||||
|
#define PCRE2_ERROR_VERB_UNKNOWN 160
|
||||||
|
#define PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG 161
|
||||||
|
#define PCRE2_ERROR_SUBPATTERN_NAME_EXPECTED 162
|
||||||
|
#define PCRE2_ERROR_INTERNAL_PARSED_OVERFLOW 163
|
||||||
|
#define PCRE2_ERROR_INVALID_OCTAL 164
|
||||||
|
#define PCRE2_ERROR_SUBPATTERN_NAMES_MISMATCH 165
|
||||||
|
#define PCRE2_ERROR_MARK_MISSING_ARGUMENT 166
|
||||||
|
#define PCRE2_ERROR_INVALID_HEXADECIMAL 167
|
||||||
|
#define PCRE2_ERROR_BACKSLASH_C_SYNTAX 168
|
||||||
|
#define PCRE2_ERROR_BACKSLASH_K_SYNTAX 169
|
||||||
|
#define PCRE2_ERROR_INTERNAL_BAD_CODE_LOOKBEHINDS 170
|
||||||
|
#define PCRE2_ERROR_BACKSLASH_N_IN_CLASS 171
|
||||||
|
#define PCRE2_ERROR_CALLOUT_STRING_TOO_LONG 172
|
||||||
|
#define PCRE2_ERROR_UNICODE_DISALLOWED_CODE_POINT 173
|
||||||
|
#define PCRE2_ERROR_UTF_IS_DISABLED 174
|
||||||
|
#define PCRE2_ERROR_UCP_IS_DISABLED 175
|
||||||
|
#define PCRE2_ERROR_VERB_NAME_TOO_LONG 176
|
||||||
|
#define PCRE2_ERROR_BACKSLASH_U_CODE_POINT_TOO_BIG 177
|
||||||
|
#define PCRE2_ERROR_MISSING_OCTAL_OR_HEX_DIGITS 178
|
||||||
|
#define PCRE2_ERROR_VERSION_CONDITION_SYNTAX 179
|
||||||
|
#define PCRE2_ERROR_INTERNAL_BAD_CODE_AUTO_POSSESS 180
|
||||||
|
#define PCRE2_ERROR_CALLOUT_NO_STRING_DELIMITER 181
|
||||||
|
#define PCRE2_ERROR_CALLOUT_BAD_STRING_DELIMITER 182
|
||||||
|
#define PCRE2_ERROR_BACKSLASH_C_CALLER_DISABLED 183
|
||||||
|
#define PCRE2_ERROR_QUERY_BARJX_NEST_TOO_DEEP 184
|
||||||
|
#define PCRE2_ERROR_BACKSLASH_C_LIBRARY_DISABLED 185
|
||||||
|
#define PCRE2_ERROR_PATTERN_TOO_COMPLICATED 186
|
||||||
|
#define PCRE2_ERROR_LOOKBEHIND_TOO_LONG 187
|
||||||
|
#define PCRE2_ERROR_PATTERN_STRING_TOO_LONG 188
|
||||||
|
#define PCRE2_ERROR_INTERNAL_BAD_CODE 189
|
||||||
|
#define PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP 190
|
||||||
|
#define PCRE2_ERROR_NO_SURROGATES_IN_UTF16 191
|
||||||
|
#define PCRE2_ERROR_BAD_LITERAL_OPTIONS 192
|
||||||
|
#define PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE 193
|
||||||
|
#define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS 194
|
||||||
|
#define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN 195
|
||||||
|
#define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE 196
|
||||||
|
#define PCRE2_ERROR_TOO_MANY_CAPTURES 197
|
||||||
|
#define PCRE2_ERROR_MISSING_OCTAL_DIGIT 198
|
||||||
|
#define PCRE2_ERROR_BACKSLASH_K_IN_LOOKAROUND 199
|
||||||
|
#define PCRE2_ERROR_MAX_VAR_LOOKBEHIND_EXCEEDED 200
|
||||||
|
#define PCRE2_ERROR_PATTERN_COMPILED_SIZE_TOO_BIG 201
|
||||||
|
#define PCRE2_ERROR_OVERSIZE_PYTHON_OCTAL 202
|
||||||
|
#define PCRE2_ERROR_CALLOUT_CALLER_DISABLED 203
|
||||||
|
#define PCRE2_ERROR_EXTRA_CASING_REQUIRES_UNICODE 204
|
||||||
|
#define PCRE2_ERROR_TURKISH_CASING_REQUIRES_UTF 205
|
||||||
|
#define PCRE2_ERROR_EXTRA_CASING_INCOMPATIBLE 206
|
||||||
|
#define PCRE2_ERROR_ECLASS_NEST_TOO_DEEP 207
|
||||||
|
#define PCRE2_ERROR_ECLASS_INVALID_OPERATOR 208
|
||||||
|
#define PCRE2_ERROR_ECLASS_UNEXPECTED_OPERATOR 209
|
||||||
|
#define PCRE2_ERROR_ECLASS_EXPECTED_OPERAND 210
|
||||||
|
#define PCRE2_ERROR_ECLASS_MIXED_OPERATORS 211
|
||||||
|
#define PCRE2_ERROR_ECLASS_HINT_SQUARE_BRACKET 212
|
||||||
|
#define PCRE2_ERROR_PERL_ECLASS_UNEXPECTED_EXPR 213
|
||||||
|
#define PCRE2_ERROR_PERL_ECLASS_EMPTY_EXPR 214
|
||||||
|
#define PCRE2_ERROR_PERL_ECLASS_MISSING_CLOSE 215
|
||||||
|
#define PCRE2_ERROR_PERL_ECLASS_UNEXPECTED_CHAR 216
|
||||||
|
|
||||||
|
/* "Expected" matching error codes: no match and partial match. */
|
||||||
|
|
||||||
|
#define PCRE2_ERROR_NOMATCH (-1)
|
||||||
|
#define PCRE2_ERROR_PARTIAL (-2)
|
||||||
|
|
||||||
|
/* Error codes for UTF-8 validity checks */
|
||||||
|
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR1 (-3)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR2 (-4)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR3 (-5)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR4 (-6)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR5 (-7)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR6 (-8)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR7 (-9)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR8 (-10)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR9 (-11)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR10 (-12)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR11 (-13)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR12 (-14)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR13 (-15)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR14 (-16)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR15 (-17)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR16 (-18)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR17 (-19)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR18 (-20)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR19 (-21)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR20 (-22)
|
||||||
|
#define PCRE2_ERROR_UTF8_ERR21 (-23)
|
||||||
|
|
||||||
|
/* Error codes for UTF-16 validity checks */
|
||||||
|
|
||||||
|
#define PCRE2_ERROR_UTF16_ERR1 (-24)
|
||||||
|
#define PCRE2_ERROR_UTF16_ERR2 (-25)
|
||||||
|
#define PCRE2_ERROR_UTF16_ERR3 (-26)
|
||||||
|
|
||||||
|
/* Error codes for UTF-32 validity checks */
|
||||||
|
|
||||||
|
#define PCRE2_ERROR_UTF32_ERR1 (-27)
|
||||||
|
#define PCRE2_ERROR_UTF32_ERR2 (-28)
|
||||||
|
|
||||||
|
/* Miscellaneous error codes for pcre2[_dfa]_match(), substring extraction
|
||||||
|
functions, context functions, and serializing functions. They are in numerical
|
||||||
|
order. Originally they were in alphabetical order too, but now that PCRE2 is
|
||||||
|
released, the numbers must not be changed. */
|
||||||
|
|
||||||
|
#define PCRE2_ERROR_BADDATA (-29)
|
||||||
|
#define PCRE2_ERROR_MIXEDTABLES (-30) /* Name was changed */
|
||||||
|
#define PCRE2_ERROR_BADMAGIC (-31)
|
||||||
|
#define PCRE2_ERROR_BADMODE (-32)
|
||||||
|
#define PCRE2_ERROR_BADOFFSET (-33)
|
||||||
|
#define PCRE2_ERROR_BADOPTION (-34)
|
||||||
|
#define PCRE2_ERROR_BADREPLACEMENT (-35)
|
||||||
|
#define PCRE2_ERROR_BADUTFOFFSET (-36)
|
||||||
|
#define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */
|
||||||
|
#define PCRE2_ERROR_DFA_BADRESTART (-38)
|
||||||
|
#define PCRE2_ERROR_DFA_RECURSE (-39)
|
||||||
|
#define PCRE2_ERROR_DFA_UCOND (-40)
|
||||||
|
#define PCRE2_ERROR_DFA_UFUNC (-41)
|
||||||
|
#define PCRE2_ERROR_DFA_UITEM (-42)
|
||||||
|
#define PCRE2_ERROR_DFA_WSSIZE (-43)
|
||||||
|
#define PCRE2_ERROR_INTERNAL (-44)
|
||||||
|
#define PCRE2_ERROR_JIT_BADOPTION (-45)
|
||||||
|
#define PCRE2_ERROR_JIT_STACKLIMIT (-46)
|
||||||
|
#define PCRE2_ERROR_MATCHLIMIT (-47)
|
||||||
|
#define PCRE2_ERROR_NOMEMORY (-48)
|
||||||
|
#define PCRE2_ERROR_NOSUBSTRING (-49)
|
||||||
|
#define PCRE2_ERROR_NOUNIQUESUBSTRING (-50)
|
||||||
|
#define PCRE2_ERROR_NULL (-51)
|
||||||
|
#define PCRE2_ERROR_RECURSELOOP (-52)
|
||||||
|
#define PCRE2_ERROR_DEPTHLIMIT (-53)
|
||||||
|
#define PCRE2_ERROR_RECURSIONLIMIT (-53) /* Obsolete synonym */
|
||||||
|
#define PCRE2_ERROR_UNAVAILABLE (-54)
|
||||||
|
#define PCRE2_ERROR_UNSET (-55)
|
||||||
|
#define PCRE2_ERROR_BADOFFSETLIMIT (-56)
|
||||||
|
#define PCRE2_ERROR_BADREPESCAPE (-57)
|
||||||
|
#define PCRE2_ERROR_REPMISSINGBRACE (-58)
|
||||||
|
#define PCRE2_ERROR_BADSUBSTITUTION (-59)
|
||||||
|
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
|
||||||
|
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
|
||||||
|
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
|
||||||
|
#define PCRE2_ERROR_HEAPLIMIT (-63)
|
||||||
|
#define PCRE2_ERROR_CONVERT_SYNTAX (-64)
|
||||||
|
#define PCRE2_ERROR_INTERNAL_DUPMATCH (-65)
|
||||||
|
#define PCRE2_ERROR_DFA_UINVALID_UTF (-66)
|
||||||
|
#define PCRE2_ERROR_INVALIDOFFSET (-67)
|
||||||
|
#define PCRE2_ERROR_JIT_UNSUPPORTED (-68)
|
||||||
|
#define PCRE2_ERROR_REPLACECASE (-69)
|
||||||
|
#define PCRE2_ERROR_TOOLARGEREPLACE (-70)
|
||||||
|
|
||||||
|
|
||||||
|
/* Request types for pcre2_pattern_info() */
|
||||||
|
|
||||||
|
#define PCRE2_INFO_ALLOPTIONS 0
|
||||||
|
#define PCRE2_INFO_ARGOPTIONS 1
|
||||||
|
#define PCRE2_INFO_BACKREFMAX 2
|
||||||
|
#define PCRE2_INFO_BSR 3
|
||||||
|
#define PCRE2_INFO_CAPTURECOUNT 4
|
||||||
|
#define PCRE2_INFO_FIRSTCODEUNIT 5
|
||||||
|
#define PCRE2_INFO_FIRSTCODETYPE 6
|
||||||
|
#define PCRE2_INFO_FIRSTBITMAP 7
|
||||||
|
#define PCRE2_INFO_HASCRORLF 8
|
||||||
|
#define PCRE2_INFO_JCHANGED 9
|
||||||
|
#define PCRE2_INFO_JITSIZE 10
|
||||||
|
#define PCRE2_INFO_LASTCODEUNIT 11
|
||||||
|
#define PCRE2_INFO_LASTCODETYPE 12
|
||||||
|
#define PCRE2_INFO_MATCHEMPTY 13
|
||||||
|
#define PCRE2_INFO_MATCHLIMIT 14
|
||||||
|
#define PCRE2_INFO_MAXLOOKBEHIND 15
|
||||||
|
#define PCRE2_INFO_MINLENGTH 16
|
||||||
|
#define PCRE2_INFO_NAMECOUNT 17
|
||||||
|
#define PCRE2_INFO_NAMEENTRYSIZE 18
|
||||||
|
#define PCRE2_INFO_NAMETABLE 19
|
||||||
|
#define PCRE2_INFO_NEWLINE 20
|
||||||
|
#define PCRE2_INFO_DEPTHLIMIT 21
|
||||||
|
#define PCRE2_INFO_RECURSIONLIMIT 21 /* Obsolete synonym */
|
||||||
|
#define PCRE2_INFO_SIZE 22
|
||||||
|
#define PCRE2_INFO_HASBACKSLASHC 23
|
||||||
|
#define PCRE2_INFO_FRAMESIZE 24
|
||||||
|
#define PCRE2_INFO_HEAPLIMIT 25
|
||||||
|
#define PCRE2_INFO_EXTRAOPTIONS 26
|
||||||
|
|
||||||
|
/* Request types for pcre2_config(). */
|
||||||
|
|
||||||
|
#define PCRE2_CONFIG_BSR 0
|
||||||
|
#define PCRE2_CONFIG_JIT 1
|
||||||
|
#define PCRE2_CONFIG_JITTARGET 2
|
||||||
|
#define PCRE2_CONFIG_LINKSIZE 3
|
||||||
|
#define PCRE2_CONFIG_MATCHLIMIT 4
|
||||||
|
#define PCRE2_CONFIG_NEWLINE 5
|
||||||
|
#define PCRE2_CONFIG_PARENSLIMIT 6
|
||||||
|
#define PCRE2_CONFIG_DEPTHLIMIT 7
|
||||||
|
#define PCRE2_CONFIG_RECURSIONLIMIT 7 /* Obsolete synonym */
|
||||||
|
#define PCRE2_CONFIG_STACKRECURSE 8 /* Obsolete */
|
||||||
|
#define PCRE2_CONFIG_UNICODE 9
|
||||||
|
#define PCRE2_CONFIG_UNICODE_VERSION 10
|
||||||
|
#define PCRE2_CONFIG_VERSION 11
|
||||||
|
#define PCRE2_CONFIG_HEAPLIMIT 12
|
||||||
|
#define PCRE2_CONFIG_NEVER_BACKSLASH_C 13
|
||||||
|
#define PCRE2_CONFIG_COMPILED_WIDTHS 14
|
||||||
|
#define PCRE2_CONFIG_TABLES_LENGTH 15
|
||||||
|
|
||||||
|
/* Optimization directives for pcre2_set_optimize().
|
||||||
|
For binary compatibility, only add to this list; do not renumber. */
|
||||||
|
|
||||||
|
#define PCRE2_OPTIMIZATION_NONE 0
|
||||||
|
#define PCRE2_OPTIMIZATION_FULL 1
|
||||||
|
|
||||||
|
#define PCRE2_AUTO_POSSESS 64
|
||||||
|
#define PCRE2_AUTO_POSSESS_OFF 65
|
||||||
|
#define PCRE2_DOTSTAR_ANCHOR 66
|
||||||
|
#define PCRE2_DOTSTAR_ANCHOR_OFF 67
|
||||||
|
#define PCRE2_START_OPTIMIZE 68
|
||||||
|
#define PCRE2_START_OPTIMIZE_OFF 69
|
||||||
|
|
||||||
|
/* Types used in pcre2_set_substitute_case_callout().
|
||||||
|
|
||||||
|
PCRE2_SUBSTITUTE_CASE_LOWER and PCRE2_SUBSTITUTE_CASE_UPPER are passed to the
|
||||||
|
callout to indicate that the entire callout input should be
|
||||||
|
case-transformed. PCRE2_SUBSTITUTE_CASE_TITLE_FIRST is passed to indicate that
|
||||||
|
only the first character or glyph should be transformed to Unicode titlecase,
|
||||||
|
and the rest to lowercase. */
|
||||||
|
|
||||||
|
#define PCRE2_SUBSTITUTE_CASE_LOWER 1
|
||||||
|
#define PCRE2_SUBSTITUTE_CASE_UPPER 2
|
||||||
|
#define PCRE2_SUBSTITUTE_CASE_TITLE_FIRST 3
|
||||||
|
|
||||||
|
/* Types for code units in patterns and subject strings. */
|
||||||
|
|
||||||
|
typedef uint8_t PCRE2_UCHAR8; /* 8-bit code unit */
|
||||||
|
typedef uint16_t PCRE2_UCHAR16; /* 16-bit code unit */
|
||||||
|
typedef uint32_t PCRE2_UCHAR32; /* 32-bit code unit */
|
||||||
|
|
||||||
|
typedef const PCRE2_UCHAR8 *PCRE2_SPTR8; /* Pointer to read-only 8-bit code units */
|
||||||
|
typedef const PCRE2_UCHAR16 *PCRE2_SPTR16; /* Pointer to read-only 16-bit code units */
|
||||||
|
typedef const PCRE2_UCHAR32 *PCRE2_SPTR32; /* Pointer to read-only 32-bit code units */
|
||||||
|
|
||||||
|
/* The PCRE2_SIZE type is used for all string lengths and offsets in PCRE2,
|
||||||
|
including pattern offsets for errors and subject offsets after a match. We
|
||||||
|
define special values to indicate zero-terminated strings and unset offsets in
|
||||||
|
the offset vector (ovector). */
|
||||||
|
|
||||||
|
#define PCRE2_SIZE size_t
|
||||||
|
#define PCRE2_SIZE_MAX SIZE_MAX
|
||||||
|
#define PCRE2_ZERO_TERMINATED (~(PCRE2_SIZE)0) /* All bits set: the largest PCRE2_SIZE value */
|
||||||
|
#define PCRE2_UNSET (~(PCRE2_SIZE)0) /* Same all-bits-set value as PCRE2_ZERO_TERMINATED */
|
||||||
|
|
||||||
|
/* Generic types for opaque structures and JIT callback functions. These
|
||||||
|
declarations are defined in a macro that is expanded for each width later. */
|
||||||
|
|
||||||
|
#define PCRE2_TYPES_LIST \
|
||||||
|
struct pcre2_real_general_context; \
|
||||||
|
typedef struct pcre2_real_general_context pcre2_general_context; \
|
||||||
|
\
|
||||||
|
struct pcre2_real_compile_context; \
|
||||||
|
typedef struct pcre2_real_compile_context pcre2_compile_context; \
|
||||||
|
\
|
||||||
|
struct pcre2_real_match_context; \
|
||||||
|
typedef struct pcre2_real_match_context pcre2_match_context; \
|
||||||
|
\
|
||||||
|
struct pcre2_real_convert_context; \
|
||||||
|
typedef struct pcre2_real_convert_context pcre2_convert_context; \
|
||||||
|
\
|
||||||
|
struct pcre2_real_code; \
|
||||||
|
typedef struct pcre2_real_code pcre2_code; \
|
||||||
|
\
|
||||||
|
struct pcre2_real_match_data; \
|
||||||
|
typedef struct pcre2_real_match_data pcre2_match_data; \
|
||||||
|
\
|
||||||
|
struct pcre2_real_jit_stack; \
|
||||||
|
typedef struct pcre2_real_jit_stack pcre2_jit_stack; \
|
||||||
|
\
|
||||||
|
typedef pcre2_jit_stack *(*pcre2_jit_callback)(void *);
|
||||||
|
|
||||||
|
|
||||||
|
/* The structures for passing out data via callout functions. We use structures
|
||||||
|
so that new fields can be added on the end in future versions, without changing
|
||||||
|
the API of the function, thereby allowing old clients to work without
|
||||||
|
modification. Define the generic versions in a macro; the width-specific
|
||||||
|
versions are generated from this macro below. */
|
||||||
|
|
||||||
|
/* Flags for the callout_flags field. These are cleared after a callout. */
|
||||||
|
|
||||||
|
#define PCRE2_CALLOUT_STARTMATCH 0x00000001u /* Set for each bumpalong */
|
||||||
|
#define PCRE2_CALLOUT_BACKTRACK 0x00000002u /* Set after a backtrack */
|
||||||
|
|
||||||
|
#define PCRE2_STRUCTURE_LIST \
|
||||||
|
typedef struct pcre2_callout_block { \
|
||||||
|
uint32_t version; /* Identifies version of block */ \
|
||||||
|
/* ------------------------ Version 0 ------------------------------- */ \
|
||||||
|
uint32_t callout_number; /* Number compiled into pattern */ \
|
||||||
|
uint32_t capture_top; /* Max current capture */ \
|
||||||
|
uint32_t capture_last; /* Most recently closed capture */ \
|
||||||
|
PCRE2_SIZE *offset_vector; /* The offset vector */ \
|
||||||
|
PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \
|
||||||
|
PCRE2_SPTR subject; /* The subject being matched */ \
|
||||||
|
PCRE2_SIZE subject_length; /* The length of the subject */ \
|
||||||
|
PCRE2_SIZE start_match; /* Offset to start of this match attempt */ \
|
||||||
|
PCRE2_SIZE current_position; /* Where we currently are in the subject */ \
|
||||||
|
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
|
||||||
|
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
|
||||||
|
/* ------------------- Added for Version 1 -------------------------- */ \
|
||||||
|
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
|
||||||
|
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
|
||||||
|
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
|
||||||
|
/* ------------------- Added for Version 2 -------------------------- */ \
|
||||||
|
uint32_t callout_flags; /* See above for list */ \
|
||||||
|
/* ------------------------------------------------------------------ */ \
|
||||||
|
} pcre2_callout_block; \
|
||||||
|
\
|
||||||
|
typedef struct pcre2_callout_enumerate_block { \
|
||||||
|
uint32_t version; /* Identifies version of block */ \
|
||||||
|
/* ------------------------ Version 0 ------------------------------- */ \
|
||||||
|
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
|
||||||
|
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
|
||||||
|
uint32_t callout_number; /* Number compiled into pattern */ \
|
||||||
|
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
|
||||||
|
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
|
||||||
|
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
|
||||||
|
/* ------------------------------------------------------------------ */ \
|
||||||
|
} pcre2_callout_enumerate_block; \
|
||||||
|
\
|
||||||
|
typedef struct pcre2_substitute_callout_block { \
|
||||||
|
uint32_t version; /* Identifies version of block */ \
|
||||||
|
/* ------------------------ Version 0 ------------------------------- */ \
|
||||||
|
PCRE2_SPTR input; /* Pointer to input subject string */ \
|
||||||
|
PCRE2_SPTR output; /* Pointer to output buffer */ \
|
||||||
|
PCRE2_SIZE output_offsets[2]; /* Changed portion of the output */ \
|
||||||
|
PCRE2_SIZE *ovector; /* Pointer to current ovector */ \
|
||||||
|
uint32_t oveccount; /* Count of pairs set in ovector */ \
|
||||||
|
uint32_t subscount; /* Substitution number */ \
|
||||||
|
/* ------------------------------------------------------------------ */ \
|
||||||
|
} pcre2_substitute_callout_block;
|
||||||
|
|
||||||
|
|
||||||
|
/* List the generic forms of all other functions in macros, which will be
|
||||||
|
expanded for each width below. Start with functions that give general
|
||||||
|
information. */
|
||||||
|
|
||||||
|
#define PCRE2_GENERAL_INFO_FUNCTIONS \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_config(uint32_t, void *);
|
||||||
|
|
||||||
|
|
||||||
|
/* Functions for manipulating contexts. */
|
||||||
|
|
||||||
|
#define PCRE2_GENERAL_CONTEXT_FUNCTIONS \
|
||||||
|
PCRE2_EXP_DECL pcre2_general_context *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_general_context_copy(pcre2_general_context *); \
|
||||||
|
PCRE2_EXP_DECL pcre2_general_context *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_general_context_create(void *(*)(size_t, void *), \
|
||||||
|
void (*)(void *, void *), void *); \
|
||||||
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_general_context_free(pcre2_general_context *);
|
||||||
|
|
||||||
|
#define PCRE2_COMPILE_CONTEXT_FUNCTIONS \
|
||||||
|
PCRE2_EXP_DECL pcre2_compile_context *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_compile_context_copy(pcre2_compile_context *); \
|
||||||
|
PCRE2_EXP_DECL pcre2_compile_context *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_compile_context_create(pcre2_general_context *);\
|
||||||
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_compile_context_free(pcre2_compile_context *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_character_tables(pcre2_compile_context *, const uint8_t *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_max_pattern_compiled_length(pcre2_compile_context *, PCRE2_SIZE); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_max_varlookbehind(pcre2_compile_context *, uint32_t); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_newline(pcre2_compile_context *, uint32_t); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_parens_nest_limit(pcre2_compile_context *, uint32_t); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_compile_recursion_guard(pcre2_compile_context *, \
|
||||||
|
int (*)(uint32_t, void *), void *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_optimize(pcre2_compile_context *, uint32_t);
|
||||||
|
|
||||||
|
#define PCRE2_MATCH_CONTEXT_FUNCTIONS \
|
||||||
|
PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_match_context_copy(pcre2_match_context *); \
|
||||||
|
PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_match_context_create(pcre2_general_context *); \
|
||||||
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_match_context_free(pcre2_match_context *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_callout(pcre2_match_context *, \
|
||||||
|
int (*)(pcre2_callout_block *, void *), void *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_substitute_callout(pcre2_match_context *, \
|
||||||
|
int (*)(pcre2_substitute_callout_block *, void *), void *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_substitute_case_callout(pcre2_match_context *, \
|
||||||
|
PCRE2_SIZE (*)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE, int, \
|
||||||
|
void *), \
|
||||||
|
void *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_heap_limit(pcre2_match_context *, uint32_t); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_match_limit(pcre2_match_context *, uint32_t); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_offset_limit(pcre2_match_context *, PCRE2_SIZE); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_recursion_limit(pcre2_match_context *, uint32_t); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_recursion_memory_management(pcre2_match_context *, \
|
||||||
|
void *(*)(size_t, void *), void (*)(void *, void *), void *);
|
||||||
|
|
||||||
|
#define PCRE2_CONVERT_CONTEXT_FUNCTIONS \
|
||||||
|
PCRE2_EXP_DECL pcre2_convert_context *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_convert_context_copy(pcre2_convert_context *); \
|
||||||
|
PCRE2_EXP_DECL pcre2_convert_context *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_convert_context_create(pcre2_general_context *); \
|
||||||
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_convert_context_free(pcre2_convert_context *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_glob_escape(pcre2_convert_context *, uint32_t); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_glob_separator(pcre2_convert_context *, uint32_t);
|
||||||
|
|
||||||
|
|
||||||
|
/* Functions concerned with compiling a pattern to PCRE internal code. */
|
||||||
|
|
||||||
|
#define PCRE2_COMPILE_FUNCTIONS \
|
||||||
|
PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_SIZE *, \
|
||||||
|
pcre2_compile_context *); \
|
||||||
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_code_free(pcre2_code *); \
|
||||||
|
PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_code_copy(const pcre2_code *); \
|
||||||
|
PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_code_copy_with_tables(const pcre2_code *);
|
||||||
|
|
||||||
|
|
||||||
|
/* Functions that give information about a compiled pattern. */
|
||||||
|
|
||||||
|
#define PCRE2_PATTERN_INFO_FUNCTIONS \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_pattern_info(const pcre2_code *, uint32_t, void *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_callout_enumerate(const pcre2_code *, \
|
||||||
|
int (*)(pcre2_callout_enumerate_block *, void *), void *);
|
||||||
|
|
||||||
|
|
||||||
|
/* Functions for running a match and inspecting the result. */
|
||||||
|
|
||||||
|
#define PCRE2_MATCH_FUNCTIONS \
|
||||||
|
PCRE2_EXP_DECL pcre2_match_data *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_match_data_create(uint32_t, pcre2_general_context *); \
|
||||||
|
PCRE2_EXP_DECL pcre2_match_data *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_match_data_create_from_pattern(const pcre2_code *, \
|
||||||
|
pcre2_general_context *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
|
||||||
|
uint32_t, pcre2_match_data *, pcre2_match_context *, int *, PCRE2_SIZE); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
|
||||||
|
uint32_t, pcre2_match_data *, pcre2_match_context *); \
|
||||||
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_match_data_free(pcre2_match_data *); \
|
||||||
|
PCRE2_EXP_DECL PCRE2_SPTR PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_get_mark(pcre2_match_data *); \
|
||||||
|
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_get_match_data_size(pcre2_match_data *); \
|
||||||
|
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_get_match_data_heapframes_size(pcre2_match_data *); \
|
||||||
|
PCRE2_EXP_DECL uint32_t PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_get_ovector_count(pcre2_match_data *); \
|
||||||
|
PCRE2_EXP_DECL PCRE2_SIZE *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_get_ovector_pointer(pcre2_match_data *); \
|
||||||
|
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_get_startchar(pcre2_match_data *);
|
||||||
|
|
||||||
|
|
||||||
|
/* Convenience functions for handling matched substrings. */
|
||||||
|
|
||||||
|
#define PCRE2_SUBSTRING_FUNCTIONS \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_substring_copy_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR *, \
|
||||||
|
PCRE2_SIZE *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_substring_copy_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR *, \
|
||||||
|
PCRE2_SIZE *); \
|
||||||
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_substring_free(PCRE2_UCHAR *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_substring_get_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR **, \
|
||||||
|
PCRE2_SIZE *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_substring_get_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR **, \
|
||||||
|
PCRE2_SIZE *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_substring_length_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_SIZE *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_substring_length_bynumber(pcre2_match_data *, uint32_t, PCRE2_SIZE *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_substring_nametable_scan(const pcre2_code *, PCRE2_SPTR, PCRE2_SPTR *, \
|
||||||
|
PCRE2_SPTR *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_substring_number_from_name(const pcre2_code *, PCRE2_SPTR); \
|
||||||
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_substring_list_free(PCRE2_UCHAR **); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_substring_list_get(pcre2_match_data *, PCRE2_UCHAR ***, PCRE2_SIZE **);
|
||||||
|
|
||||||
|
|
||||||
|
/* Functions for serializing / deserializing compiled patterns. */
|
||||||
|
|
||||||
|
#define PCRE2_SERIALIZE_FUNCTIONS \
|
||||||
|
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_serialize_encode(const pcre2_code **, int32_t, uint8_t **, \
|
||||||
|
PCRE2_SIZE *, pcre2_general_context *); \
|
||||||
|
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_serialize_decode(pcre2_code **, int32_t, const uint8_t *, \
|
||||||
|
pcre2_general_context *); \
|
||||||
|
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_serialize_get_number_of_codes(const uint8_t *); \
|
||||||
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_serialize_free(uint8_t *);
|
||||||
|
|
||||||
|
|
||||||
|
/* Convenience function for match + substitute. */
|
||||||
|
|
||||||
|
#define PCRE2_SUBSTITUTE_FUNCTION \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_substitute(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
|
||||||
|
uint32_t, pcre2_match_data *, pcre2_match_context *, PCRE2_SPTR, \
|
||||||
|
PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE *);
|
||||||
|
|
||||||
|
|
||||||
|
/* Functions for converting pattern source strings. */
|
||||||
|
|
||||||
|
#define PCRE2_CONVERT_FUNCTIONS \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_pattern_convert(PCRE2_SPTR, PCRE2_SIZE, uint32_t, PCRE2_UCHAR **, \
|
||||||
|
PCRE2_SIZE *, pcre2_convert_context *); \
|
||||||
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_converted_pattern_free(PCRE2_UCHAR *);
|
||||||
|
|
||||||
|
|
||||||
|
/* Functions for JIT processing */
|
||||||
|
|
||||||
|
#define PCRE2_JIT_FUNCTIONS \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_jit_compile(pcre2_code *, uint32_t); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_jit_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
|
||||||
|
uint32_t, pcre2_match_data *, pcre2_match_context *); \
|
||||||
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_jit_free_unused_memory(pcre2_general_context *); \
|
||||||
|
PCRE2_EXP_DECL pcre2_jit_stack *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_jit_stack_create(size_t, size_t, pcre2_general_context *); \
|
||||||
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_jit_stack_assign(pcre2_match_context *, pcre2_jit_callback, void *); \
|
||||||
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_jit_stack_free(pcre2_jit_stack *);
|
||||||
|
|
||||||
|
|
||||||
|
/* Other miscellaneous functions. */
|
||||||
|
|
||||||
|
#define PCRE2_OTHER_FUNCTIONS \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \
|
||||||
|
PCRE2_EXP_DECL const uint8_t *PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_maketables(pcre2_general_context *); \
|
||||||
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_maketables_free(pcre2_general_context *, const uint8_t *);
|
||||||
|
|
||||||
|
/* Define macros that generate width-specific names from generic versions. The
|
||||||
|
three-level macro scheme is necessary to get the macros expanded when we want
|
||||||
|
them to be. First we get the width from PCRE2_LOCAL_WIDTH, which is used for
|
||||||
|
generating three versions of everything below. After that, PCRE2_SUFFIX will be
|
||||||
|
re-defined to use PCRE2_CODE_UNIT_WIDTH, for use when macros such as
|
||||||
|
pcre2_compile are called by application code. */
|
||||||
|
|
||||||
|
#define PCRE2_JOIN(a,b) a ## b
|
||||||
|
#define PCRE2_GLUE(a,b) PCRE2_JOIN(a,b)
|
||||||
|
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a,PCRE2_LOCAL_WIDTH)
|
||||||
|
|
||||||
|
|
||||||
|
/* Data types */
|
||||||
|
|
||||||
|
#define PCRE2_UCHAR PCRE2_SUFFIX(PCRE2_UCHAR)
|
||||||
|
#define PCRE2_SPTR PCRE2_SUFFIX(PCRE2_SPTR)
|
||||||
|
|
||||||
|
#define pcre2_code PCRE2_SUFFIX(pcre2_code_)
|
||||||
|
#define pcre2_jit_callback PCRE2_SUFFIX(pcre2_jit_callback_)
|
||||||
|
#define pcre2_jit_stack PCRE2_SUFFIX(pcre2_jit_stack_)
|
||||||
|
|
||||||
|
#define pcre2_real_code PCRE2_SUFFIX(pcre2_real_code_)
|
||||||
|
#define pcre2_real_general_context PCRE2_SUFFIX(pcre2_real_general_context_)
|
||||||
|
#define pcre2_real_compile_context PCRE2_SUFFIX(pcre2_real_compile_context_)
|
||||||
|
#define pcre2_real_convert_context PCRE2_SUFFIX(pcre2_real_convert_context_)
|
||||||
|
#define pcre2_real_match_context PCRE2_SUFFIX(pcre2_real_match_context_)
|
||||||
|
#define pcre2_real_jit_stack PCRE2_SUFFIX(pcre2_real_jit_stack_)
|
||||||
|
#define pcre2_real_match_data PCRE2_SUFFIX(pcre2_real_match_data_)
|
||||||
|
|
||||||
|
|
||||||
|
/* Data blocks */
|
||||||
|
|
||||||
|
#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_)
|
||||||
|
#define pcre2_callout_enumerate_block PCRE2_SUFFIX(pcre2_callout_enumerate_block_)
|
||||||
|
#define pcre2_substitute_callout_block PCRE2_SUFFIX(pcre2_substitute_callout_block_)
|
||||||
|
#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_)
|
||||||
|
#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_)
|
||||||
|
#define pcre2_convert_context PCRE2_SUFFIX(pcre2_convert_context_)
|
||||||
|
#define pcre2_match_context PCRE2_SUFFIX(pcre2_match_context_)
|
||||||
|
#define pcre2_match_data PCRE2_SUFFIX(pcre2_match_data_)
|
||||||
|
|
||||||
|
|
||||||
|
/* Functions: the complete list in alphabetical order */
|
||||||
|
|
||||||
|
#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_)
|
||||||
|
#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_)
|
||||||
|
#define pcre2_code_copy_with_tables PCRE2_SUFFIX(pcre2_code_copy_with_tables_)
|
||||||
|
#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_)
|
||||||
|
#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_)
|
||||||
|
#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_)
|
||||||
|
#define pcre2_compile_context_create PCRE2_SUFFIX(pcre2_compile_context_create_)
|
||||||
|
#define pcre2_compile_context_free PCRE2_SUFFIX(pcre2_compile_context_free_)
|
||||||
|
#define pcre2_config PCRE2_SUFFIX(pcre2_config_)
|
||||||
|
#define pcre2_convert_context_copy PCRE2_SUFFIX(pcre2_convert_context_copy_)
|
||||||
|
#define pcre2_convert_context_create PCRE2_SUFFIX(pcre2_convert_context_create_)
|
||||||
|
#define pcre2_convert_context_free PCRE2_SUFFIX(pcre2_convert_context_free_)
|
||||||
|
#define pcre2_converted_pattern_free PCRE2_SUFFIX(pcre2_converted_pattern_free_)
|
||||||
|
#define pcre2_dfa_match PCRE2_SUFFIX(pcre2_dfa_match_)
|
||||||
|
#define pcre2_general_context_copy PCRE2_SUFFIX(pcre2_general_context_copy_)
|
||||||
|
#define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_)
|
||||||
|
#define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_)
|
||||||
|
#define pcre2_get_error_message PCRE2_SUFFIX(pcre2_get_error_message_)
|
||||||
|
#define pcre2_get_mark PCRE2_SUFFIX(pcre2_get_mark_)
|
||||||
|
#define pcre2_get_match_data_heapframes_size PCRE2_SUFFIX(pcre2_get_match_data_heapframes_size_)
|
||||||
|
#define pcre2_get_match_data_size PCRE2_SUFFIX(pcre2_get_match_data_size_)
|
||||||
|
#define pcre2_get_ovector_pointer PCRE2_SUFFIX(pcre2_get_ovector_pointer_)
|
||||||
|
#define pcre2_get_ovector_count PCRE2_SUFFIX(pcre2_get_ovector_count_)
|
||||||
|
#define pcre2_get_startchar PCRE2_SUFFIX(pcre2_get_startchar_)
|
||||||
|
#define pcre2_jit_compile PCRE2_SUFFIX(pcre2_jit_compile_)
|
||||||
|
#define pcre2_jit_match PCRE2_SUFFIX(pcre2_jit_match_)
|
||||||
|
#define pcre2_jit_free_unused_memory PCRE2_SUFFIX(pcre2_jit_free_unused_memory_)
|
||||||
|
#define pcre2_jit_stack_assign PCRE2_SUFFIX(pcre2_jit_stack_assign_)
|
||||||
|
#define pcre2_jit_stack_create PCRE2_SUFFIX(pcre2_jit_stack_create_)
|
||||||
|
#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_)
|
||||||
|
#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_)
|
||||||
|
#define pcre2_maketables_free PCRE2_SUFFIX(pcre2_maketables_free_)
|
||||||
|
#define pcre2_match PCRE2_SUFFIX(pcre2_match_)
|
||||||
|
#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_)
|
||||||
|
#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_)
|
||||||
|
#define pcre2_match_context_free PCRE2_SUFFIX(pcre2_match_context_free_)
|
||||||
|
#define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_)
|
||||||
|
#define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_)
|
||||||
|
#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_)
|
||||||
|
#define pcre2_pattern_convert PCRE2_SUFFIX(pcre2_pattern_convert_)
|
||||||
|
#define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_)
|
||||||
|
#define pcre2_serialize_decode PCRE2_SUFFIX(pcre2_serialize_decode_)
|
||||||
|
#define pcre2_serialize_encode PCRE2_SUFFIX(pcre2_serialize_encode_)
|
||||||
|
#define pcre2_serialize_free PCRE2_SUFFIX(pcre2_serialize_free_)
|
||||||
|
#define pcre2_serialize_get_number_of_codes PCRE2_SUFFIX(pcre2_serialize_get_number_of_codes_)
|
||||||
|
#define pcre2_set_bsr PCRE2_SUFFIX(pcre2_set_bsr_)
|
||||||
|
#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_)
|
||||||
|
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
||||||
|
#define pcre2_set_compile_extra_options PCRE2_SUFFIX(pcre2_set_compile_extra_options_)
|
||||||
|
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
||||||
|
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
|
||||||
|
#define pcre2_set_glob_escape PCRE2_SUFFIX(pcre2_set_glob_escape_)
|
||||||
|
#define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_)
|
||||||
|
#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
|
||||||
|
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
||||||
|
#define pcre2_set_max_varlookbehind PCRE2_SUFFIX(pcre2_set_max_varlookbehind_)
|
||||||
|
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
|
||||||
|
#define pcre2_set_max_pattern_compiled_length PCRE2_SUFFIX(pcre2_set_max_pattern_compiled_length_)
|
||||||
|
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
||||||
|
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
|
||||||
|
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
|
||||||
|
#define pcre2_set_optimize PCRE2_SUFFIX(pcre2_set_optimize_)
|
||||||
|
#define pcre2_set_substitute_callout PCRE2_SUFFIX(pcre2_set_substitute_callout_)
|
||||||
|
#define pcre2_set_substitute_case_callout PCRE2_SUFFIX(pcre2_set_substitute_case_callout_)
|
||||||
|
#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_)
|
||||||
|
#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_)
|
||||||
|
#define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_)
|
||||||
|
#define pcre2_substring_free PCRE2_SUFFIX(pcre2_substring_free_)
|
||||||
|
#define pcre2_substring_get_byname PCRE2_SUFFIX(pcre2_substring_get_byname_)
|
||||||
|
#define pcre2_substring_get_bynumber PCRE2_SUFFIX(pcre2_substring_get_bynumber_)
|
||||||
|
#define pcre2_substring_length_byname PCRE2_SUFFIX(pcre2_substring_length_byname_)
|
||||||
|
#define pcre2_substring_length_bynumber PCRE2_SUFFIX(pcre2_substring_length_bynumber_)
|
||||||
|
#define pcre2_substring_list_get PCRE2_SUFFIX(pcre2_substring_list_get_)
|
||||||
|
#define pcre2_substring_list_free PCRE2_SUFFIX(pcre2_substring_list_free_)
|
||||||
|
#define pcre2_substring_nametable_scan PCRE2_SUFFIX(pcre2_substring_nametable_scan_)
|
||||||
|
#define pcre2_substring_number_from_name PCRE2_SUFFIX(pcre2_substring_number_from_name_)
|
||||||
|
|
||||||
|
/* Keep this old function name for backwards compatibility */
|
||||||
|
#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)
|
||||||
|
|
||||||
|
/* Keep this obsolete function for backwards compatibility: it is now a noop. */
|
||||||
|
#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)
|
||||||
|
|
||||||
|
/* Now generate all three sets of width-specific structures and function
|
||||||
|
prototypes. */
|
||||||
|
|
||||||
|
#define PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS \
|
||||||
|
PCRE2_TYPES_LIST \
|
||||||
|
PCRE2_STRUCTURE_LIST \
|
||||||
|
PCRE2_GENERAL_INFO_FUNCTIONS \
|
||||||
|
PCRE2_GENERAL_CONTEXT_FUNCTIONS \
|
||||||
|
PCRE2_COMPILE_CONTEXT_FUNCTIONS \
|
||||||
|
PCRE2_CONVERT_CONTEXT_FUNCTIONS \
|
||||||
|
PCRE2_CONVERT_FUNCTIONS \
|
||||||
|
PCRE2_MATCH_CONTEXT_FUNCTIONS \
|
||||||
|
PCRE2_COMPILE_FUNCTIONS \
|
||||||
|
PCRE2_PATTERN_INFO_FUNCTIONS \
|
||||||
|
PCRE2_MATCH_FUNCTIONS \
|
||||||
|
PCRE2_SUBSTRING_FUNCTIONS \
|
||||||
|
PCRE2_SERIALIZE_FUNCTIONS \
|
||||||
|
PCRE2_SUBSTITUTE_FUNCTION \
|
||||||
|
PCRE2_JIT_FUNCTIONS \
|
||||||
|
PCRE2_OTHER_FUNCTIONS
|
||||||
|
|
||||||
|
#define PCRE2_LOCAL_WIDTH 8
|
||||||
|
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||||
|
#undef PCRE2_LOCAL_WIDTH
|
||||||
|
|
||||||
|
#define PCRE2_LOCAL_WIDTH 16
|
||||||
|
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||||
|
#undef PCRE2_LOCAL_WIDTH
|
||||||
|
|
||||||
|
#define PCRE2_LOCAL_WIDTH 32
|
||||||
|
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||||
|
#undef PCRE2_LOCAL_WIDTH
|
||||||
|
|
||||||
|
/* Undefine the list macros; they are no longer needed. */
|
||||||
|
|
||||||
|
#undef PCRE2_TYPES_LIST
|
||||||
|
#undef PCRE2_STRUCTURE_LIST
|
||||||
|
#undef PCRE2_GENERAL_INFO_FUNCTIONS
|
||||||
|
#undef PCRE2_GENERAL_CONTEXT_FUNCTIONS
|
||||||
|
#undef PCRE2_COMPILE_CONTEXT_FUNCTIONS
|
||||||
|
#undef PCRE2_CONVERT_CONTEXT_FUNCTIONS
|
||||||
|
#undef PCRE2_MATCH_CONTEXT_FUNCTIONS
|
||||||
|
#undef PCRE2_COMPILE_FUNCTIONS
|
||||||
|
#undef PCRE2_PATTERN_INFO_FUNCTIONS
|
||||||
|
#undef PCRE2_MATCH_FUNCTIONS
|
||||||
|
#undef PCRE2_SUBSTRING_FUNCTIONS
|
||||||
|
#undef PCRE2_SERIALIZE_FUNCTIONS
|
||||||
|
#undef PCRE2_SUBSTITUTE_FUNCTION
|
||||||
|
#undef PCRE2_JIT_FUNCTIONS
|
||||||
|
#undef PCRE2_OTHER_FUNCTIONS
|
||||||
|
#undef PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||||
|
|
||||||
|
/* PCRE2_CODE_UNIT_WIDTH must be defined. If it is 8, 16, or 32, redefine
|
||||||
|
PCRE2_SUFFIX to use it. If it is 0, undefine the other macros and make
|
||||||
|
PCRE2_SUFFIX a no-op. Otherwise, generate an error. */
|
||||||
|
|
||||||
|
#undef PCRE2_SUFFIX
|
||||||
|
#ifndef PCRE2_CODE_UNIT_WIDTH
|
||||||
|
#error PCRE2_CODE_UNIT_WIDTH must be defined before including pcre2.h.
|
||||||
|
#error Use 8, 16, or 32; or 0 for a multi-width application.
|
||||||
|
#else /* PCRE2_CODE_UNIT_WIDTH is defined */
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8 || \
|
||||||
|
PCRE2_CODE_UNIT_WIDTH == 16 || \
|
||||||
|
PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
|
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a, PCRE2_CODE_UNIT_WIDTH)
|
||||||
|
#elif PCRE2_CODE_UNIT_WIDTH == 0
|
||||||
|
#undef PCRE2_JOIN
|
||||||
|
#undef PCRE2_GLUE
|
||||||
|
#define PCRE2_SUFFIX(a) a
|
||||||
|
#else
|
||||||
|
#error PCRE2_CODE_UNIT_WIDTH must be 0, 8, 16, or 32.
|
||||||
|
#endif
|
||||||
|
#endif /* PCRE2_CODE_UNIT_WIDTH is defined */
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
} /* extern "C" */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* PCRE2_H_IDEMPOTENT_GUARD */
|
||||||
|
|
||||||
|
/* End of pcre2.h */
|
||||||
1412
3rd/pcre2/src/pcre2_auto_possess.c
Normal file
1412
3rd/pcre2/src/pcre2_auto_possess.c
Normal file
@@ -0,0 +1,1412 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* This module contains functions that scan a compiled pattern and change
|
||||||
|
repeats into possessive repeats where possible. */
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
#include "config.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#include "pcre2_internal.h"
|
||||||
|
|
||||||
|
/* This macro represents the max size of list[], which is used to keep
|
||||||
|
track of UCD info in several places; it should be kept in sync with the
|
||||||
|
value used by GenerateUcd.py */
|
||||||
|
#define MAX_LIST 8
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Tables for auto-possessification *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This table is used to check whether auto-possessification is possible
|
||||||
|
between adjacent character-type opcodes. The left-hand (repeated) opcode is
|
||||||
|
used to select the row, and the right-hand opcode is used to select the column.
|
||||||
|
A value of 1 means that auto-possessification is OK. For example, the second
|
||||||
|
value in the first row means that \D+\d can be turned into \D++\d.
|
||||||
|
|
||||||
|
The Unicode property types (\P and \p) have to be present to fill out the table
|
||||||
|
because of what their opcode values are, but the table values should always be
|
||||||
|
zero because property types are handled separately in the code. The last four
|
||||||
|
columns apply to items that cannot be repeated, so there is no need to have
|
||||||
|
rows for them. Note that OP_DIGIT etc. are generated only when PCRE2_UCP is
|
||||||
|
*not* set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
|
||||||
|
|
||||||
|
#define APTROWS (LAST_AUTOTAB_LEFT_OP - FIRST_AUTOTAB_OP + 1)
|
||||||
|
#define APTCOLS (LAST_AUTOTAB_RIGHT_OP - FIRST_AUTOTAB_OP + 1)
|
||||||
|
|
||||||
|
static const uint8_t autoposstab[APTROWS][APTCOLS] = {
|
||||||
|
/* \D \d \S \s \W \w . .+ \C \P \p \R \H \h \V \v \X \Z \z $ $M */
|
||||||
|
{ 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \D */
|
||||||
|
{ 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \d */
|
||||||
|
{ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \S */
|
||||||
|
{ 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \s */
|
||||||
|
{ 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \W */
|
||||||
|
{ 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \w */
|
||||||
|
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* . */
|
||||||
|
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* .+ */
|
||||||
|
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \C */
|
||||||
|
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* \P */
|
||||||
|
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* \p */
|
||||||
|
{ 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 }, /* \R */
|
||||||
|
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 }, /* \H */
|
||||||
|
{ 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0 }, /* \h */
|
||||||
|
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0 }, /* \V */
|
||||||
|
{ 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0 }, /* \v */
|
||||||
|
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 } /* \X */
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
/* This table is used to check whether auto-possessification is possible
|
||||||
|
between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP). The
|
||||||
|
left-hand (repeated) opcode is used to select the row, and the right-hand
|
||||||
|
opcode is used to select the column. The values are as follows:
|
||||||
|
|
||||||
|
0 Always return FALSE (never auto-possessify)
|
||||||
|
1 Character groups are distinct (possessify if both are OP_PROP)
|
||||||
|
2 Check character categories in the same group (general or particular)
|
||||||
|
3 TRUE if the two opcodes are not the same (PROP vs NOTPROP)
|
||||||
|
|
||||||
|
4 Check left general category vs right particular category
|
||||||
|
5 Check right general category vs left particular category
|
||||||
|
|
||||||
|
6 Left alphanum vs right general category
|
||||||
|
7 Left space vs right general category
|
||||||
|
8 Left word vs right general category
|
||||||
|
|
||||||
|
9 Right alphanum vs left general category
|
||||||
|
10 Right space vs left general category
|
||||||
|
11 Right word vs left general category
|
||||||
|
|
||||||
|
12 Left alphanum vs right particular category
|
||||||
|
13 Left space vs right particular category
|
||||||
|
14 Left word vs right particular category
|
||||||
|
|
||||||
|
15 Right alphanum vs left particular category
|
||||||
|
16 Right space vs left particular category
|
||||||
|
17 Right word vs left particular category
|
||||||
|
*/
|
||||||
|
|
||||||
|
static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = {
|
||||||
|
/* LAMP GC PC SC SCX ALNUM SPACE PXSPACE WORD CLIST UCNC BIDICL BOOL */
|
||||||
|
{ 3, 0, 0, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_LAMP */
|
||||||
|
{ 0, 2, 4, 0, 0, 9, 10, 10, 11, 0, 0, 0, 0 }, /* PT_GC */
|
||||||
|
{ 0, 5, 2, 0, 0, 15, 16, 16, 17, 0, 0, 0, 0 }, /* PT_PC */
|
||||||
|
{ 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SC */
|
||||||
|
{ 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SCX */
|
||||||
|
{ 3, 6, 12, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_ALNUM */
|
||||||
|
{ 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_SPACE */
|
||||||
|
{ 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_PXSPACE */
|
||||||
|
{ 0, 8, 14, 0, 0, 0, 1, 1, 3, 0, 0, 0, 0 }, /* PT_WORD */
|
||||||
|
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */
|
||||||
|
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0 }, /* PT_UCNC */
|
||||||
|
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_BIDICL */
|
||||||
|
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } /* PT_BOOL */
|
||||||
|
/* PT_ANY does not need a record. */
|
||||||
|
};
|
||||||
|
|
||||||
|
/* This table is used to check whether auto-possessification is possible
|
||||||
|
between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP) when one
|
||||||
|
specifies a general category and the other specifies a particular category. The
|
||||||
|
row is selected by the general category and the column by the particular
|
||||||
|
category. The value is 1 if the particular category is not part of the general
|
||||||
|
category. */
|
||||||
|
|
||||||
|
static const uint8_t catposstab[7][30] = {
|
||||||
|
/* Cc Cf Cn Co Cs Ll Lm Lo Lt Lu Mc Me Mn Nd Nl No Pc Pd Pe Pf Pi Po Ps Sc Sk Sm So Zl Zp Zs */
|
||||||
|
{ 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* C */
|
||||||
|
{ 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* L */
|
||||||
|
{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* M */
|
||||||
|
{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* N */
|
||||||
|
{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 }, /* P */
|
||||||
|
{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1 }, /* S */
|
||||||
|
{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 } /* Z */
|
||||||
|
};
|
||||||
|
|
||||||
|
/* This table is used when checking ALNUM, (PX)SPACE, and WORD against
|
||||||
|
a general or particular category. The properties in each row are those
|
||||||
|
that apply to the character set in question. Duplication means that a little
|
||||||
|
unnecessary work is done when checking, but this keeps things much simpler
|
||||||
|
because they can all use the same code. For more details see the comment where
|
||||||
|
this table is used.
|
||||||
|
|
||||||
|
Note: SPACE and PXSPACE used to be different because Perl excluded VT from
|
||||||
|
"space", but from Perl 5.18 it's included, so both categories are treated the
|
||||||
|
same here. */
|
||||||
|
|
||||||
|
static const uint8_t posspropstab[3][4] = {
|
||||||
|
{ ucp_L, ucp_N, ucp_N, ucp_Nl }, /* ALNUM, 3rd and 4th values redundant */
|
||||||
|
{ ucp_Z, ucp_Z, ucp_C, ucp_Cc }, /* SPACE and PXSPACE, 2nd value redundant */
|
||||||
|
{ ucp_L, ucp_N, ucp_P, ucp_Po } /* WORD */
|
||||||
|
};
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
/*************************************************
|
||||||
|
* Check a character and a property *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function is called by compare_opcodes() when a property item is
|
||||||
|
adjacent to a fixed character.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
c the character
|
||||||
|
ptype the property type
|
||||||
|
pdata the data for the type
|
||||||
|
negated TRUE if it's a negated property (\P or \p{^...})
|
||||||
|
|
||||||
|
Returns: TRUE if auto-possessifying is OK
|
||||||
|
*/
|
||||||
|
|
||||||
|
static BOOL
|
||||||
|
check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata,
|
||||||
|
BOOL negated)
|
||||||
|
{
|
||||||
|
BOOL ok, rc;
|
||||||
|
const uint32_t *p;
|
||||||
|
const ucd_record *prop = GET_UCD(c);
|
||||||
|
|
||||||
|
switch(ptype)
|
||||||
|
{
|
||||||
|
case PT_LAMP:
|
||||||
|
return (prop->chartype == ucp_Lu ||
|
||||||
|
prop->chartype == ucp_Ll ||
|
||||||
|
prop->chartype == ucp_Lt) == negated;
|
||||||
|
|
||||||
|
case PT_GC:
|
||||||
|
return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
|
||||||
|
|
||||||
|
case PT_PC:
|
||||||
|
return (pdata == prop->chartype) == negated;
|
||||||
|
|
||||||
|
case PT_SC:
|
||||||
|
return (pdata == prop->script) == negated;
|
||||||
|
|
||||||
|
case PT_SCX:
|
||||||
|
ok = (pdata == prop->script
|
||||||
|
|| MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), pdata) != 0);
|
||||||
|
return ok == negated;
|
||||||
|
|
||||||
|
/* These are specials */
|
||||||
|
|
||||||
|
case PT_ALNUM:
|
||||||
|
return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||||
|
PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
|
||||||
|
|
||||||
|
/* Perl space used to exclude VT, but from Perl 5.18 it is included, which
|
||||||
|
means that Perl space and POSIX space are now identical. PCRE was changed
|
||||||
|
at release 8.34. */
|
||||||
|
|
||||||
|
case PT_SPACE: /* Perl space */
|
||||||
|
case PT_PXSPACE: /* POSIX space */
|
||||||
|
switch(c)
|
||||||
|
{
|
||||||
|
HSPACE_CASES:
|
||||||
|
VSPACE_CASES:
|
||||||
|
rc = negated;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
rc = (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated;
|
||||||
|
}
|
||||||
|
return rc;
|
||||||
|
|
||||||
|
case PT_WORD:
|
||||||
|
return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||||
|
PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
|
||||||
|
c == CHAR_UNDERSCORE) == negated;
|
||||||
|
|
||||||
|
case PT_CLIST:
|
||||||
|
p = PRIV(ucd_caseless_sets) + prop->caseset;
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
if (c < *p) return !negated;
|
||||||
|
if (c == *p++) return negated;
|
||||||
|
}
|
||||||
|
PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Haven't yet thought these through. */
|
||||||
|
|
||||||
|
case PT_BIDICL:
|
||||||
|
return FALSE;
|
||||||
|
|
||||||
|
case PT_BOOL:
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Base opcode of repeated opcodes *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* Returns the base opcode for repeated single character type opcodes. If the
|
||||||
|
opcode is not a repeated character type, it returns with the original value.
|
||||||
|
|
||||||
|
Arguments: c opcode
|
||||||
|
Returns: base opcode for the type
|
||||||
|
*/
|
||||||
|
|
||||||
|
static PCRE2_UCHAR
|
||||||
|
get_repeat_base(PCRE2_UCHAR c)
|
||||||
|
{
|
||||||
|
return (c > OP_TYPEPOSUPTO)? c :
|
||||||
|
(c >= OP_TYPESTAR)? OP_TYPESTAR :
|
||||||
|
(c >= OP_NOTSTARI)? OP_NOTSTARI :
|
||||||
|
(c >= OP_NOTSTAR)? OP_NOTSTAR :
|
||||||
|
(c >= OP_STARI)? OP_STARI :
|
||||||
|
OP_STAR;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Fill the character property list *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* Checks whether the code points to an opcode that can take part in auto-
|
||||||
|
possessification, and if so, fills a list with its properties.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
code points to start of expression
|
||||||
|
utf TRUE if in UTF mode
|
||||||
|
ucp TRUE if in UCP mode
|
||||||
|
fcc points to the case-flipping table
|
||||||
|
list points to output list
|
||||||
|
list[0] will be filled with the opcode
|
||||||
|
list[1] will be non-zero if this opcode
|
||||||
|
can match an empty character string
|
||||||
|
list[2..7] depends on the opcode
|
||||||
|
|
||||||
|
Returns: points to the start of the next opcode if *code is accepted
|
||||||
|
NULL if *code is not accepted
|
||||||
|
*/
|
||||||
|
|
||||||
|
static PCRE2_SPTR
|
||||||
|
get_chr_property_list(PCRE2_SPTR code, BOOL utf, BOOL ucp, const uint8_t *fcc,
|
||||||
|
uint32_t *list)
|
||||||
|
{
|
||||||
|
PCRE2_UCHAR c = *code;
|
||||||
|
PCRE2_UCHAR base;
|
||||||
|
PCRE2_SPTR end;
|
||||||
|
PCRE2_SPTR class_end;
|
||||||
|
uint32_t chr;
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
uint32_t *clist_dest;
|
||||||
|
const uint32_t *clist_src;
|
||||||
|
#else
|
||||||
|
(void)utf; /* Suppress "unused parameter" compiler warnings */
|
||||||
|
(void)ucp;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
list[0] = c;
|
||||||
|
list[1] = FALSE;
|
||||||
|
code++;
|
||||||
|
|
||||||
|
if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
|
||||||
|
{
|
||||||
|
base = get_repeat_base(c);
|
||||||
|
c -= (base - OP_STAR);
|
||||||
|
|
||||||
|
if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO)
|
||||||
|
code += IMM2_SIZE;
|
||||||
|
|
||||||
|
list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT &&
|
||||||
|
c != OP_POSPLUS);
|
||||||
|
|
||||||
|
switch(base)
|
||||||
|
{
|
||||||
|
case OP_STAR:
|
||||||
|
list[0] = OP_CHAR;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_STARI:
|
||||||
|
list[0] = OP_CHARI;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_NOTSTAR:
|
||||||
|
list[0] = OP_NOT;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_NOTSTARI:
|
||||||
|
list[0] = OP_NOTI;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_TYPESTAR:
|
||||||
|
list[0] = *code;
|
||||||
|
code++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
c = list[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
switch(c)
|
||||||
|
{
|
||||||
|
case OP_NOT_DIGIT:
|
||||||
|
case OP_DIGIT:
|
||||||
|
case OP_NOT_WHITESPACE:
|
||||||
|
case OP_WHITESPACE:
|
||||||
|
case OP_NOT_WORDCHAR:
|
||||||
|
case OP_WORDCHAR:
|
||||||
|
case OP_ANY:
|
||||||
|
case OP_ALLANY:
|
||||||
|
case OP_ANYNL:
|
||||||
|
case OP_NOT_HSPACE:
|
||||||
|
case OP_HSPACE:
|
||||||
|
case OP_NOT_VSPACE:
|
||||||
|
case OP_VSPACE:
|
||||||
|
case OP_EXTUNI:
|
||||||
|
case OP_EODN:
|
||||||
|
case OP_EOD:
|
||||||
|
case OP_DOLL:
|
||||||
|
case OP_DOLLM:
|
||||||
|
return code;
|
||||||
|
|
||||||
|
case OP_CHAR:
|
||||||
|
case OP_NOT:
|
||||||
|
GETCHARINCTEST(chr, code);
|
||||||
|
list[2] = chr;
|
||||||
|
list[3] = NOTACHAR;
|
||||||
|
return code;
|
||||||
|
|
||||||
|
case OP_CHARI:
|
||||||
|
case OP_NOTI:
|
||||||
|
list[0] = (c == OP_CHARI) ? OP_CHAR : OP_NOT;
|
||||||
|
GETCHARINCTEST(chr, code);
|
||||||
|
list[2] = chr;
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (chr < 128 || (chr < 256 && !utf && !ucp))
|
||||||
|
list[3] = fcc[chr];
|
||||||
|
else
|
||||||
|
list[3] = UCD_OTHERCASE(chr);
|
||||||
|
#elif defined SUPPORT_WIDE_CHARS
|
||||||
|
list[3] = (chr < 256) ? fcc[chr] : chr;
|
||||||
|
#else
|
||||||
|
list[3] = fcc[chr];
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* The othercase might be the same value. */
|
||||||
|
|
||||||
|
if (chr == list[3])
|
||||||
|
list[3] = NOTACHAR;
|
||||||
|
else
|
||||||
|
list[4] = NOTACHAR;
|
||||||
|
return code;
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
case OP_PROP:
|
||||||
|
case OP_NOTPROP:
|
||||||
|
if (code[0] != PT_CLIST)
|
||||||
|
{
|
||||||
|
list[2] = code[0];
|
||||||
|
list[3] = code[1];
|
||||||
|
return code + 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Convert only if we have enough space. */
|
||||||
|
|
||||||
|
clist_src = PRIV(ucd_caseless_sets) + code[1];
|
||||||
|
clist_dest = list + 2;
|
||||||
|
code += 2;
|
||||||
|
|
||||||
|
do {
|
||||||
|
if (clist_dest >= list + MAX_LIST)
|
||||||
|
{
|
||||||
|
/* Early return if there is not enough space. GenerateUcd.py
|
||||||
|
generated a list with more than 5 characters and something
|
||||||
|
must be done about that going forward. */
|
||||||
|
PCRE2_DEBUG_UNREACHABLE(); /* Remove if it ever triggers */
|
||||||
|
list[2] = code[0];
|
||||||
|
list[3] = code[1];
|
||||||
|
return code;
|
||||||
|
}
|
||||||
|
*clist_dest++ = *clist_src;
|
||||||
|
}
|
||||||
|
while(*clist_src++ != NOTACHAR);
|
||||||
|
|
||||||
|
/* All characters are stored. The terminating NOTACHAR is copied from the
|
||||||
|
clist itself. */
|
||||||
|
|
||||||
|
list[0] = (c == OP_PROP) ? OP_CHAR : OP_NOT;
|
||||||
|
return code;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
case OP_NCLASS:
|
||||||
|
case OP_CLASS:
|
||||||
|
#ifdef SUPPORT_WIDE_CHARS
|
||||||
|
case OP_XCLASS:
|
||||||
|
case OP_ECLASS:
|
||||||
|
if (c == OP_XCLASS || c == OP_ECLASS)
|
||||||
|
end = code + GET(code, 0) - 1;
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
end = code + 32 / sizeof(PCRE2_UCHAR);
|
||||||
|
class_end = end;
|
||||||
|
|
||||||
|
switch(*end)
|
||||||
|
{
|
||||||
|
case OP_CRSTAR:
|
||||||
|
case OP_CRMINSTAR:
|
||||||
|
case OP_CRQUERY:
|
||||||
|
case OP_CRMINQUERY:
|
||||||
|
case OP_CRPOSSTAR:
|
||||||
|
case OP_CRPOSQUERY:
|
||||||
|
list[1] = TRUE;
|
||||||
|
end++;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_CRPLUS:
|
||||||
|
case OP_CRMINPLUS:
|
||||||
|
case OP_CRPOSPLUS:
|
||||||
|
end++;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_CRRANGE:
|
||||||
|
case OP_CRMINRANGE:
|
||||||
|
case OP_CRPOSRANGE:
|
||||||
|
list[1] = (GET2(end, 1) == 0);
|
||||||
|
end += 1 + 2 * IMM2_SIZE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
list[2] = (uint32_t)(end - code);
|
||||||
|
list[3] = (uint32_t)(end - class_end);
|
||||||
|
return end;
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL; /* Opcode not accepted */
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Scan further character sets for match *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* Checks whether the base and the current opcode have a common character, in
|
||||||
|
which case the base cannot be possessified.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
code points to the byte code
|
||||||
|
utf TRUE in UTF mode
|
||||||
|
ucp TRUE in UCP mode
|
||||||
|
cb compile data block
|
||||||
|
base_list the data list of the base opcode
|
||||||
|
base_end the end of the base opcode
|
||||||
|
rec_limit points to recursion depth counter
|
||||||
|
|
||||||
|
Returns: TRUE if the auto-possessification is possible
|
||||||
|
*/
|
||||||
|
|
||||||
|
static BOOL
|
||||||
|
compare_opcodes(PCRE2_SPTR code, BOOL utf, BOOL ucp, const compile_block *cb,
|
||||||
|
const uint32_t *base_list, PCRE2_SPTR base_end, int *rec_limit)
|
||||||
|
{
|
||||||
|
PCRE2_UCHAR c;
|
||||||
|
uint32_t list[MAX_LIST];
|
||||||
|
const uint32_t *chr_ptr;
|
||||||
|
const uint32_t *ochr_ptr;
|
||||||
|
const uint32_t *list_ptr;
|
||||||
|
PCRE2_SPTR next_code;
|
||||||
|
#ifdef SUPPORT_WIDE_CHARS
|
||||||
|
PCRE2_SPTR xclass_flags;
|
||||||
|
#endif
|
||||||
|
const uint8_t *class_bitset;
|
||||||
|
const uint8_t *set1, *set2, *set_end;
|
||||||
|
uint32_t chr;
|
||||||
|
BOOL accepted, invert_bits;
|
||||||
|
BOOL entered_a_group = FALSE;
|
||||||
|
|
||||||
|
if (--(*rec_limit) <= 0) return FALSE; /* Recursion has gone too deep */
|
||||||
|
|
||||||
|
/* Note: the base_list[1] contains whether the current opcode has a greedy
|
||||||
|
(represented by a non-zero value) quantifier. This is a different from
|
||||||
|
other character type lists, which store here that the character iterator
|
||||||
|
matches to an empty string (also represented by a non-zero value). */
|
||||||
|
|
||||||
|
for(;;)
|
||||||
|
{
|
||||||
|
PCRE2_SPTR bracode;
|
||||||
|
|
||||||
|
/* All operations move the code pointer forward.
|
||||||
|
Therefore infinite recursions are not possible. */
|
||||||
|
|
||||||
|
c = *code;
|
||||||
|
|
||||||
|
/* Skip over callouts */
|
||||||
|
|
||||||
|
if (c == OP_CALLOUT)
|
||||||
|
{
|
||||||
|
code += PRIV(OP_lengths)[c];
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (c == OP_CALLOUT_STR)
|
||||||
|
{
|
||||||
|
code += GET(code, 1 + 2*LINK_SIZE);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* At the end of a branch, skip to the end of the group and process it. */
|
||||||
|
|
||||||
|
if (c == OP_ALT)
|
||||||
|
{
|
||||||
|
do code += GET(code, 1); while (*code == OP_ALT);
|
||||||
|
c = *code;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Inspect the next opcode. */
|
||||||
|
|
||||||
|
switch(c)
|
||||||
|
{
|
||||||
|
/* We can always possessify a greedy iterator at the end of the pattern,
|
||||||
|
which is reached after skipping over the final OP_KET. A non-greedy
|
||||||
|
iterator must never be possessified. */
|
||||||
|
|
||||||
|
case OP_END:
|
||||||
|
return base_list[1] != 0;
|
||||||
|
|
||||||
|
/* When an iterator is at the end of certain kinds of group we can inspect
|
||||||
|
what follows the group by skipping over the closing ket. Note that this
|
||||||
|
does not apply to OP_KETRMAX or OP_KETRMIN because what follows any given
|
||||||
|
iteration is variable (could be another iteration or could be the next
|
||||||
|
item). As these two opcodes are not listed in the next switch, they will
|
||||||
|
end up as the next code to inspect, and return FALSE by virtue of being
|
||||||
|
unsupported. */
|
||||||
|
|
||||||
|
case OP_KET:
|
||||||
|
case OP_KETRPOS:
|
||||||
|
/* The non-greedy case cannot be converted to a possessive form. */
|
||||||
|
|
||||||
|
if (base_list[1] == 0) return FALSE;
|
||||||
|
|
||||||
|
/* If the bracket is capturing it might be referenced by an OP_RECURSE
|
||||||
|
so its last iterator can never be possessified if the pattern contains
|
||||||
|
recursions. (This could be improved by keeping a list of group numbers that
|
||||||
|
are called by recursion.) */
|
||||||
|
|
||||||
|
bracode = code - GET(code, 1);
|
||||||
|
switch(*bracode)
|
||||||
|
{
|
||||||
|
case OP_CBRA:
|
||||||
|
case OP_SCBRA:
|
||||||
|
case OP_CBRAPOS:
|
||||||
|
case OP_SCBRAPOS:
|
||||||
|
if (cb->had_recurse) return FALSE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* A script run might have to backtrack if the iterated item can match
|
||||||
|
characters from more than one script. So give up unless repeating an
|
||||||
|
explicit character. */
|
||||||
|
|
||||||
|
case OP_SCRIPT_RUN:
|
||||||
|
if (base_list[0] != OP_CHAR && base_list[0] != OP_CHARI)
|
||||||
|
return FALSE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Atomic sub-patterns and forward assertions can always auto-possessify
|
||||||
|
their last iterator. However, if the group was entered as a result of
|
||||||
|
checking a previous iterator, this is not possible. */
|
||||||
|
|
||||||
|
case OP_ASSERT:
|
||||||
|
case OP_ASSERT_NOT:
|
||||||
|
case OP_ONCE:
|
||||||
|
return !entered_a_group;
|
||||||
|
|
||||||
|
/* Fixed-length lookbehinds can be treated the same way, but variable
|
||||||
|
length lookbehinds must not auto-possessify their last iterator. Note
|
||||||
|
that in order to identify a variable length lookbehind we must check
|
||||||
|
through all branches, because some may be of fixed length. */
|
||||||
|
|
||||||
|
case OP_ASSERTBACK:
|
||||||
|
case OP_ASSERTBACK_NOT:
|
||||||
|
do
|
||||||
|
{
|
||||||
|
if (bracode[1+LINK_SIZE] == OP_VREVERSE) return FALSE; /* Variable */
|
||||||
|
bracode += GET(bracode, 1);
|
||||||
|
}
|
||||||
|
while (*bracode == OP_ALT);
|
||||||
|
return !entered_a_group; /* Not variable length */
|
||||||
|
|
||||||
|
/* Non-atomic assertions - don't possessify last iterator. This needs
|
||||||
|
more thought. */
|
||||||
|
|
||||||
|
case OP_ASSERT_NA:
|
||||||
|
case OP_ASSERTBACK_NA:
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Skip over the bracket and inspect what comes next. */
|
||||||
|
|
||||||
|
code += PRIV(OP_lengths)[c];
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* Handle cases where the next item is a group. */
|
||||||
|
|
||||||
|
case OP_ONCE:
|
||||||
|
case OP_BRA:
|
||||||
|
case OP_CBRA:
|
||||||
|
next_code = code + GET(code, 1);
|
||||||
|
code += PRIV(OP_lengths)[c];
|
||||||
|
|
||||||
|
/* Check each branch. We have to recurse a level for all but the last
|
||||||
|
branch. */
|
||||||
|
|
||||||
|
while (*next_code == OP_ALT)
|
||||||
|
{
|
||||||
|
if (!compare_opcodes(code, utf, ucp, cb, base_list, base_end, rec_limit))
|
||||||
|
return FALSE;
|
||||||
|
code = next_code + 1 + LINK_SIZE;
|
||||||
|
next_code += GET(next_code, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
entered_a_group = TRUE;
|
||||||
|
continue;
|
||||||
|
|
||||||
|
case OP_BRAZERO:
|
||||||
|
case OP_BRAMINZERO:
|
||||||
|
|
||||||
|
next_code = code + 1;
|
||||||
|
if (*next_code != OP_BRA && *next_code != OP_CBRA &&
|
||||||
|
*next_code != OP_ONCE) return FALSE;
|
||||||
|
|
||||||
|
do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
|
||||||
|
|
||||||
|
/* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
|
||||||
|
|
||||||
|
next_code += 1 + LINK_SIZE;
|
||||||
|
if (!compare_opcodes(next_code, utf, ucp, cb, base_list, base_end,
|
||||||
|
rec_limit))
|
||||||
|
return FALSE;
|
||||||
|
|
||||||
|
code += PRIV(OP_lengths)[c];
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* The next opcode does not need special handling; fall through and use it
|
||||||
|
to see if the base can be possessified. */
|
||||||
|
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We now have the next appropriate opcode to compare with the base. Check
|
||||||
|
for a supported opcode, and load its properties. */
|
||||||
|
|
||||||
|
code = get_chr_property_list(code, utf, ucp, cb->fcc, list);
|
||||||
|
if (code == NULL) return FALSE; /* Unsupported */
|
||||||
|
|
||||||
|
/* If either opcode is a small character list, set pointers for comparing
|
||||||
|
characters from that list with another list, or with a property. */
|
||||||
|
|
||||||
|
if (base_list[0] == OP_CHAR)
|
||||||
|
{
|
||||||
|
chr_ptr = base_list + 2;
|
||||||
|
list_ptr = list;
|
||||||
|
}
|
||||||
|
else if (list[0] == OP_CHAR)
|
||||||
|
{
|
||||||
|
chr_ptr = list + 2;
|
||||||
|
list_ptr = base_list;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Character bitsets can also be compared to certain opcodes. */
|
||||||
|
|
||||||
|
else if (base_list[0] == OP_CLASS || list[0] == OP_CLASS
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
/* In 8 bit, non-UTF mode, OP_CLASS and OP_NCLASS are the same. */
|
||||||
|
|| (!utf && (base_list[0] == OP_NCLASS || list[0] == OP_NCLASS))
|
||||||
|
#endif
|
||||||
|
)
|
||||||
|
{
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
if (base_list[0] == OP_CLASS || (!utf && base_list[0] == OP_NCLASS))
|
||||||
|
#else
|
||||||
|
if (base_list[0] == OP_CLASS)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
set1 = (const uint8_t *)(base_end - base_list[2]);
|
||||||
|
list_ptr = list;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
set1 = (const uint8_t *)(code - list[2]);
|
||||||
|
list_ptr = base_list;
|
||||||
|
}
|
||||||
|
|
||||||
|
invert_bits = FALSE;
|
||||||
|
switch(list_ptr[0])
|
||||||
|
{
|
||||||
|
case OP_CLASS:
|
||||||
|
case OP_NCLASS:
|
||||||
|
set2 = (const uint8_t *)
|
||||||
|
((list_ptr == list ? code : base_end) - list_ptr[2]);
|
||||||
|
break;
|
||||||
|
|
||||||
|
#ifdef SUPPORT_WIDE_CHARS
|
||||||
|
case OP_XCLASS:
|
||||||
|
xclass_flags = (list_ptr == list ? code : base_end) -
|
||||||
|
list_ptr[2] + LINK_SIZE;
|
||||||
|
if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE;
|
||||||
|
if ((*xclass_flags & XCL_MAP) == 0)
|
||||||
|
{
|
||||||
|
/* No bits are set for characters < 256. */
|
||||||
|
if (list[1] == 0) return (*xclass_flags & XCL_NOT) == 0;
|
||||||
|
/* Might be an empty repeat. */
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
set2 = (const uint8_t *)(xclass_flags + 1);
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
case OP_NOT_DIGIT:
|
||||||
|
invert_bits = TRUE;
|
||||||
|
/* Fall through */
|
||||||
|
case OP_DIGIT:
|
||||||
|
set2 = (const uint8_t *)(cb->cbits + cbit_digit);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_NOT_WHITESPACE:
|
||||||
|
invert_bits = TRUE;
|
||||||
|
/* Fall through */
|
||||||
|
case OP_WHITESPACE:
|
||||||
|
set2 = (const uint8_t *)(cb->cbits + cbit_space);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_NOT_WORDCHAR:
|
||||||
|
invert_bits = TRUE;
|
||||||
|
/* Fall through */
|
||||||
|
case OP_WORDCHAR:
|
||||||
|
set2 = (const uint8_t *)(cb->cbits + cbit_word);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Because the bit sets are unaligned bytes, we need to perform byte
|
||||||
|
comparison here. */
|
||||||
|
|
||||||
|
set_end = set1 + 32;
|
||||||
|
if (invert_bits)
|
||||||
|
{
|
||||||
|
do
|
||||||
|
{
|
||||||
|
if ((*set1++ & ~(*set2++)) != 0) return FALSE;
|
||||||
|
}
|
||||||
|
while (set1 < set_end);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
do
|
||||||
|
{
|
||||||
|
if ((*set1++ & *set2++) != 0) return FALSE;
|
||||||
|
}
|
||||||
|
while (set1 < set_end);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (list[1] == 0) return TRUE;
|
||||||
|
/* Might be an empty repeat. */
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Some property combinations also acceptable. Unicode property opcodes are
|
||||||
|
processed specially; the rest can be handled with a lookup table. */
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
uint32_t leftop, rightop;
|
||||||
|
|
||||||
|
leftop = base_list[0];
|
||||||
|
rightop = list[0];
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
accepted = FALSE; /* Always set in non-unicode case. */
|
||||||
|
if (leftop == OP_PROP || leftop == OP_NOTPROP)
|
||||||
|
{
|
||||||
|
if (rightop == OP_EOD)
|
||||||
|
accepted = TRUE;
|
||||||
|
else if (rightop == OP_PROP || rightop == OP_NOTPROP)
|
||||||
|
{
|
||||||
|
int n;
|
||||||
|
const uint8_t *p;
|
||||||
|
BOOL same = leftop == rightop;
|
||||||
|
BOOL lisprop = leftop == OP_PROP;
|
||||||
|
BOOL risprop = rightop == OP_PROP;
|
||||||
|
BOOL bothprop = lisprop && risprop;
|
||||||
|
|
||||||
|
/* There's a table that specifies how each combination is to be
|
||||||
|
processed:
|
||||||
|
0 Always return FALSE (never auto-possessify)
|
||||||
|
1 Character groups are distinct (possessify if both are OP_PROP)
|
||||||
|
2 Check character categories in the same group (general or particular)
|
||||||
|
3 Return TRUE if the two opcodes are not the same
|
||||||
|
... see comments below
|
||||||
|
*/
|
||||||
|
|
||||||
|
n = propposstab[base_list[2]][list[2]];
|
||||||
|
switch(n)
|
||||||
|
{
|
||||||
|
case 0: break;
|
||||||
|
case 1: accepted = bothprop; break;
|
||||||
|
case 2: accepted = (base_list[3] == list[3]) != same; break;
|
||||||
|
case 3: accepted = !same; break;
|
||||||
|
|
||||||
|
case 4: /* Left general category, right particular category */
|
||||||
|
accepted = risprop && catposstab[base_list[3]][list[3]] == same;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 5: /* Right general category, left particular category */
|
||||||
|
accepted = lisprop && catposstab[list[3]][base_list[3]] == same;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* This code is logically tricky. Think hard before fiddling with it.
|
||||||
|
The posspropstab table has four entries per row. Each row relates to
|
||||||
|
one of PCRE's special properties such as ALNUM or SPACE or WORD.
|
||||||
|
Only WORD actually needs all four entries, but using repeats for the
|
||||||
|
others means they can all use the same code below.
|
||||||
|
|
||||||
|
The first two entries in each row are Unicode general categories, and
|
||||||
|
apply always, because all the characters they include are part of the
|
||||||
|
PCRE character set. The third and fourth entries are a general and a
|
||||||
|
particular category, respectively, that include one or more relevant
|
||||||
|
characters. One or the other is used, depending on whether the check
|
||||||
|
is for a general or a particular category. However, in both cases the
|
||||||
|
category contains more characters than the specials that are defined
|
||||||
|
for the property being tested against. Therefore, it cannot be used
|
||||||
|
in a NOTPROP case.
|
||||||
|
|
||||||
|
Example: the row for WORD contains ucp_L, ucp_N, ucp_P, ucp_Po.
|
||||||
|
Underscore is covered by ucp_P or ucp_Po. */
|
||||||
|
|
||||||
|
case 6: /* Left alphanum vs right general category */
|
||||||
|
case 7: /* Left space vs right general category */
|
||||||
|
case 8: /* Left word vs right general category */
|
||||||
|
p = posspropstab[n-6];
|
||||||
|
accepted = risprop && lisprop ==
|
||||||
|
(list[3] != p[0] &&
|
||||||
|
list[3] != p[1] &&
|
||||||
|
(list[3] != p[2] || !lisprop));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 9: /* Right alphanum vs left general category */
|
||||||
|
case 10: /* Right space vs left general category */
|
||||||
|
case 11: /* Right word vs left general category */
|
||||||
|
p = posspropstab[n-9];
|
||||||
|
accepted = lisprop && risprop ==
|
||||||
|
(base_list[3] != p[0] &&
|
||||||
|
base_list[3] != p[1] &&
|
||||||
|
(base_list[3] != p[2] || !risprop));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 12: /* Left alphanum vs right particular category */
|
||||||
|
case 13: /* Left space vs right particular category */
|
||||||
|
case 14: /* Left word vs right particular category */
|
||||||
|
p = posspropstab[n-12];
|
||||||
|
accepted = risprop && lisprop ==
|
||||||
|
(catposstab[p[0]][list[3]] &&
|
||||||
|
catposstab[p[1]][list[3]] &&
|
||||||
|
(list[3] != p[3] || !lisprop));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 15: /* Right alphanum vs left particular category */
|
||||||
|
case 16: /* Right space vs left particular category */
|
||||||
|
case 17: /* Right word vs left particular category */
|
||||||
|
p = posspropstab[n-15];
|
||||||
|
accepted = lisprop && risprop ==
|
||||||
|
(catposstab[p[0]][base_list[3]] &&
|
||||||
|
catposstab[p[1]][base_list[3]] &&
|
||||||
|
(base_list[3] != p[3] || !risprop));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
else
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
|
||||||
|
rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
|
||||||
|
autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
|
||||||
|
|
||||||
|
if (!accepted) return FALSE;
|
||||||
|
|
||||||
|
if (list[1] == 0) return TRUE;
|
||||||
|
/* Might be an empty repeat. */
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Control reaches here only if one of the items is a small character list.
|
||||||
|
All characters are checked against the other side. */
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
chr = *chr_ptr;
|
||||||
|
|
||||||
|
switch(list_ptr[0])
|
||||||
|
{
|
||||||
|
case OP_CHAR:
|
||||||
|
ochr_ptr = list_ptr + 2;
|
||||||
|
do
|
||||||
|
{
|
||||||
|
if (chr == *ochr_ptr) return FALSE;
|
||||||
|
ochr_ptr++;
|
||||||
|
}
|
||||||
|
while(*ochr_ptr != NOTACHAR);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_NOT:
|
||||||
|
ochr_ptr = list_ptr + 2;
|
||||||
|
do
|
||||||
|
{
|
||||||
|
if (chr == *ochr_ptr)
|
||||||
|
break;
|
||||||
|
ochr_ptr++;
|
||||||
|
}
|
||||||
|
while(*ochr_ptr != NOTACHAR);
|
||||||
|
if (*ochr_ptr == NOTACHAR) return FALSE; /* Not found */
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Note that OP_DIGIT etc. are generated only when PCRE2_UCP is *not*
|
||||||
|
set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
|
||||||
|
|
||||||
|
case OP_DIGIT:
|
||||||
|
if (chr < 256 && (cb->ctypes[chr] & ctype_digit) != 0) return FALSE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_NOT_DIGIT:
|
||||||
|
if (chr > 255 || (cb->ctypes[chr] & ctype_digit) == 0) return FALSE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_WHITESPACE:
|
||||||
|
if (chr < 256 && (cb->ctypes[chr] & ctype_space) != 0) return FALSE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_NOT_WHITESPACE:
|
||||||
|
if (chr > 255 || (cb->ctypes[chr] & ctype_space) == 0) return FALSE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_WORDCHAR:
|
||||||
|
if (chr < 255 && (cb->ctypes[chr] & ctype_word) != 0) return FALSE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_NOT_WORDCHAR:
|
||||||
|
if (chr > 255 || (cb->ctypes[chr] & ctype_word) == 0) return FALSE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_HSPACE:
|
||||||
|
switch(chr)
|
||||||
|
{
|
||||||
|
HSPACE_CASES: return FALSE;
|
||||||
|
default: break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_NOT_HSPACE:
|
||||||
|
switch(chr)
|
||||||
|
{
|
||||||
|
HSPACE_CASES: break;
|
||||||
|
default: return FALSE;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_ANYNL:
|
||||||
|
case OP_VSPACE:
|
||||||
|
switch(chr)
|
||||||
|
{
|
||||||
|
VSPACE_CASES: return FALSE;
|
||||||
|
default: break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_NOT_VSPACE:
|
||||||
|
switch(chr)
|
||||||
|
{
|
||||||
|
VSPACE_CASES: break;
|
||||||
|
default: return FALSE;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_DOLL:
|
||||||
|
case OP_EODN:
|
||||||
|
switch (chr)
|
||||||
|
{
|
||||||
|
case CHAR_CR:
|
||||||
|
case CHAR_LF:
|
||||||
|
case CHAR_VT:
|
||||||
|
case CHAR_FF:
|
||||||
|
case CHAR_NEL:
|
||||||
|
#ifndef EBCDIC
|
||||||
|
case 0x2028:
|
||||||
|
case 0x2029:
|
||||||
|
#endif /* Not EBCDIC */
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_EOD: /* Can always possessify before \z */
|
||||||
|
break;
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
case OP_PROP:
|
||||||
|
case OP_NOTPROP:
|
||||||
|
if (!check_char_prop(chr, list_ptr[2], list_ptr[3],
|
||||||
|
list_ptr[0] == OP_NOTPROP))
|
||||||
|
return FALSE;
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
case OP_NCLASS:
|
||||||
|
if (chr > 255) return FALSE;
|
||||||
|
/* Fall through */
|
||||||
|
|
||||||
|
case OP_CLASS:
|
||||||
|
if (chr > 255) break;
|
||||||
|
class_bitset = (const uint8_t *)
|
||||||
|
((list_ptr == list ? code : base_end) - list_ptr[2]);
|
||||||
|
if ((class_bitset[chr >> 3] & (1u << (chr & 7))) != 0) return FALSE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
#ifdef SUPPORT_WIDE_CHARS
|
||||||
|
case OP_XCLASS:
|
||||||
|
if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) -
|
||||||
|
list_ptr[2] + LINK_SIZE, (const uint8_t*)cb->start_code, utf))
|
||||||
|
return FALSE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_ECLASS:
|
||||||
|
if (PRIV(eclass)(chr,
|
||||||
|
(list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE,
|
||||||
|
(list_ptr == list ? code : base_end) - list_ptr[3],
|
||||||
|
(const uint8_t*)cb->start_code, utf))
|
||||||
|
return FALSE;
|
||||||
|
break;
|
||||||
|
#endif /* SUPPORT_WIDE_CHARS */
|
||||||
|
|
||||||
|
default:
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
chr_ptr++;
|
||||||
|
}
|
||||||
|
while(*chr_ptr != NOTACHAR);
|
||||||
|
|
||||||
|
/* At least one character must be matched from this opcode. */
|
||||||
|
|
||||||
|
if (list[1] == 0) return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
|
||||||
|
return FALSE; /* Avoid compiler warnings */
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Scan compiled regex for auto-possession *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* Replaces single character iterations with their possessive alternatives
|
||||||
|
if appropriate. This function modifies the compiled opcode! Hitting a
|
||||||
|
non-existent opcode may indicate a bug in PCRE2, but it can also be caused if a
|
||||||
|
bad UTF string was compiled with PCRE2_NO_UTF_CHECK. The rec_limit catches
|
||||||
|
overly complicated or large patterns. In these cases, the check just stops,
|
||||||
|
leaving the remainder of the pattern unpossessified.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
code points to start of the byte code
|
||||||
|
cb compile data block
|
||||||
|
|
||||||
|
Returns: 0 for success
|
||||||
|
              -1 if a non-existent opcode is encountered
|
||||||
|
*/
|
||||||
|
|
||||||
|
int
|
||||||
|
PRIV(auto_possessify)(PCRE2_UCHAR *code, const compile_block *cb)
|
||||||
|
{
|
||||||
|
PCRE2_UCHAR c;
|
||||||
|
PCRE2_SPTR end;
|
||||||
|
PCRE2_UCHAR *repeat_opcode;
|
||||||
|
uint32_t list[MAX_LIST];
|
||||||
|
/* A single budget: the same counter is handed (by address) to every
compare_opcodes() call made from this function. */
int rec_limit = 1000; /* Was 10,000 but clang+ASAN uses a lot of stack. */
|
||||||
|
BOOL utf = (cb->external_options & PCRE2_UTF) != 0;
|
||||||
|
BOOL ucp = (cb->external_options & PCRE2_UCP) != 0;
|
||||||
|
|
||||||
|
/* Walk the compiled code one opcode at a time. The loop only exits via the
two returns: -1 for an out-of-table opcode, 0 when OP_END is reached. */
for (;;)
|
||||||
|
{
|
||||||
|
c = *code;
|
||||||
|
|
||||||
|
if (c >= OP_TABLE_LENGTH)
|
||||||
|
{
|
||||||
|
PCRE2_DEBUG_UNREACHABLE();
|
||||||
|
return -1; /* Something gone wrong */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Case 1: a single-character (or character-type) repeat opcode. */
if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
|
||||||
|
{
|
||||||
|
/* Rebase c by the offset returned from get_repeat_base() (defined elsewhere)
so that it can be compared with the OP_STAR family names below. Only the local
copy changes; *code is left as it was. */
c -= get_repeat_base(c) - OP_STAR;
|
||||||
|
/* Anything above OP_MINUPTO after rebasing gets end == NULL and is therefore
never rewritten by the switch below. */
end = (c <= OP_MINUPTO) ?
|
||||||
|
get_chr_property_list(code, utf, ucp, cb->fcc, list) : NULL;
|
||||||
|
/* list[1] is TRUE only for the non-MIN forms (STAR, PLUS, QUERY, UPTO). */
list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
|
||||||
|
|
||||||
|
if (end != NULL && compare_opcodes(end, utf, ucp, cb, list, end,
|
||||||
|
&rec_limit))
|
||||||
|
{
|
||||||
|
/* Rewrite in place by adding the distance to the possessive opcode.
NOTE(review): presumably an offset is added (instead of storing OP_POSxxx
directly) so that *code stays within its own opcode family, since c was
rebased but *code was not - confirm against the opcode table. */
switch(c)
|
||||||
|
{
|
||||||
|
case OP_STAR:
|
||||||
|
*code += OP_POSSTAR - OP_STAR;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_MINSTAR:
|
||||||
|
*code += OP_POSSTAR - OP_MINSTAR;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_PLUS:
|
||||||
|
*code += OP_POSPLUS - OP_PLUS;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_MINPLUS:
|
||||||
|
*code += OP_POSPLUS - OP_MINPLUS;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_QUERY:
|
||||||
|
*code += OP_POSQUERY - OP_QUERY;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_MINQUERY:
|
||||||
|
*code += OP_POSQUERY - OP_MINQUERY;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_UPTO:
|
||||||
|
*code += OP_POSUPTO - OP_UPTO;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_MINUPTO:
|
||||||
|
*code += OP_POSUPTO - OP_MINUPTO;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Reload the real opcode: c was rebased above and *code may have just been
rewritten; the skipping logic further down needs the stored value. */
c = *code;
|
||||||
|
}
|
||||||
|
/* Case 2: a character class, which may be followed by its own repeat opcode. */
else if (c == OP_CLASS || c == OP_NCLASS
|
||||||
|
#ifdef SUPPORT_WIDE_CHARS
|
||||||
|
|| c == OP_XCLASS || c == OP_ECLASS
|
||||||
|
#endif
|
||||||
|
)
|
||||||
|
{
|
||||||
|
/* Locate the item that follows the class: XCLASS/ECLASS store a length that
is read with GET(code, 1); the other two are followed by 32 bytes' worth of
code units. */
#ifdef SUPPORT_WIDE_CHARS
|
||||||
|
if (c == OP_XCLASS || c == OP_ECLASS)
|
||||||
|
repeat_opcode = code + GET(code, 1);
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
repeat_opcode = code + 1 + (32 / sizeof(PCRE2_UCHAR));
|
||||||
|
|
||||||
|
c = *repeat_opcode;
|
||||||
|
if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
|
||||||
|
{
|
||||||
|
/* The return from get_chr_property_list() will never be NULL when
|
||||||
|
*code (aka c) is one of the four class opcodes. However, gcc with
|
||||||
|
-fanalyzer notes that a NULL return is possible, and grumbles. Hence we
|
||||||
|
put in a check. */
|
||||||
|
|
||||||
|
end = get_chr_property_list(code, utf, ucp, cb->fcc, list);
|
||||||
|
/* TRUE when the repeat opcode value is even.
NOTE(review): presumably the even OP_CR* values are the non-MIN variants -
confirm against the opcode numbering. */
list[1] = (c & 1) == 0;
|
||||||
|
|
||||||
|
if (end != NULL &&
|
||||||
|
compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit))
|
||||||
|
{
|
||||||
|
/* Both the plain and the MIN form of each repeat become the same possessive
opcode, stored directly over the repeat opcode. */
switch (c)
|
||||||
|
{
|
||||||
|
case OP_CRSTAR:
|
||||||
|
case OP_CRMINSTAR:
|
||||||
|
*repeat_opcode = OP_CRPOSSTAR;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_CRPLUS:
|
||||||
|
case OP_CRMINPLUS:
|
||||||
|
*repeat_opcode = OP_CRPOSPLUS;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_CRQUERY:
|
||||||
|
case OP_CRMINQUERY:
|
||||||
|
*repeat_opcode = OP_CRPOSQUERY;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_CRRANGE:
|
||||||
|
case OP_CRMINRANGE:
|
||||||
|
*repeat_opcode = OP_CRPOSRANGE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* c was reused for the repeat opcode; restore the class opcode itself. */
c = *code;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Advance past any variable-length data that the fixed-length table does not
account for. OP_END is the only normal exit from the function. */
switch(c)
|
||||||
|
{
|
||||||
|
case OP_END:
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
case OP_TYPESTAR:
|
||||||
|
case OP_TYPEMINSTAR:
|
||||||
|
case OP_TYPEPLUS:
|
||||||
|
case OP_TYPEMINPLUS:
|
||||||
|
case OP_TYPEQUERY:
|
||||||
|
case OP_TYPEMINQUERY:
|
||||||
|
case OP_TYPEPOSSTAR:
|
||||||
|
case OP_TYPEPOSPLUS:
|
||||||
|
case OP_TYPEPOSQUERY:
|
||||||
|
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_TYPEUPTO:
|
||||||
|
case OP_TYPEMINUPTO:
|
||||||
|
case OP_TYPEEXACT:
|
||||||
|
case OP_TYPEPOSUPTO:
|
||||||
|
/* Same check as above, but the type opcode sits after an IMM2_SIZE field. */
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
|
||||||
|
code += 2;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_CALLOUT_STR:
|
||||||
|
code += GET(code, 1 + 2*LINK_SIZE);
|
||||||
|
break;
|
||||||
|
|
||||||
|
#ifdef SUPPORT_WIDE_CHARS
|
||||||
|
case OP_XCLASS:
|
||||||
|
case OP_ECLASS:
|
||||||
|
code += GET(code, 1);
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
case OP_MARK:
|
||||||
|
case OP_COMMIT_ARG:
|
||||||
|
case OP_PRUNE_ARG:
|
||||||
|
case OP_SKIP_ARG:
|
||||||
|
case OP_THEN_ARG:
|
||||||
|
code += code[1];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Add in the fixed length from the table */
|
||||||
|
|
||||||
|
code += PRIV(OP_lengths)[c];
|
||||||
|
|
||||||
|
/* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
|
||||||
|
followed by a multi-byte character. The length in the table is a minimum, so
|
||||||
|
we have to arrange to skip the extra code units. */
|
||||||
|
|
||||||
|
#ifdef MAYBE_UTF_MULTI
|
||||||
|
if (utf) switch(c)
|
||||||
|
{
|
||||||
|
case OP_CHAR:
|
||||||
|
case OP_CHARI:
|
||||||
|
case OP_NOT:
|
||||||
|
case OP_NOTI:
|
||||||
|
case OP_STAR:
|
||||||
|
case OP_MINSTAR:
|
||||||
|
case OP_PLUS:
|
||||||
|
case OP_MINPLUS:
|
||||||
|
case OP_QUERY:
|
||||||
|
case OP_MINQUERY:
|
||||||
|
case OP_UPTO:
|
||||||
|
case OP_MINUPTO:
|
||||||
|
case OP_EXACT:
|
||||||
|
case OP_POSSTAR:
|
||||||
|
case OP_POSPLUS:
|
||||||
|
case OP_POSQUERY:
|
||||||
|
case OP_POSUPTO:
|
||||||
|
case OP_STARI:
|
||||||
|
case OP_MINSTARI:
|
||||||
|
case OP_PLUSI:
|
||||||
|
case OP_MINPLUSI:
|
||||||
|
case OP_QUERYI:
|
||||||
|
case OP_MINQUERYI:
|
||||||
|
case OP_UPTOI:
|
||||||
|
case OP_MINUPTOI:
|
||||||
|
case OP_EXACTI:
|
||||||
|
case OP_POSSTARI:
|
||||||
|
case OP_POSPLUSI:
|
||||||
|
case OP_POSQUERYI:
|
||||||
|
case OP_POSUPTOI:
|
||||||
|
case OP_NOTSTAR:
|
||||||
|
case OP_NOTMINSTAR:
|
||||||
|
case OP_NOTPLUS:
|
||||||
|
case OP_NOTMINPLUS:
|
||||||
|
case OP_NOTQUERY:
|
||||||
|
case OP_NOTMINQUERY:
|
||||||
|
case OP_NOTUPTO:
|
||||||
|
case OP_NOTMINUPTO:
|
||||||
|
case OP_NOTEXACT:
|
||||||
|
case OP_NOTPOSSTAR:
|
||||||
|
case OP_NOTPOSPLUS:
|
||||||
|
case OP_NOTPOSQUERY:
|
||||||
|
case OP_NOTPOSUPTO:
|
||||||
|
case OP_NOTSTARI:
|
||||||
|
case OP_NOTMINSTARI:
|
||||||
|
case OP_NOTPLUSI:
|
||||||
|
case OP_NOTMINPLUSI:
|
||||||
|
case OP_NOTQUERYI:
|
||||||
|
case OP_NOTMINQUERYI:
|
||||||
|
case OP_NOTUPTOI:
|
||||||
|
case OP_NOTMINUPTOI:
|
||||||
|
case OP_NOTEXACTI:
|
||||||
|
case OP_NOTPOSSTARI:
|
||||||
|
case OP_NOTPOSPLUSI:
|
||||||
|
case OP_NOTPOSQUERYI:
|
||||||
|
case OP_NOTPOSUPTOI:
|
||||||
|
/* code has already been advanced by the table length, so code[-1] is the last
code unit covered by it; it is tested for trailing extra code units. */
if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
(void)(utf); /* Keep compiler happy by referencing the variable */
|
||||||
|
#endif /* MAYBE_UTF_MULTI */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre2_auto_possess.c */
|
||||||
196
3rd/pcre2/src/pcre2_chartables.c.dist
Normal file
196
3rd/pcre2/src/pcre2_chartables.c.dist
Normal file
@@ -0,0 +1,196 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This file was automatically written by the pcre2_dftables auxiliary
|
||||||
|
program. It contains character tables that are used when no external
|
||||||
|
tables are passed to PCRE2 by the application that calls it. The tables
|
||||||
|
are used only for characters whose code values are less than 256, and
|
||||||
|
only relevant if not in UCP mode. */
|
||||||
|
|
||||||
|
/* This set of tables was written in the C locale. */
|
||||||
|
|
||||||
|
/* The pcre2_dftables program (which is distributed with PCRE2) can be used
|
||||||
|
to build alternative versions of this file. This is necessary if you are
|
||||||
|
running in an EBCDIC environment, or if you want to default to a different
|
||||||
|
encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates
|
||||||
|
these tables in the "C" locale by default. This happens automatically if
|
||||||
|
PCRE2 is configured with --enable-rebuild-chartables. However, you can run
|
||||||
|
pcre2_dftables manually with the -L option to build tables using the LC_ALL
|
||||||
|
locale. */
|
||||||
|
|
||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
#include "config.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "pcre2_internal.h"
|
||||||
|
|
||||||
|
const uint8_t PRIV(default_tables)[] = {
|
||||||
|
|
||||||
|
/* This table is a lower casing table. */
|
||||||
|
|
||||||
|
0, 1, 2, 3, 4, 5, 6, 7,
|
||||||
|
8, 9, 10, 11, 12, 13, 14, 15,
|
||||||
|
16, 17, 18, 19, 20, 21, 22, 23,
|
||||||
|
24, 25, 26, 27, 28, 29, 30, 31,
|
||||||
|
32, 33, 34, 35, 36, 37, 38, 39,
|
||||||
|
40, 41, 42, 43, 44, 45, 46, 47,
|
||||||
|
48, 49, 50, 51, 52, 53, 54, 55,
|
||||||
|
56, 57, 58, 59, 60, 61, 62, 63,
|
||||||
|
64, 97, 98, 99,100,101,102,103,
|
||||||
|
104,105,106,107,108,109,110,111,
|
||||||
|
112,113,114,115,116,117,118,119,
|
||||||
|
120,121,122, 91, 92, 93, 94, 95,
|
||||||
|
96, 97, 98, 99,100,101,102,103,
|
||||||
|
104,105,106,107,108,109,110,111,
|
||||||
|
112,113,114,115,116,117,118,119,
|
||||||
|
120,121,122,123,124,125,126,127,
|
||||||
|
128,129,130,131,132,133,134,135,
|
||||||
|
136,137,138,139,140,141,142,143,
|
||||||
|
144,145,146,147,148,149,150,151,
|
||||||
|
152,153,154,155,156,157,158,159,
|
||||||
|
160,161,162,163,164,165,166,167,
|
||||||
|
168,169,170,171,172,173,174,175,
|
||||||
|
176,177,178,179,180,181,182,183,
|
||||||
|
184,185,186,187,188,189,190,191,
|
||||||
|
192,193,194,195,196,197,198,199,
|
||||||
|
200,201,202,203,204,205,206,207,
|
||||||
|
208,209,210,211,212,213,214,215,
|
||||||
|
216,217,218,219,220,221,222,223,
|
||||||
|
224,225,226,227,228,229,230,231,
|
||||||
|
232,233,234,235,236,237,238,239,
|
||||||
|
240,241,242,243,244,245,246,247,
|
||||||
|
248,249,250,251,252,253,254,255,
|
||||||
|
|
||||||
|
/* This table is a case flipping table. */
|
||||||
|
|
||||||
|
0, 1, 2, 3, 4, 5, 6, 7,
|
||||||
|
8, 9, 10, 11, 12, 13, 14, 15,
|
||||||
|
16, 17, 18, 19, 20, 21, 22, 23,
|
||||||
|
24, 25, 26, 27, 28, 29, 30, 31,
|
||||||
|
32, 33, 34, 35, 36, 37, 38, 39,
|
||||||
|
40, 41, 42, 43, 44, 45, 46, 47,
|
||||||
|
48, 49, 50, 51, 52, 53, 54, 55,
|
||||||
|
56, 57, 58, 59, 60, 61, 62, 63,
|
||||||
|
64, 97, 98, 99,100,101,102,103,
|
||||||
|
104,105,106,107,108,109,110,111,
|
||||||
|
112,113,114,115,116,117,118,119,
|
||||||
|
120,121,122, 91, 92, 93, 94, 95,
|
||||||
|
96, 65, 66, 67, 68, 69, 70, 71,
|
||||||
|
72, 73, 74, 75, 76, 77, 78, 79,
|
||||||
|
80, 81, 82, 83, 84, 85, 86, 87,
|
||||||
|
88, 89, 90,123,124,125,126,127,
|
||||||
|
128,129,130,131,132,133,134,135,
|
||||||
|
136,137,138,139,140,141,142,143,
|
||||||
|
144,145,146,147,148,149,150,151,
|
||||||
|
152,153,154,155,156,157,158,159,
|
||||||
|
160,161,162,163,164,165,166,167,
|
||||||
|
168,169,170,171,172,173,174,175,
|
||||||
|
176,177,178,179,180,181,182,183,
|
||||||
|
184,185,186,187,188,189,190,191,
|
||||||
|
192,193,194,195,196,197,198,199,
|
||||||
|
200,201,202,203,204,205,206,207,
|
||||||
|
208,209,210,211,212,213,214,215,
|
||||||
|
216,217,218,219,220,221,222,223,
|
||||||
|
224,225,226,227,228,229,230,231,
|
||||||
|
232,233,234,235,236,237,238,239,
|
||||||
|
240,241,242,243,244,245,246,247,
|
||||||
|
248,249,250,251,252,253,254,255,
|
||||||
|
|
||||||
|
/* This table contains bit maps for various character classes. Each map is 32
|
||||||
|
bytes long and the bits run from the least significant end of each byte. The
|
||||||
|
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
|
||||||
|
graph, print, punct, and cntrl. Other classes are built from combinations. */
|
||||||
|
|
||||||
|
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, /* space */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* xdigit */
|
||||||
|
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* digit */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* upper */
|
||||||
|
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* lower */
|
||||||
|
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* word */
|
||||||
|
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
|
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, /* graph */
|
||||||
|
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
|
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, /* print */
|
||||||
|
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
|
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, /* punct */
|
||||||
|
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
|
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, /* cntrl */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
|
/* This table identifies various classes of character by individual bits:
|
||||||
|
0x01 white space character
|
||||||
|
0x02 letter
|
||||||
|
0x04 lower case letter
|
||||||
|
0x08 decimal digit
|
||||||
|
0x10 word (alphanumeric or '_')
|
||||||
|
*/
|
||||||
|
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
|
||||||
|
0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
|
||||||
|
0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - ' */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ( - / */
|
||||||
|
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, /* 0 - 7 */
|
||||||
|
0x18,0x18,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? */
|
||||||
|
0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* @ - G */
|
||||||
|
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
|
||||||
|
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
|
||||||
|
0x12,0x12,0x12,0x00,0x00,0x00,0x00,0x10, /* X - _ */
|
||||||
|
0x00,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /* ` - g */
|
||||||
|
0x16,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /* h - o */
|
||||||
|
0x16,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /* p - w */
|
||||||
|
0x16,0x16,0x16,0x00,0x00,0x00,0x00,0x00, /* x -127 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
|
||||||
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
|
||||||
|
|
||||||
|
/* End of pcre2_chartables.c */
|
||||||
94
3rd/pcre2/src/pcre2_chkdint.c
Normal file
94
3rd/pcre2/src/pcre2_chkdint.c
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE2 is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Copyright (c) 2023 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* This file contains functions to implement checked integer operations */
|
||||||
|
|
||||||
|
#ifndef PCRE2_PCRE2TEST
|
||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
#include "config.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "pcre2_internal.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Checked Integer Multiplication *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/*
|
||||||
|
Arguments:
|
||||||
|
r A pointer to PCRE2_SIZE to store the answer
|
||||||
|
a, b Two integers
|
||||||
|
|
||||||
|
Returns: Bool indicating if the operation overflows
|
||||||
|
|
||||||
|
It is modeled after C23's <stdckdint.h> interface
|
||||||
|
The INT64_OR_DOUBLE type is a 64-bit integer type when available,
|
||||||
|
otherwise double. */
|
||||||
|
|
||||||
|
BOOL
|
||||||
|
PRIV(ckd_smul)(PCRE2_SIZE *r, int a, int b)
|
||||||
|
{
|
||||||
|
#ifdef HAVE_BUILTIN_MUL_OVERFLOW
|
||||||
|
PCRE2_SIZE m;
|
||||||
|
|
||||||
|
if (__builtin_mul_overflow(a, b, &m)) return TRUE;
|
||||||
|
|
||||||
|
*r = m;
|
||||||
|
#else
|
||||||
|
INT64_OR_DOUBLE m;
|
||||||
|
|
||||||
|
PCRE2_ASSERT(a >= 0 && b >= 0);
|
||||||
|
|
||||||
|
m = (INT64_OR_DOUBLE)a * (INT64_OR_DOUBLE)b;
|
||||||
|
|
||||||
|
#if defined INT64_MAX || defined int64_t
|
||||||
|
if (sizeof(m) > sizeof(*r) && m > (INT64_OR_DOUBLE)PCRE2_SIZE_MAX) return TRUE;
|
||||||
|
*r = (PCRE2_SIZE)m;
|
||||||
|
#else
|
||||||
|
if (m > PCRE2_SIZE_MAX) return TRUE;
|
||||||
|
*r = m;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre2_chkdint.c */
|
||||||
11101
3rd/pcre2/src/pcre2_compile.c
Normal file
11101
3rd/pcre2/src/pcre2_compile.c
Normal file
File diff suppressed because it is too large
Load Diff
280
3rd/pcre2/src/pcre2_compile.h
Normal file
280
3rd/pcre2/src/pcre2_compile.h
Normal file
@@ -0,0 +1,280 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE2 is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef PCRE2_COMPILE_H_IDEMPOTENT_GUARD
|
||||||
|
#define PCRE2_COMPILE_H_IDEMPOTENT_GUARD
|
||||||
|
|
||||||
|
#include "pcre2_internal.h"
|
||||||
|
|
||||||
|
/* Compile time error code numbers. They are given names so that they can more
|
||||||
|
easily be tracked. When a new number is added, the tables called eint1 and
|
||||||
|
eint2 in pcre2posix.c may need to be updated, and a new error text must be
|
||||||
|
added to compile_error_texts in pcre2_error.c. Also, the error codes in
|
||||||
|
pcre2.h.in must be updated - their values are exactly 100 greater than these
|
||||||
|
values. */
|
||||||
|
|
||||||
|
enum { ERR0 = COMPILE_ERROR_BASE,
|
||||||
|
ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR10,
|
||||||
|
ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20,
|
||||||
|
ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29, ERR30,
|
||||||
|
ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40,
|
||||||
|
ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50,
|
||||||
|
ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
|
||||||
|
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
|
||||||
|
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
|
||||||
|
ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90,
|
||||||
|
ERR91, ERR92, ERR93, ERR94, ERR95, ERR96, ERR97, ERR98, ERR99, ERR100,
|
||||||
|
ERR101,ERR102,ERR103,ERR104,ERR105,ERR106,ERR107,ERR108,ERR109,ERR110,
|
||||||
|
ERR111,ERR112,ERR113,ERR114,ERR115,ERR116 };
|
||||||
|
|
||||||
|
/* Code values for parsed patterns, which are stored in a vector of 32-bit
|
||||||
|
unsigned ints. Values less than META_END are literal data values. The coding
|
||||||
|
for identifying the item is in the top 16-bits, leaving 16 bits for the
|
||||||
|
additional data that some of them need. The META_CODE, META_DATA, and META_DIFF
|
||||||
|
macros are used to manipulate parsed pattern elements.
|
||||||
|
|
||||||
|
NOTE: When these definitions are changed, the table of extra lengths for each
|
||||||
|
code (meta_extra_lengths) must be updated to remain in step. */
|
||||||
|
|
||||||
|
#define META_END 0x80000000u /* End of pattern */
|
||||||
|
|
||||||
|
#define META_ALT 0x80010000u /* alternation */
|
||||||
|
#define META_ATOMIC 0x80020000u /* atomic group */
|
||||||
|
#define META_BACKREF 0x80030000u /* Back ref */
|
||||||
|
#define META_BACKREF_BYNAME 0x80040000u /* \k'name' */
|
||||||
|
#define META_BIGVALUE 0x80050000u /* Next is a literal > META_END */
|
||||||
|
#define META_CALLOUT_NUMBER 0x80060000u /* (?C with numerical argument */
|
||||||
|
#define META_CALLOUT_STRING 0x80070000u /* (?C with string argument */
|
||||||
|
#define META_CAPTURE 0x80080000u /* Capturing parenthesis */
|
||||||
|
#define META_CIRCUMFLEX 0x80090000u /* ^ metacharacter */
|
||||||
|
#define META_CLASS 0x800a0000u /* start non-empty class */
|
||||||
|
#define META_CLASS_EMPTY 0x800b0000u /* empty class */
|
||||||
|
#define META_CLASS_EMPTY_NOT 0x800c0000u /* negative empty class */
|
||||||
|
#define META_CLASS_END 0x800d0000u /* end of non-empty class */
|
||||||
|
#define META_CLASS_NOT 0x800e0000u /* start non-empty negative class */
|
||||||
|
#define META_COND_ASSERT 0x800f0000u /* (?(?assertion)... */
|
||||||
|
#define META_COND_DEFINE 0x80100000u /* (?(DEFINE)... */
|
||||||
|
#define META_COND_NAME 0x80110000u /* (?(<name>)... */
|
||||||
|
#define META_COND_NUMBER 0x80120000u /* (?(digits)... */
|
||||||
|
#define META_COND_RNAME 0x80130000u /* (?(R&name)... */
|
||||||
|
#define META_COND_RNUMBER 0x80140000u /* (?(Rdigits)... */
|
||||||
|
#define META_COND_VERSION 0x80150000u /* (?(VERSION<op>x.y)... */
|
||||||
|
#define META_OFFSET 0x80160000u /* Setting offset for various
|
||||||
|
META codes (e.g. META_SCS_NAME) */
|
||||||
|
#define META_SCS 0x80170000u /* (*scan_substring:... */
|
||||||
|
#define META_SCS_NAME 0x80180000u /* Next <name> of scan_substring */
|
||||||
|
#define META_SCS_NUMBER 0x80190000u /* Next digits of scan_substring */
|
||||||
|
#define META_DOLLAR 0x801a0000u /* $ metacharacter */
|
||||||
|
#define META_DOT 0x801b0000u /* . metacharacter */
|
||||||
|
#define META_ESCAPE 0x801c0000u /* \d and friends */
|
||||||
|
#define META_KET 0x801d0000u /* closing parenthesis */
|
||||||
|
#define META_NOCAPTURE 0x801e0000u /* no capture parens */
|
||||||
|
#define META_OPTIONS 0x801f0000u /* (?i) and friends */
|
||||||
|
#define META_POSIX 0x80200000u /* POSIX class item */
|
||||||
|
#define META_POSIX_NEG 0x80210000u /* negative POSIX class item */
|
||||||
|
#define META_RANGE_ESCAPED 0x80220000u /* range with at least one escape */
|
||||||
|
#define META_RANGE_LITERAL 0x80230000u /* range defined literally */
|
||||||
|
#define META_RECURSE 0x80240000u /* Recursion */
|
||||||
|
#define META_RECURSE_BYNAME 0x80250000u /* (?&name) */
|
||||||
|
#define META_SCRIPT_RUN 0x80260000u /* (*script_run:...) */
|
||||||
|
|
||||||
|
/* These must be kept together to make it easy to check that an assertion
|
||||||
|
is present where expected in a conditional group. */
|
||||||
|
|
||||||
|
#define META_LOOKAHEAD 0x80270000u /* (?= */
|
||||||
|
#define META_LOOKAHEADNOT 0x80280000u /* (?! */
|
||||||
|
#define META_LOOKBEHIND 0x80290000u /* (?<= */
|
||||||
|
#define META_LOOKBEHINDNOT 0x802a0000u /* (?<! */
|
||||||
|
|
||||||
|
/* These cannot be conditions */
|
||||||
|
|
||||||
|
#define META_LOOKAHEAD_NA 0x802b0000u /* (*napla: */
|
||||||
|
#define META_LOOKBEHIND_NA 0x802c0000u /* (*naplb: */
|
||||||
|
|
||||||
|
/* These must be kept in this order, with consecutive values, and the _ARG
|
||||||
|
versions of COMMIT, PRUNE, SKIP, and THEN immediately after their non-argument
|
||||||
|
versions. */
|
||||||
|
|
||||||
|
#define META_MARK 0x802d0000u /* (*MARK) */
|
||||||
|
#define META_ACCEPT 0x802e0000u /* (*ACCEPT) */
|
||||||
|
#define META_FAIL 0x802f0000u /* (*FAIL) */
|
||||||
|
#define META_COMMIT 0x80300000u /* These */
|
||||||
|
#define META_COMMIT_ARG 0x80310000u /* pairs */
|
||||||
|
#define META_PRUNE 0x80320000u /* must */
|
||||||
|
#define META_PRUNE_ARG 0x80330000u /* be */
|
||||||
|
#define META_SKIP 0x80340000u /* kept */
|
||||||
|
#define META_SKIP_ARG 0x80350000u /* in */
|
||||||
|
#define META_THEN 0x80360000u /* this */
|
||||||
|
#define META_THEN_ARG 0x80370000u /* order */
|
||||||
|
|
||||||
|
/* These must be kept in groups of adjacent 3 values, and all together. */
|
||||||
|
|
||||||
|
#define META_ASTERISK 0x80380000u /* * */
|
||||||
|
#define META_ASTERISK_PLUS 0x80390000u /* *+ */
|
||||||
|
#define META_ASTERISK_QUERY 0x803a0000u /* *? */
|
||||||
|
#define META_PLUS 0x803b0000u /* + */
|
||||||
|
#define META_PLUS_PLUS 0x803c0000u /* ++ */
|
||||||
|
#define META_PLUS_QUERY 0x803d0000u /* +? */
|
||||||
|
#define META_QUERY 0x803e0000u /* ? */
|
||||||
|
#define META_QUERY_PLUS 0x803f0000u /* ?+ */
|
||||||
|
#define META_QUERY_QUERY 0x80400000u /* ?? */
|
||||||
|
#define META_MINMAX 0x80410000u /* {n,m} repeat */
|
||||||
|
#define META_MINMAX_PLUS 0x80420000u /* {n,m}+ repeat */
|
||||||
|
#define META_MINMAX_QUERY 0x80430000u /* {n,m}? repeat */
|
||||||
|
|
||||||
|
/* These meta codes must be kept in a group, with the OR/SUB/XOR in
|
||||||
|
this order, and AND/NOT at the start/end. */
|
||||||
|
|
||||||
|
#define META_ECLASS_AND 0x80440000u /* && (or &) in a class */
|
||||||
|
#define META_ECLASS_OR 0x80450000u /* || (or |, +) in a class */
|
||||||
|
#define META_ECLASS_SUB 0x80460000u /* -- (or -) in a class */
|
||||||
|
#define META_ECLASS_XOR 0x80470000u /* ~~ (or ^) in a class */
|
||||||
|
#define META_ECLASS_NOT 0x80480000u /* ! in a class */
|
||||||
|
|
||||||
|
/* Convenience aliases. */
|
||||||
|
|
||||||
|
#define META_FIRST_QUANTIFIER META_ASTERISK
|
||||||
|
#define META_LAST_QUANTIFIER META_MINMAX_QUERY
|
||||||
|
|
||||||
|
/* This is a special "meta code" that is used only to distinguish (*asr: from
|
||||||
|
(*sr: in the table of alphabetic assertions. It is never stored in the parsed
|
||||||
|
pattern because (*asr: is turned into (*sr:(*atomic: at that stage. There is
|
||||||
|
therefore no need for it to have a length entry, so use a high value. */
|
||||||
|
|
||||||
|
#define META_ATOMIC_SCRIPT_RUN 0x8fff0000u
|
||||||
|
|
||||||
|
/* Macros for manipulating elements of the parsed pattern vector. */
|
||||||
|
|
||||||
|
/* META_CODE() extracts the code held in the top 16 bits of a parsed-pattern
element and META_DATA() the data held in the bottom 16 bits. META_DIFF() gives
the distance between two META codes, counted in codes (the codes are spaced
0x10000 apart). Every argument is parenthesized so that compound expressions
such as META_DATA(a | b) or META_DIFF(x, y + z) expand as written. Each
argument is evaluated exactly once. */

#define META_CODE(x)   ((x) & 0xffff0000u)
#define META_DATA(x)   ((x) & 0x0000ffffu)
#define META_DIFF(x,y) (((x)-(y))>>16)
|
||||||
|
|
||||||
|
/* Extended class management flags. */
|
||||||
|
|
||||||
|
#define CLASS_IS_ECLASS 0x1
|
||||||
|
|
||||||
|
/* Macro for the highest character value. */
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
#define MAX_UCHAR_VALUE 0xffu
|
||||||
|
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||||
|
#define MAX_UCHAR_VALUE 0xffffu
|
||||||
|
#else
|
||||||
|
#define MAX_UCHAR_VALUE 0xffffffffu
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define GET_MAX_CHAR_VALUE(utf) \
|
||||||
|
((utf) ? MAX_UTF_CODE_POINT : MAX_UCHAR_VALUE)
|
||||||
|
|
||||||
|
/* Macro for setting individual bits in class bitmaps. */
|
||||||
|
|
||||||
|
/* Set bit number b in the byte-array bitmap a (bit 0 is the least significant
bit of a[0]). Both arguments are parenthesized and the whole expansion is
wrapped, so pointer expressions such as SETBIT(map + 1, n) work and the macro
can be used wherever an expression is allowed. Note that b is evaluated twice,
so it must not have side effects. */

#define SETBIT(a,b) ((a)[(b) >> 3] |= (uint8_t)(1u << ((b) & 0x7)))
|
||||||
|
|
||||||
|
/* Macro for 8 bit specific checks. */
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
#define SELECT_VALUE8(value8, value) (value8)
|
||||||
|
#else
|
||||||
|
#define SELECT_VALUE8(value8, value) (value)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Macro for aligning data. */
|
||||||
|
/* Round base up to the next multiple of align. The mask arithmetic is only
correct when align is a power of two. Both arguments are parenthesized so that
compound expressions can be passed safely; align is evaluated twice. */

#define CLIST_ALIGN_TO(base, align) \
  (((base) + ((size_t)(align) - 1)) & ~((size_t)(align) - 1))
|
||||||
|
|
||||||
|
/* Structure for holding information about an OP_ECLASS internal operand.
|
||||||
|
An "operand" here could be just a single OP_[X]CLASS, or it could be some
|
||||||
|
complex expression; but it's some sequence of ECL_* codes which pushes one
|
||||||
|
value to the stack. */
|
||||||
|
typedef struct {
|
||||||
|
/* The position of the operand - or NULL if (lengthptr != NULL). */
|
||||||
|
PCRE2_UCHAR *code_start;
|
||||||
|
PCRE2_SIZE length;
|
||||||
|
/* The operand's type if it is a single code (ECL_XCLASS, ECL_ANY, ECL_NONE);
|
||||||
|
otherwise zero if the operand is not atomic. */
|
||||||
|
uint8_t op_single_type;
|
||||||
|
/* Regardless of whether it's a single code or not, we fully constant-fold
|
||||||
|
the bitmap for code points < 256. */
|
||||||
|
class_bits_storage bits;
|
||||||
|
} eclass_op_info;
|
||||||
|
|
||||||
|
/* Macros for the definitions below, to prevent name collisions. */
|
||||||
|
|
||||||
|
#define _pcre2_posix_class_maps PCRE2_SUFFIX(_pcre2_posix_class_maps)
|
||||||
|
#define _pcre2_update_classbits PCRE2_SUFFIX(_pcre2_update_classbits_)
|
||||||
|
#define _pcre2_compile_class_nested PCRE2_SUFFIX(_pcre2_compile_class_nested_)
|
||||||
|
#define _pcre2_compile_class_not_nested PCRE2_SUFFIX(_pcre2_compile_class_not_nested_)
|
||||||
|
|
||||||
|
|
||||||
|
/* Indices of the POSIX classes in posix_names, posix_name_lengths,
|
||||||
|
posix_class_maps, and posix_substitutes. They must be kept in sync. */
|
||||||
|
|
||||||
|
#define PC_DIGIT 7
|
||||||
|
#define PC_GRAPH 8
|
||||||
|
#define PC_PRINT 9
|
||||||
|
#define PC_PUNCT 10
|
||||||
|
#define PC_XDIGIT 13
|
||||||
|
|
||||||
|
extern const int PRIV(posix_class_maps)[];
|
||||||
|
|
||||||
|
|
||||||
|
/* Set bits in classbits according to the property type */
|
||||||
|
|
||||||
|
void PRIV(update_classbits)(uint32_t ptype, uint32_t pdata, BOOL negated,
|
||||||
|
uint8_t *classbits);
|
||||||
|
|
||||||
|
/* Compile the META codes from start_ptr...end_ptr, writing a single
|
||||||
|
OP_CLASS, OP_NCLASS, OP_XCLASS, or OP_ALLANY into pcode. */
|
||||||
|
|
||||||
|
uint32_t *PRIV(compile_class_not_nested)(uint32_t options, uint32_t xoptions,
|
||||||
|
uint32_t *start_ptr, PCRE2_UCHAR **pcode, BOOL negate_class, BOOL* has_bitmap,
|
||||||
|
int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr);
|
||||||
|
|
||||||
|
/* Compile the META codes in pptr into opcodes written to pcode. The pptr must
|
||||||
|
start at a META_CLASS or META_CLASS_NOT.
|
||||||
|
|
||||||
|
The pptr will be left pointing at the matching META_CLASS_END. */
|
||||||
|
|
||||||
|
BOOL PRIV(compile_class_nested)(uint32_t options, uint32_t xoptions,
|
||||||
|
uint32_t **pptr, PCRE2_UCHAR **pcode, int *errorcodeptr,
|
||||||
|
compile_block *cb, PCRE2_SIZE *lengthptr);
|
||||||
|
|
||||||
|
#endif /* PCRE2_COMPILE_H_IDEMPOTENT_GUARD */
|
||||||
|
|
||||||
|
/* End of pcre2_compile.h */
|
||||||
2737
3rd/pcre2/src/pcre2_compile_class.c
Normal file
2737
3rd/pcre2/src/pcre2_compile_class.c
Normal file
@@ -0,0 +1,2737 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
#include "config.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "pcre2_compile.h"
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
/* Option bits for eclass. */
|
||||||
|
uint32_t options;
|
||||||
|
uint32_t xoptions;
|
||||||
|
/* Rarely used members. */
|
||||||
|
int *errorcodeptr;
|
||||||
|
compile_block *cb;
|
||||||
|
/* Bitmap is needed. */
|
||||||
|
BOOL needs_bitmap;
|
||||||
|
} eclass_context;
|
||||||
|
|
||||||
|
/* Checks the allowed tokens at the end of a class structure in debug mode.
|
||||||
|
When a new token is not processed by all loops, and the token is equal to
|
||||||
|
a) one of the cases here:
|
||||||
|
the compiler will complain about a duplicated case value.
|
||||||
|
b) none of the cases here:
|
||||||
|
the loop without the handler will stop with an assertion failure. */
|
||||||
|
|
||||||
|
/* In non-debug builds the macro is just the "default:" label. In debug builds
it also asserts that an unlisted value is not above META_END, and names every
META code that may terminate a class item as an explicit case label. */

#ifndef PCRE2_DEBUG
#define CLASS_END_CASES(meta) \
  default:
#else
#define CLASS_END_CASES(meta) \
  default: \
  PCRE2_ASSERT((meta) <= META_END); \
  /* Fall through */ \
  case META_CLASS: \
  case META_CLASS_NOT: \
  case META_CLASS_EMPTY: \
  case META_CLASS_EMPTY_NOT: \
  case META_CLASS_END: \
  case META_ECLASS_AND: \
  case META_ECLASS_OR: \
  case META_ECLASS_SUB: \
  case META_ECLASS_XOR: \
  case META_ECLASS_NOT:
#endif
|
||||||
|
|
||||||
|
#ifdef SUPPORT_WIDE_CHARS
|
||||||
|
|
||||||
|
/* Heapsort algorithm. */
|
||||||
|
|
||||||
|
/* Sift-down step of a heapsort over pairs of uint32_t values. The buffer is
treated as a max-heap of two-element items that are ordered by their first
value only; the second value just travels with the first.

Arguments:
  buffer   the array of pairs
  size     number of uint32_t elements (not pairs) that belong to the heap
  i        element index of the pair to sift down

The children of the pair at element index i are at 2*i+2 and 2*i+4. */

static void do_heapify(uint32_t *buffer, size_t size, size_t i)
{
for (;;)
  {
  size_t left_child = (i << 1) + 2;
  size_t right_child = left_child + 2;
  size_t largest = i;
  uint32_t first, second;

  if (left_child < size && buffer[left_child] > buffer[largest])
    largest = left_child;
  if (right_child < size && buffer[right_child] > buffer[largest])
    largest = right_child;

  /* The heap property holds here, nothing more to do. */
  if (largest == i) break;

  /* Exchange the pair with its larger child and continue from there. */
  first = buffer[i];
  second = buffer[i + 1];
  buffer[i] = buffer[largest];
  buffer[i + 1] = buffer[largest + 1];
  buffer[largest] = first;
  buffer[largest + 1] = second;

  i = largest;
  }
}
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
|
||||||
|
#define PARSE_CLASS_UTF 0x1
|
||||||
|
#define PARSE_CLASS_CASELESS_UTF 0x2
|
||||||
|
#define PARSE_CLASS_RESTRICTED_UTF 0x4
|
||||||
|
#define PARSE_CLASS_TURKISH_UTF 0x8
|
||||||
|
|
||||||
|
/* Get the range of nocase characters which includes the
|
||||||
|
'c' character passed as argument, or directly follows 'c'. */
|
||||||
|
|
||||||
|
static const uint32_t*
|
||||||
|
get_nocase_range(uint32_t c)
|
||||||
|
{
|
||||||
|
uint32_t left = 0;
|
||||||
|
uint32_t right = PRIV(ucd_nocase_ranges_size);
|
||||||
|
uint32_t middle;
|
||||||
|
|
||||||
|
if (c > MAX_UTF_CODE_POINT) return PRIV(ucd_nocase_ranges) + right;
|
||||||
|
|
||||||
|
while (TRUE)
|
||||||
|
{
|
||||||
|
/* Range end of the middle element. */
|
||||||
|
middle = ((left + right) >> 1) | 0x1;
|
||||||
|
|
||||||
|
if (PRIV(ucd_nocase_ranges)[middle] <= c)
|
||||||
|
left = middle + 1;
|
||||||
|
else if (middle > 1 && PRIV(ucd_nocase_ranges)[middle - 2] > c)
|
||||||
|
right = middle - 1;
|
||||||
|
else
|
||||||
|
return PRIV(ucd_nocase_ranges) + (middle - 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Get the list of othercase characters, which belongs to the passed range.
|
||||||
|
Create ranges from these characters, and append them to the buffer argument. */
|
||||||
|
|
||||||
|
static size_t
|
||||||
|
utf_caseless_extend(uint32_t start, uint32_t end, uint32_t options,
|
||||||
|
uint32_t *buffer)
|
||||||
|
{
|
||||||
|
uint32_t new_start = start;
|
||||||
|
uint32_t new_end = end;
|
||||||
|
uint32_t c = start;
|
||||||
|
const uint32_t *list;
|
||||||
|
uint32_t tmp[3];
|
||||||
|
size_t result = 2;
|
||||||
|
const uint32_t *skip_range = get_nocase_range(c);
|
||||||
|
uint32_t skip_start = skip_range[0];
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
PCRE2_ASSERT(options & PARSE_CLASS_UTF);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
|
if (end > MAX_UTF_CODE_POINT) end = MAX_UTF_CODE_POINT;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
while (c <= end)
|
||||||
|
{
|
||||||
|
uint32_t co;
|
||||||
|
|
||||||
|
if (c > skip_start)
|
||||||
|
{
|
||||||
|
c = skip_range[1];
|
||||||
|
skip_range += 2;
|
||||||
|
skip_start = skip_range[0];
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Compute caseless set. */
|
||||||
|
|
||||||
|
if ((options & (PARSE_CLASS_TURKISH_UTF|PARSE_CLASS_RESTRICTED_UTF)) ==
|
||||||
|
PARSE_CLASS_TURKISH_UTF &&
|
||||||
|
UCD_ANY_I(c))
|
||||||
|
{
|
||||||
|
co = PRIV(ucd_turkish_dotted_i_caseset) + (UCD_DOTTED_I(c)? 0 : 3);
|
||||||
|
}
|
||||||
|
else if ((co = UCD_CASESET(c)) != 0 &&
|
||||||
|
(options & PARSE_CLASS_RESTRICTED_UTF) != 0 &&
|
||||||
|
PRIV(ucd_caseless_sets)[co] < 128)
|
||||||
|
{
|
||||||
|
co = 0; /* Ignore the caseless set if it's restricted. */
|
||||||
|
}
|
||||||
|
|
||||||
|
if (co != 0)
|
||||||
|
list = PRIV(ucd_caseless_sets) + co;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
co = UCD_OTHERCASE(c);
|
||||||
|
list = tmp;
|
||||||
|
tmp[0] = c;
|
||||||
|
tmp[1] = NOTACHAR;
|
||||||
|
|
||||||
|
if (co != c)
|
||||||
|
{
|
||||||
|
tmp[1] = co;
|
||||||
|
tmp[2] = NOTACHAR;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
c++;
|
||||||
|
|
||||||
|
/* Add characters. */
|
||||||
|
do
|
||||||
|
{
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 16
|
||||||
|
if (!(options & PARSE_CLASS_UTF) && *list > 0xffff) continue;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (*list < new_start)
|
||||||
|
{
|
||||||
|
if (*list + 1 == new_start)
|
||||||
|
{
|
||||||
|
new_start--;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (*list > new_end)
|
||||||
|
{
|
||||||
|
if (*list - 1 == new_end)
|
||||||
|
{
|
||||||
|
new_end++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else continue;
|
||||||
|
|
||||||
|
result += 2;
|
||||||
|
if (buffer != NULL)
|
||||||
|
{
|
||||||
|
buffer[0] = *list;
|
||||||
|
buffer[1] = *list;
|
||||||
|
buffer += 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
while (*(++list) != NOTACHAR);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (buffer != NULL)
|
||||||
|
{
|
||||||
|
buffer[0] = new_start;
|
||||||
|
buffer[1] = new_end;
|
||||||
|
buffer += 2;
|
||||||
|
(void)buffer;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Add a character list to a buffer. */
|
||||||
|
|
||||||
|
static size_t
|
||||||
|
append_char_list(const uint32_t *p, uint32_t *buffer)
|
||||||
|
{
|
||||||
|
const uint32_t *n;
|
||||||
|
size_t result = 0;
|
||||||
|
|
||||||
|
while (*p != NOTACHAR)
|
||||||
|
{
|
||||||
|
n = p;
|
||||||
|
while (n[0] == n[1] - 1) n++;
|
||||||
|
|
||||||
|
PCRE2_ASSERT(*p < 0xffff);
|
||||||
|
|
||||||
|
if (buffer != NULL)
|
||||||
|
{
|
||||||
|
buffer[0] = *p;
|
||||||
|
buffer[1] = *n;
|
||||||
|
buffer += 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
result += 2;
|
||||||
|
p = n + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint32_t
|
||||||
|
get_highest_char(uint32_t options)
|
||||||
|
{
|
||||||
|
(void)options; /* Avoid compiler warning. */
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
return MAX_UTF_CODE_POINT;
|
||||||
|
#else
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
return GET_MAX_CHAR_VALUE((options & PARSE_CLASS_UTF) != 0);
|
||||||
|
#else
|
||||||
|
return MAX_UCHAR_VALUE;
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Add a negated character list to a buffer. */
|
||||||
|
static size_t
|
||||||
|
append_negated_char_list(const uint32_t *p, uint32_t options, uint32_t *buffer)
|
||||||
|
{
|
||||||
|
const uint32_t *n;
|
||||||
|
uint32_t start = 0;
|
||||||
|
size_t result = 2;
|
||||||
|
|
||||||
|
PCRE2_ASSERT(*p > 0);
|
||||||
|
|
||||||
|
while (*p != NOTACHAR)
|
||||||
|
{
|
||||||
|
n = p;
|
||||||
|
while (n[0] == n[1] - 1) n++;
|
||||||
|
|
||||||
|
PCRE2_ASSERT(*p < 0xffff);
|
||||||
|
|
||||||
|
if (buffer != NULL)
|
||||||
|
{
|
||||||
|
buffer[0] = start;
|
||||||
|
buffer[1] = *p - 1;
|
||||||
|
buffer += 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
result += 2;
|
||||||
|
start = *n + 1;
|
||||||
|
p = n + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (buffer != NULL)
|
||||||
|
{
|
||||||
|
buffer[0] = start;
|
||||||
|
buffer[1] = get_highest_char(options);
|
||||||
|
buffer += 2;
|
||||||
|
(void)buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Append the range of all characters from 0x100 to the highest character.
When buffer is NULL nothing is stored and NULL is returned; otherwise the
result points after the appended pair. */

static uint32_t *
append_non_ascii_range(uint32_t options, uint32_t *buffer)
{
if (buffer != NULL)
  {
  buffer[0] = 0x100;
  buffer[1] = get_highest_char(options);
  buffer += 2;
  }

return buffer;
}
|
||||||
|
|
||||||
|
static size_t
|
||||||
|
parse_class(uint32_t *ptr, uint32_t options, uint32_t *buffer)
|
||||||
|
{
|
||||||
|
size_t total_size = 0;
|
||||||
|
size_t size;
|
||||||
|
uint32_t meta_arg;
|
||||||
|
uint32_t start_char;
|
||||||
|
|
||||||
|
while (TRUE)
|
||||||
|
{
|
||||||
|
switch (META_CODE(*ptr))
|
||||||
|
{
|
||||||
|
case META_ESCAPE:
|
||||||
|
meta_arg = META_DATA(*ptr);
|
||||||
|
switch (meta_arg)
|
||||||
|
{
|
||||||
|
case ESC_D:
|
||||||
|
case ESC_W:
|
||||||
|
case ESC_S:
|
||||||
|
buffer = append_non_ascii_range(options, buffer);
|
||||||
|
total_size += 2;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ESC_h:
|
||||||
|
size = append_char_list(PRIV(hspace_list), buffer);
|
||||||
|
total_size += size;
|
||||||
|
if (buffer != NULL) buffer += size;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ESC_H:
|
||||||
|
size = append_negated_char_list(PRIV(hspace_list), options, buffer);
|
||||||
|
total_size += size;
|
||||||
|
if (buffer != NULL) buffer += size;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ESC_v:
|
||||||
|
size = append_char_list(PRIV(vspace_list), buffer);
|
||||||
|
total_size += size;
|
||||||
|
if (buffer != NULL) buffer += size;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ESC_V:
|
||||||
|
size = append_negated_char_list(PRIV(vspace_list), options, buffer);
|
||||||
|
total_size += size;
|
||||||
|
if (buffer != NULL) buffer += size;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ESC_p:
|
||||||
|
case ESC_P:
|
||||||
|
ptr++;
|
||||||
|
if (meta_arg == ESC_p && (*ptr >> 16) == PT_ANY)
|
||||||
|
{
|
||||||
|
if (buffer != NULL)
|
||||||
|
{
|
||||||
|
buffer[0] = 0;
|
||||||
|
buffer[1] = get_highest_char(options);
|
||||||
|
buffer += 2;
|
||||||
|
}
|
||||||
|
total_size += 2;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
ptr++;
|
||||||
|
continue;
|
||||||
|
case META_POSIX_NEG:
|
||||||
|
buffer = append_non_ascii_range(options, buffer);
|
||||||
|
total_size += 2;
|
||||||
|
ptr += 2;
|
||||||
|
continue;
|
||||||
|
case META_POSIX:
|
||||||
|
ptr += 2;
|
||||||
|
continue;
|
||||||
|
case META_BIGVALUE:
|
||||||
|
/* Character literal */
|
||||||
|
ptr++;
|
||||||
|
break;
|
||||||
|
CLASS_END_CASES(*ptr)
|
||||||
|
if (*ptr >= META_END) return total_size;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
start_char = *ptr;
|
||||||
|
|
||||||
|
if (ptr[1] == META_RANGE_LITERAL || ptr[1] == META_RANGE_ESCAPED)
|
||||||
|
{
|
||||||
|
ptr += 2;
|
||||||
|
PCRE2_ASSERT(*ptr < META_END || *ptr == META_BIGVALUE);
|
||||||
|
|
||||||
|
if (*ptr == META_BIGVALUE) ptr++;
|
||||||
|
|
||||||
|
#ifdef EBCDIC
|
||||||
|
#error "Missing EBCDIC support"
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (options & PARSE_CLASS_CASELESS_UTF)
|
||||||
|
{
|
||||||
|
size = utf_caseless_extend(start_char, *ptr++, options, buffer);
|
||||||
|
if (buffer != NULL) buffer += size;
|
||||||
|
total_size += size;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (buffer != NULL)
|
||||||
|
{
|
||||||
|
buffer[0] = start_char;
|
||||||
|
buffer[1] = *ptr;
|
||||||
|
buffer += 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
ptr++;
|
||||||
|
total_size += 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
return total_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Extra uint32_t values for storing the lengths of range lists in
|
||||||
|
the worst case. Two uint32_t lengths and a range end for a range
|
||||||
|
starting before 255 */
|
||||||
|
#define CHAR_LIST_EXTRA_SIZE 3
|
||||||
|
|
||||||
|
/* Starting character values for each character list. */
|
||||||
|
|
||||||
|
static const uint32_t char_list_starts[] = {
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
|
XCL_CHAR_LIST_HIGH_32_START,
|
||||||
|
#endif
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 32 || defined SUPPORT_UNICODE
|
||||||
|
XCL_CHAR_LIST_LOW_32_START,
|
||||||
|
#endif
|
||||||
|
XCL_CHAR_LIST_HIGH_16_START,
|
||||||
|
/* Must be terminated by XCL_CHAR_LIST_LOW_16_START,
|
||||||
|
which also represents the end of the bitset. */
|
||||||
|
XCL_CHAR_LIST_LOW_16_START,
|
||||||
|
};
|
||||||
|
|
||||||
|
static class_ranges *
|
||||||
|
compile_optimize_class(uint32_t *start_ptr, uint32_t options,
|
||||||
|
uint32_t xoptions, compile_block *cb)
|
||||||
|
{
|
||||||
|
class_ranges* cranges;
|
||||||
|
uint32_t *ptr;
|
||||||
|
uint32_t *buffer;
|
||||||
|
uint32_t *dst;
|
||||||
|
uint32_t class_options = 0;
|
||||||
|
size_t range_list_size = 0, total_size, i;
|
||||||
|
uint32_t tmp1, tmp2;
|
||||||
|
const uint32_t *char_list_next;
|
||||||
|
uint16_t *next_char;
|
||||||
|
uint32_t char_list_start, char_list_end;
|
||||||
|
uint32_t range_start, range_end;
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (options & PCRE2_UTF)
|
||||||
|
class_options |= PARSE_CLASS_UTF;
|
||||||
|
|
||||||
|
if ((options & PCRE2_CASELESS) && (options & (PCRE2_UTF|PCRE2_UCP)))
|
||||||
|
class_options |= PARSE_CLASS_CASELESS_UTF;
|
||||||
|
|
||||||
|
if (xoptions & PCRE2_EXTRA_CASELESS_RESTRICT)
|
||||||
|
class_options |= PARSE_CLASS_RESTRICTED_UTF;
|
||||||
|
|
||||||
|
if (xoptions & PCRE2_EXTRA_TURKISH_CASING)
|
||||||
|
class_options |= PARSE_CLASS_TURKISH_UTF;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Compute required space for the range. */
|
||||||
|
|
||||||
|
range_list_size = parse_class(start_ptr, class_options, NULL);
|
||||||
|
PCRE2_ASSERT((range_list_size & 0x1) == 0);
|
||||||
|
|
||||||
|
/* Allocate buffer. The total_size also represents the end of the buffer. */
|
||||||
|
|
||||||
|
total_size = range_list_size +
|
||||||
|
((range_list_size >= 2) ? CHAR_LIST_EXTRA_SIZE : 0);
|
||||||
|
|
||||||
|
cranges = cb->cx->memctl.malloc(
|
||||||
|
sizeof(class_ranges) + total_size * sizeof(uint32_t),
|
||||||
|
cb->cx->memctl.memory_data);
|
||||||
|
|
||||||
|
if (cranges == NULL) return NULL;
|
||||||
|
|
||||||
|
cranges->next = NULL;
|
||||||
|
cranges->range_list_size = (uint16_t)range_list_size;
|
||||||
|
cranges->char_lists_types = 0;
|
||||||
|
cranges->char_lists_size = 0;
|
||||||
|
cranges->char_lists_start = 0;
|
||||||
|
|
||||||
|
if (range_list_size == 0) return cranges;
|
||||||
|
|
||||||
|
buffer = (uint32_t*)(cranges + 1);
|
||||||
|
parse_class(start_ptr, class_options, buffer);
|
||||||
|
|
||||||
|
/* Using <= instead of == to help static analysis. */
|
||||||
|
if (range_list_size <= 2) return cranges;
|
||||||
|
|
||||||
|
/* In-place sorting of ranges. */
|
||||||
|
|
||||||
|
i = (((range_list_size >> 2) - 1) << 1);
|
||||||
|
while (TRUE)
|
||||||
|
{
|
||||||
|
do_heapify(buffer, range_list_size, i);
|
||||||
|
if (i == 0) break;
|
||||||
|
i -= 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
i = range_list_size - 2;
|
||||||
|
while (TRUE)
|
||||||
|
{
|
||||||
|
tmp1 = buffer[i];
|
||||||
|
tmp2 = buffer[i + 1];
|
||||||
|
buffer[i] = buffer[0];
|
||||||
|
buffer[i + 1] = buffer[1];
|
||||||
|
buffer[0] = tmp1;
|
||||||
|
buffer[1] = tmp2;
|
||||||
|
|
||||||
|
do_heapify(buffer, i, 0);
|
||||||
|
if (i == 0) break;
|
||||||
|
i -= 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Merge ranges whenever possible. */
|
||||||
|
dst = buffer;
|
||||||
|
ptr = buffer + 2;
|
||||||
|
range_list_size -= 2;
|
||||||
|
|
||||||
|
/* The second condition is a very rare corner case, where the end of the last
|
||||||
|
range is the maximum character. This range cannot be extended further. */
|
||||||
|
|
||||||
|
while (range_list_size > 0 && dst[1] != ~(uint32_t)0)
|
||||||
|
{
|
||||||
|
if (dst[1] + 1 < ptr[0])
|
||||||
|
{
|
||||||
|
dst += 2;
|
||||||
|
dst[0] = ptr[0];
|
||||||
|
dst[1] = ptr[1];
|
||||||
|
}
|
||||||
|
else if (dst[1] < ptr[1]) dst[1] = ptr[1];
|
||||||
|
|
||||||
|
ptr += 2;
|
||||||
|
range_list_size -= 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
PCRE2_ASSERT(dst[1] <= get_highest_char(class_options));
|
||||||
|
|
||||||
|
/* When the number of ranges are less than six,
|
||||||
|
they are not converted to range lists. */
|
||||||
|
|
||||||
|
ptr = buffer;
|
||||||
|
while (ptr < dst && ptr[1] < 0x100) ptr += 2;
|
||||||
|
if (dst - ptr < (2 * (6 - 1)))
|
||||||
|
{
|
||||||
|
cranges->range_list_size = (uint16_t)(dst + 2 - buffer);
|
||||||
|
return cranges;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Compute character lists structures. */
|
||||||
|
|
||||||
|
char_list_next = char_list_starts;
|
||||||
|
char_list_start = *char_list_next++;
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
|
char_list_end = XCL_CHAR_LIST_HIGH_32_END;
|
||||||
|
#elif defined SUPPORT_UNICODE
|
||||||
|
char_list_end = XCL_CHAR_LIST_LOW_32_END;
|
||||||
|
#else
|
||||||
|
char_list_end = XCL_CHAR_LIST_HIGH_16_END;
|
||||||
|
#endif
|
||||||
|
next_char = (uint16_t*)(buffer + total_size);
|
||||||
|
|
||||||
|
tmp1 = 0;
|
||||||
|
tmp2 = ((sizeof(char_list_starts) / sizeof(uint32_t)) - 1) * XCL_TYPE_BIT_LEN;
|
||||||
|
PCRE2_ASSERT(tmp2 <= 3 * XCL_TYPE_BIT_LEN && tmp2 >= XCL_TYPE_BIT_LEN);
|
||||||
|
range_start = dst[0];
|
||||||
|
range_end = dst[1];
|
||||||
|
|
||||||
|
while (TRUE)
|
||||||
|
{
|
||||||
|
if (range_start >= char_list_start)
|
||||||
|
{
|
||||||
|
if (range_start == range_end || range_end < char_list_end)
|
||||||
|
{
|
||||||
|
tmp1++;
|
||||||
|
next_char--;
|
||||||
|
|
||||||
|
if (char_list_start < XCL_CHAR_LIST_LOW_32_START)
|
||||||
|
*next_char = (uint16_t)((range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END);
|
||||||
|
else
|
||||||
|
*(uint32_t*)(--next_char) =
|
||||||
|
(range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (range_start < range_end)
|
||||||
|
{
|
||||||
|
if (range_start > char_list_start)
|
||||||
|
{
|
||||||
|
tmp1++;
|
||||||
|
next_char--;
|
||||||
|
|
||||||
|
if (char_list_start < XCL_CHAR_LIST_LOW_32_START)
|
||||||
|
*next_char = (uint16_t)(range_start << XCL_CHAR_SHIFT);
|
||||||
|
else
|
||||||
|
*(uint32_t*)(--next_char) = (range_start << XCL_CHAR_SHIFT);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
cranges->char_lists_types |= XCL_BEGIN_WITH_RANGE << tmp2;
|
||||||
|
}
|
||||||
|
|
||||||
|
PCRE2_ASSERT((uint32_t*)next_char >= dst + 2);
|
||||||
|
|
||||||
|
if (dst > buffer)
|
||||||
|
{
|
||||||
|
dst -= 2;
|
||||||
|
range_start = dst[0];
|
||||||
|
range_end = dst[1];
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
range_start = 0;
|
||||||
|
range_end = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (range_end >= char_list_start)
|
||||||
|
{
|
||||||
|
PCRE2_ASSERT(range_start < char_list_start);
|
||||||
|
|
||||||
|
if (range_end < char_list_end)
|
||||||
|
{
|
||||||
|
tmp1++;
|
||||||
|
next_char--;
|
||||||
|
|
||||||
|
if (char_list_start < XCL_CHAR_LIST_LOW_32_START)
|
||||||
|
*next_char = (uint16_t)((range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END);
|
||||||
|
else
|
||||||
|
*(uint32_t*)(--next_char) =
|
||||||
|
(range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END;
|
||||||
|
|
||||||
|
PCRE2_ASSERT((uint32_t*)next_char >= dst + 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
cranges->char_lists_types |= XCL_BEGIN_WITH_RANGE << tmp2;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tmp1 >= XCL_ITEM_COUNT_MASK)
|
||||||
|
{
|
||||||
|
cranges->char_lists_types |= XCL_ITEM_COUNT_MASK << tmp2;
|
||||||
|
next_char--;
|
||||||
|
|
||||||
|
if (char_list_start < XCL_CHAR_LIST_LOW_32_START)
|
||||||
|
*next_char = (uint16_t)tmp1;
|
||||||
|
else
|
||||||
|
*(uint32_t*)(--next_char) = tmp1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
cranges->char_lists_types |= tmp1 << tmp2;
|
||||||
|
|
||||||
|
if (range_start < XCL_CHAR_LIST_LOW_16_START) break;
|
||||||
|
|
||||||
|
PCRE2_ASSERT(tmp2 >= XCL_TYPE_BIT_LEN);
|
||||||
|
char_list_end = char_list_start - 1;
|
||||||
|
char_list_start = *char_list_next++;
|
||||||
|
tmp1 = 0;
|
||||||
|
tmp2 -= XCL_TYPE_BIT_LEN;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dst[0] < XCL_CHAR_LIST_LOW_16_START) dst += 2;
|
||||||
|
PCRE2_ASSERT((uint16_t*)dst <= next_char);
|
||||||
|
|
||||||
|
cranges->char_lists_size =
|
||||||
|
(size_t)((uint8_t*)(buffer + total_size) - (uint8_t*)next_char);
|
||||||
|
cranges->char_lists_start = (size_t)((uint8_t*)next_char - (uint8_t*)buffer);
|
||||||
|
cranges->range_list_size = (uint16_t)(dst - buffer);
|
||||||
|
return cranges;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* SUPPORT_WIDE_CHARS */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
|
||||||
|
/* Set the bits of a 256-bit class bitmap for those characters below 256
that have (or, when negated is set, do not have) the given property. Bits
are only ever set, never cleared. For PT_ANY the whole bitmap is set unless
negated is set, in which case the bitmap is left unchanged.

Arguments:
  ptype      the property type (PT_xxx)
  pdata      the property data
  negated    TRUE for a negated property
  classbits  the 32 byte bitmap to update */

void PRIV(update_classbits)(uint32_t ptype, uint32_t pdata, BOOL negated,
  uint8_t *classbits)
{
/* Update PRIV(xclass) when this function is changed. */
int c;

if (ptype == PT_ANY)
  {
  if (!negated) memset(classbits, 0xff, 32);
  return;
  }

for (c = 0; c < 256; c++)
  {
  const ucd_record *prop = GET_UCD(c);
  int chartype = prop->chartype;
  uint32_t gentype = PRIV(ucp_gentype)[chartype];
  BOOL set_bit = FALSE;

  (void)set_bit;

  switch (ptype)
    {
    case PT_LAMP:
    set_bit = (chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt);
    break;

    case PT_GC:
    set_bit = (gentype == pdata);
    break;

    case PT_PC:
    set_bit = (chartype == pdata);
    break;

    case PT_SC:
    set_bit = (prop->script == pdata);
    break;

    case PT_SCX:
    set_bit = (prop->script == pdata ||
      MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), pdata) != 0);
    break;

    case PT_ALNUM:
    set_bit = (gentype == ucp_L || gentype == ucp_N);
    break;

    case PT_SPACE:    /* Perl space */
    case PT_PXSPACE:  /* POSIX space */
    switch(c)
      {
      HSPACE_BYTE_CASES:
      VSPACE_BYTE_CASES:
      set_bit = TRUE;
      break;

      default:
      set_bit = (gentype == ucp_Z);
      break;
      }
    break;

    case PT_WORD:
    set_bit = (gentype == ucp_L || gentype == ucp_N ||
               chartype == ucp_Mn || chartype == ucp_Pc);
    break;

    case PT_UCNC:
    set_bit = (c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
               c == CHAR_GRAVE_ACCENT || c >= 0xa0);
    break;

    case PT_BIDICL:
    set_bit = (UCD_BIDICLASS_PROP(prop) == pdata);
    break;

    case PT_BOOL:
    set_bit = MAPBIT(PRIV(ucd_boolprop_sets) +
                     UCD_BPROPS_PROP(prop), pdata) != 0;
    break;

    case PT_PXGRAPH:
    set_bit = (gentype != ucp_Z && (gentype != ucp_C || chartype == ucp_Cf));
    break;

    case PT_PXPRINT:
    set_bit = (chartype != ucp_Zl && chartype != ucp_Zp &&
       (gentype != ucp_C || chartype == ucp_Cf));
    break;

    case PT_PXPUNCT:
    set_bit = (gentype == ucp_P || (c < 128 && gentype == ucp_S));
    break;

    default:
    PCRE2_ASSERT(ptype == PT_PXXDIGIT);
    set_bit = (c >= CHAR_0 && c <= CHAR_9) ||
              (c >= CHAR_A && c <= CHAR_F) ||
              (c >= CHAR_a && c <= CHAR_f);
    break;
    }

  if (negated) set_bit = !set_bit;

  /* Byte c / 8 of the bitmap holds the bit of character c. */
  if (set_bit) classbits[c >> 3] |= (uint8_t)(1 << (c & 0x7));
  }
}
|
||||||
|
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef SUPPORT_WIDE_CHARS

/*************************************************
*           XClass related properties            *
*************************************************/

/* Independent flag bits describing the extended class (XClass) that is being
built. Each flag occupies its own bit so that they can be OR-ed together. */

/* An XClass has to be generated for this class. */
#define XCLASS_REQUIRED        0x1

/* The XClass contains at least one 8-bit (< 256) character. */
#define XCLASS_HAS_8BIT_CHARS  0x2

/* The XClass contains property items. */
#define XCLASS_HAS_PROPS       0x4

/* The XClass contains character lists. */
#define XCLASS_HAS_CHAR_LISTS  0x8

/* The XClass matches every character that is >= 256. */
#define XCLASS_HIGH_ANY        0x10

#endif /* SUPPORT_WIDE_CHARS */
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Internal entry point for add range to class *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function sets the overall range for characters < 256.
|
||||||
|
It also handles non-utf case folding.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
options the options bits
|
||||||
|
xoptions the extra options bits
|
||||||
|
cb compile data
|
||||||
|
start start of range character
|
||||||
|
end end of range character
|
||||||
|
|
||||||
|
Returns: cb->classbits is updated
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void
|
||||||
|
add_to_class(uint32_t options, uint32_t xoptions, compile_block *cb,
|
||||||
|
uint32_t start, uint32_t end)
|
||||||
|
{
|
||||||
|
uint8_t *classbits = cb->classbits.classbits;
|
||||||
|
uint32_t c, byte_start, byte_end;
|
||||||
|
uint32_t classbits_end = (end <= 0xff ? end : 0xff);
|
||||||
|
|
||||||
|
/* If caseless matching is required, scan the range and process alternate
|
||||||
|
cases. In Unicode, there are 8-bit characters that have alternate cases that
|
||||||
|
are greater than 255 and vice-versa (though these may be ignored if caseless
|
||||||
|
restriction is in force). Sometimes we can just extend the original range. */
|
||||||
|
|
||||||
|
if ((options & PCRE2_CASELESS) != 0)
|
||||||
|
{
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
/* UTF mode. This branch is taken if we don't support wide characters (e.g.
|
||||||
|
8-bit library, without UTF), but we do treat those characters as Unicode
|
||||||
|
(if UCP flag is set). In this case, we only need to expand the character class
|
||||||
|
set to include the case pairs which are in the 0-255 codepoint range. */
|
||||||
|
if ((options & (PCRE2_UTF|PCRE2_UCP)) != 0)
|
||||||
|
{
|
||||||
|
BOOL turkish_i = (xoptions & (PCRE2_EXTRA_TURKISH_CASING|PCRE2_EXTRA_CASELESS_RESTRICT)) ==
|
||||||
|
PCRE2_EXTRA_TURKISH_CASING;
|
||||||
|
if (start < 128)
|
||||||
|
{
|
||||||
|
uint32_t lo_end = (classbits_end < 127 ? classbits_end : 127);
|
||||||
|
for (c = start; c <= lo_end; c++)
|
||||||
|
{
|
||||||
|
if (turkish_i && UCD_ANY_I(c)) continue;
|
||||||
|
SETBIT(classbits, cb->fcc[c]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (classbits_end >= 128)
|
||||||
|
{
|
||||||
|
uint32_t hi_start = (start > 128 ? start : 128);
|
||||||
|
for (c = hi_start; c <= classbits_end; c++)
|
||||||
|
{
|
||||||
|
uint32_t co = UCD_OTHERCASE(c);
|
||||||
|
if (co <= 0xff) SETBIT(classbits, co);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
else
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
/* Not UTF mode */
|
||||||
|
{
|
||||||
|
for (c = start; c <= classbits_end; c++)
|
||||||
|
SETBIT(classbits, cb->fcc[c]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Use the bitmap for characters < 256. Otherwise use extra data. */
|
||||||
|
|
||||||
|
byte_start = (start + 7) >> 3;
|
||||||
|
byte_end = (classbits_end + 1) >> 3;
|
||||||
|
|
||||||
|
if (byte_start >= byte_end)
|
||||||
|
{
|
||||||
|
for (c = start; c <= classbits_end; c++)
|
||||||
|
/* Regardless of start, c will always be <= 255. */
|
||||||
|
SETBIT(classbits, c);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (c = byte_start; c < byte_end; c++)
|
||||||
|
classbits[c] = 0xff;
|
||||||
|
|
||||||
|
byte_start <<= 3;
|
||||||
|
byte_end <<= 3;
|
||||||
|
|
||||||
|
for (c = start; c < byte_start; c++)
|
||||||
|
SETBIT(classbits, c);
|
||||||
|
|
||||||
|
for (c = byte_end; c <= classbits_end; c++)
|
||||||
|
SETBIT(classbits, c);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
/*************************************************
|
||||||
|
* Internal entry point for add list to class *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function is used for adding a list of horizontal or vertical whitespace
|
||||||
|
characters to a class. The list must be in order so that ranges of characters
|
||||||
|
can be detected and handled appropriately. This function sets the overall range
|
||||||
|
so that the internal functions can try to avoid duplication when handling
|
||||||
|
case-independence.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
options the options bits
|
||||||
|
xoptions the extra options bits
|
||||||
|
cb contains pointers to tables etc.
|
||||||
|
p points to row of 32-bit values, terminated by NOTACHAR
|
||||||
|
|
||||||
|
Returns: cb->classbits is updated
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void
|
||||||
|
add_list_to_class(uint32_t options, uint32_t xoptions, compile_block *cb,
|
||||||
|
const uint32_t *p)
|
||||||
|
{
|
||||||
|
while (p[0] < 256)
|
||||||
|
{
|
||||||
|
unsigned int n = 0;
|
||||||
|
|
||||||
|
while(p[n+1] == p[0] + n + 1) n++;
|
||||||
|
add_to_class(options, xoptions, cb, p[0], p[n]);
|
||||||
|
|
||||||
|
p += n + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Add characters not in a list to a class *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function is used for adding the complement of a list of horizontal or
|
||||||
|
vertical whitespace to a class. The list must be in order.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
options the options bits
|
||||||
|
xoptions the extra options bits
|
||||||
|
cb contains pointers to tables etc.
|
||||||
|
p points to row of 32-bit values, terminated by NOTACHAR
|
||||||
|
|
||||||
|
Returns: cb->classbits is updated
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void
|
||||||
|
add_not_list_to_class(uint32_t options, uint32_t xoptions, compile_block *cb,
|
||||||
|
const uint32_t *p)
|
||||||
|
{
|
||||||
|
if (p[0] > 0)
|
||||||
|
add_to_class(options, xoptions, cb, 0, p[0] - 1);
|
||||||
|
while (p[0] < 256)
|
||||||
|
{
|
||||||
|
while (p[1] == p[0] + 1) p++;
|
||||||
|
add_to_class(options, xoptions, cb, p[0] + 1, (p[1] > 255) ? 255 : p[1] - 1);
|
||||||
|
p++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Main entry-point to compile a character class *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function consumes a "leaf", which is a set of characters that will
|
||||||
|
become a single OP_CLASS, OP_NCLASS, OP_XCLASS, or OP_ALLANY. */
|
||||||
|
|
||||||
|
uint32_t *
|
||||||
|
PRIV(compile_class_not_nested)(uint32_t options, uint32_t xoptions,
|
||||||
|
uint32_t *start_ptr, PCRE2_UCHAR **pcode, BOOL negate_class, BOOL* has_bitmap,
|
||||||
|
int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr)
|
||||||
|
{
|
||||||
|
uint32_t *pptr = start_ptr;
|
||||||
|
PCRE2_UCHAR *code = *pcode;
|
||||||
|
BOOL should_flip_negation;
|
||||||
|
const uint8_t *cbits = cb->cbits;
|
||||||
|
/* Some functions such as add_to_class() or eclass processing
|
||||||
|
expect that the bitset is stored in cb->classbits.classbits. */
|
||||||
|
uint8_t *const classbits = cb->classbits.classbits;
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
BOOL utf = (options & PCRE2_UTF) != 0;
|
||||||
|
#else /* No Unicode support */
|
||||||
|
BOOL utf = FALSE;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Helper variables for OP_XCLASS opcode (for characters > 255). */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_WIDE_CHARS
|
||||||
|
uint32_t xclass_props;
|
||||||
|
PCRE2_UCHAR *class_uchardata;
|
||||||
|
class_ranges* cranges;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* If an XClass contains a negative special such as \S, we need to flip the
|
||||||
|
negation flag at the end, so that support for characters > 255 works correctly
|
||||||
|
(they are all included in the class). An XClass may need to insert specific
|
||||||
|
matching or non-matching code for wide characters.
|
||||||
|
*/
|
||||||
|
|
||||||
|
should_flip_negation = FALSE;
|
||||||
|
|
||||||
|
/* XClass will be used when characters > 255 might match. */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_WIDE_CHARS
|
||||||
|
xclass_props = 0;
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
cranges = NULL;
|
||||||
|
|
||||||
|
if (utf)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
if (lengthptr != NULL)
|
||||||
|
{
|
||||||
|
cranges = compile_optimize_class(pptr, options, xoptions, cb);
|
||||||
|
|
||||||
|
if (cranges == NULL)
|
||||||
|
{
|
||||||
|
*errorcodeptr = ERR21;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Caching the pre-processed character ranges. */
|
||||||
|
if (cb->next_cranges != NULL)
|
||||||
|
cb->next_cranges->next = cranges;
|
||||||
|
else
|
||||||
|
cb->cranges = cranges;
|
||||||
|
|
||||||
|
cb->next_cranges = cranges;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Reuse the pre-processed character ranges. */
|
||||||
|
cranges = cb->cranges;
|
||||||
|
PCRE2_ASSERT(cranges != NULL);
|
||||||
|
cb->cranges = cranges->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cranges->range_list_size > 0)
|
||||||
|
{
|
||||||
|
const uint32_t *ranges = (const uint32_t*)(cranges + 1);
|
||||||
|
|
||||||
|
if (ranges[0] <= 255)
|
||||||
|
xclass_props |= XCLASS_HAS_8BIT_CHARS;
|
||||||
|
|
||||||
|
if (ranges[cranges->range_list_size - 1] == GET_MAX_CHAR_VALUE(utf) &&
|
||||||
|
ranges[cranges->range_list_size - 2] <= 256)
|
||||||
|
xclass_props |= XCLASS_HIGH_ANY;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class_uchardata = code + LINK_SIZE + 2; /* For XCLASS items */
|
||||||
|
#endif /* SUPPORT_WIDE_CHARS */
|
||||||
|
|
||||||
|
/* Initialize the 256-bit (32-byte) bit map to all zeros. We build the map
|
||||||
|
in a temporary bit of memory, in case the class contains fewer than two
|
||||||
|
8-bit characters because in that case the compiled code doesn't use the bit
|
||||||
|
map. */
|
||||||
|
|
||||||
|
memset(classbits, 0, 32);
|
||||||
|
|
||||||
|
/* Process items until end_ptr is reached. */
|
||||||
|
|
||||||
|
while (TRUE)
|
||||||
|
{
|
||||||
|
uint32_t meta = *(pptr++);
|
||||||
|
BOOL local_negate;
|
||||||
|
int posix_class;
|
||||||
|
int taboffset, tabopt;
|
||||||
|
class_bits_storage pbits;
|
||||||
|
uint32_t escape, c;
|
||||||
|
|
||||||
|
/* Handle POSIX classes such as [:alpha:] etc. */
|
||||||
|
switch (META_CODE(meta))
|
||||||
|
{
|
||||||
|
case META_POSIX:
|
||||||
|
case META_POSIX_NEG:
|
||||||
|
|
||||||
|
local_negate = (meta == META_POSIX_NEG);
|
||||||
|
posix_class = *(pptr++);
|
||||||
|
|
||||||
|
if (local_negate) should_flip_negation = TRUE; /* Note negative special */
|
||||||
|
|
||||||
|
/* If matching is caseless, upper and lower are converted to alpha.
|
||||||
|
This relies on the fact that the class table starts with alpha,
|
||||||
|
lower, upper as the first 3 entries. */
|
||||||
|
|
||||||
|
if ((options & PCRE2_CASELESS) != 0 && posix_class <= 2)
|
||||||
|
posix_class = 0;
|
||||||
|
|
||||||
|
/* When PCRE2_UCP is set, some of the POSIX classes are converted to
|
||||||
|
different escape sequences that use Unicode properties \p or \P.
|
||||||
|
Others that are not available via \p or \P have to generate
|
||||||
|
XCL_PROP/XCL_NOTPROP directly, which is done here. */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
/* TODO This entire block of code here appears to be unreachable!? I simply
|
||||||
|
can't see how it can be hit, given that the frontend parser doesn't emit
|
||||||
|
META_POSIX for GRAPH/PRINT/PUNCT when UCP is set. */
|
||||||
|
if ((options & PCRE2_UCP) != 0 &&
|
||||||
|
(xoptions & PCRE2_EXTRA_ASCII_POSIX) == 0)
|
||||||
|
{
|
||||||
|
uint32_t ptype;
|
||||||
|
|
||||||
|
switch(posix_class)
|
||||||
|
{
|
||||||
|
case PC_GRAPH:
|
||||||
|
case PC_PRINT:
|
||||||
|
case PC_PUNCT:
|
||||||
|
ptype = (posix_class == PC_GRAPH)? PT_PXGRAPH :
|
||||||
|
(posix_class == PC_PRINT)? PT_PXPRINT : PT_PXPUNCT;
|
||||||
|
|
||||||
|
PRIV(update_classbits)(ptype, 0, local_negate, classbits);
|
||||||
|
|
||||||
|
if ((xclass_props & XCLASS_HIGH_ANY) == 0)
|
||||||
|
{
|
||||||
|
if (lengthptr != NULL)
|
||||||
|
*lengthptr += 3;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
*class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP;
|
||||||
|
*class_uchardata++ = (PCRE2_UCHAR)ptype;
|
||||||
|
*class_uchardata++ = 0;
|
||||||
|
}
|
||||||
|
xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_PROPS;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* For the other POSIX classes (ex: ascii) we are going to
|
||||||
|
fall through to the non-UCP case and build a bit map for
|
||||||
|
characters with code points less than 256. However, if we are in
|
||||||
|
a negated POSIX class, characters with code points greater than
|
||||||
|
255 must either all match or all not match, depending on whether
|
||||||
|
the whole class is not or is negated. For example, for
|
||||||
|
[[:^ascii:]... they must all match, whereas for [^[:^ascii:]...
|
||||||
|
they must not.
|
||||||
|
|
||||||
|
In the special case where there are no xclass items, this is
|
||||||
|
automatically handled by the use of OP_CLASS or OP_NCLASS, but an
|
||||||
|
explicit range is needed for OP_XCLASS. Setting a flag here
|
||||||
|
causes the range to be generated later when it is known that
|
||||||
|
OP_XCLASS is required. In the 8-bit library this is relevant only in
|
||||||
|
utf mode, since no wide characters can exist otherwise. */
|
||||||
|
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
/* In the non-UCP case, or when UCP makes no difference, we build the
|
||||||
|
bit map for the POSIX class in a chunk of local store because we may
|
||||||
|
be adding and subtracting from it, and we don't want to subtract bits
|
||||||
|
that may be in the main map already. At the end we or the result into
|
||||||
|
the bit map that is being built. */
|
||||||
|
|
||||||
|
posix_class *= 3;
|
||||||
|
|
||||||
|
/* Copy in the first table (always present) */
|
||||||
|
|
||||||
|
memcpy(pbits.classbits, cbits + PRIV(posix_class_maps)[posix_class], 32);
|
||||||
|
|
||||||
|
/* If there is a second table, add or remove it as required. */
|
||||||
|
|
||||||
|
taboffset = PRIV(posix_class_maps)[posix_class + 1];
|
||||||
|
tabopt = PRIV(posix_class_maps)[posix_class + 2];
|
||||||
|
|
||||||
|
if (taboffset >= 0)
|
||||||
|
{
|
||||||
|
if (tabopt >= 0)
|
||||||
|
for (int i = 0; i < 32; i++)
|
||||||
|
pbits.classbits[i] |= cbits[i + taboffset];
|
||||||
|
else
|
||||||
|
for (int i = 0; i < 32; i++)
|
||||||
|
pbits.classbits[i] &= (uint8_t)(~cbits[i + taboffset]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Now see if we need to remove any special characters. An option
|
||||||
|
value of 1 removes vertical space and 2 removes underscore. */
|
||||||
|
|
||||||
|
if (tabopt < 0) tabopt = -tabopt;
|
||||||
|
if (tabopt == 1) pbits.classbits[1] &= ~0x3c;
|
||||||
|
else if (tabopt == 2) pbits.classbits[11] &= 0x7f;
|
||||||
|
|
||||||
|
/* Add the POSIX table or its complement into the main table that is
|
||||||
|
being built and we are done. */
|
||||||
|
|
||||||
|
{
|
||||||
|
uint32_t *classwords = cb->classbits.classwords;
|
||||||
|
|
||||||
|
if (local_negate)
|
||||||
|
for (int i = 0; i < 8; i++)
|
||||||
|
classwords[i] |= (uint32_t)(~pbits.classwords[i]);
|
||||||
|
else
|
||||||
|
for (int i = 0; i < 8; i++)
|
||||||
|
classwords[i] |= pbits.classwords[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef SUPPORT_WIDE_CHARS
|
||||||
|
/* Every class contains at least one < 256 character. */
|
||||||
|
xclass_props |= XCLASS_HAS_8BIT_CHARS;
|
||||||
|
#endif
|
||||||
|
continue; /* End of POSIX handling */
|
||||||
|
|
||||||
|
/* Other than POSIX classes, the only items we should encounter are
|
||||||
|
\d-type escapes and literal characters (possibly as ranges). */
|
||||||
|
case META_BIGVALUE:
|
||||||
|
meta = *(pptr++);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case META_ESCAPE:
|
||||||
|
escape = META_DATA(meta);
|
||||||
|
|
||||||
|
switch(escape)
|
||||||
|
{
|
||||||
|
case ESC_d:
|
||||||
|
for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit];
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ESC_D:
|
||||||
|
should_flip_negation = TRUE;
|
||||||
|
for (int i = 0; i < 32; i++)
|
||||||
|
classbits[i] |= (uint8_t)(~cbits[i+cbit_digit]);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ESC_w:
|
||||||
|
for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word];
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ESC_W:
|
||||||
|
should_flip_negation = TRUE;
|
||||||
|
for (int i = 0; i < 32; i++)
|
||||||
|
classbits[i] |= (uint8_t)(~cbits[i+cbit_word]);
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Perl 5.004 onwards omitted VT from \s, but restored it at Perl
|
||||||
|
5.18. Before PCRE 8.34, we had to preserve the VT bit if it was
|
||||||
|
previously set by something earlier in the character class.
|
||||||
|
Luckily, the value of CHAR_VT is 0x0b in both ASCII and EBCDIC, so
|
||||||
|
we could just adjust the appropriate bit. From PCRE 8.34 we no
|
||||||
|
longer treat \s and \S specially. */
|
||||||
|
|
||||||
|
case ESC_s:
|
||||||
|
for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space];
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ESC_S:
|
||||||
|
should_flip_negation = TRUE;
|
||||||
|
for (int i = 0; i < 32; i++)
|
||||||
|
classbits[i] |= (uint8_t)(~cbits[i+cbit_space]);
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* When adding the horizontal or vertical space lists to a class, or
|
||||||
|
their complements, disable PCRE2_CASELESS, because it just wastes
|
||||||
|
time, and in the "not-x" UTF cases can create unwanted duplicates in
|
||||||
|
the XCLASS list (provoked by characters that have more than one other
|
||||||
|
case and by both cases being in the same "not-x" sublist). */
|
||||||
|
|
||||||
|
case ESC_h:
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (cranges != NULL) break;
|
||||||
|
#endif
|
||||||
|
add_list_to_class(options & ~PCRE2_CASELESS, xoptions,
|
||||||
|
cb, PRIV(hspace_list));
|
||||||
|
#else
|
||||||
|
PCRE2_ASSERT(cranges != NULL);
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ESC_H:
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (cranges != NULL) break;
|
||||||
|
#endif
|
||||||
|
add_not_list_to_class(options & ~PCRE2_CASELESS, xoptions,
|
||||||
|
cb, PRIV(hspace_list));
|
||||||
|
#else
|
||||||
|
PCRE2_ASSERT(cranges != NULL);
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ESC_v:
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (cranges != NULL) break;
|
||||||
|
#endif
|
||||||
|
add_list_to_class(options & ~PCRE2_CASELESS, xoptions,
|
||||||
|
cb, PRIV(vspace_list));
|
||||||
|
#else
|
||||||
|
PCRE2_ASSERT(cranges != NULL);
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ESC_V:
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (cranges != NULL) break;
|
||||||
|
#endif
|
||||||
|
add_not_list_to_class(options & ~PCRE2_CASELESS, xoptions,
|
||||||
|
cb, PRIV(vspace_list));
|
||||||
|
#else
|
||||||
|
PCRE2_ASSERT(cranges != NULL);
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* If Unicode is not supported, \P and \p are not allowed and are
|
||||||
|
faulted at parse time, so will never appear here. */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
case ESC_p:
|
||||||
|
case ESC_P:
|
||||||
|
{
|
||||||
|
uint32_t ptype = *pptr >> 16;
|
||||||
|
uint32_t pdata = *(pptr++) & 0xffff;
|
||||||
|
|
||||||
|
/* The "Any" is processed by PRIV(update_classbits)(). */
|
||||||
|
if (ptype == PT_ANY)
|
||||||
|
{
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
if (!utf && escape == ESC_p) memset(classbits, 0xff, 32);
|
||||||
|
#endif
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
PRIV(update_classbits)(ptype, pdata, (escape == ESC_P), classbits);
|
||||||
|
|
||||||
|
if ((xclass_props & XCLASS_HIGH_ANY) == 0)
|
||||||
|
{
|
||||||
|
if (lengthptr != NULL)
|
||||||
|
*lengthptr += 3;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
*class_uchardata++ = (escape == ESC_p)? XCL_PROP : XCL_NOTPROP;
|
||||||
|
*class_uchardata++ = ptype;
|
||||||
|
*class_uchardata++ = pdata;
|
||||||
|
}
|
||||||
|
xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_PROPS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef SUPPORT_WIDE_CHARS
|
||||||
|
/* Every non-property class contains at least one < 256 character. */
|
||||||
|
xclass_props |= XCLASS_HAS_8BIT_CHARS;
|
||||||
|
#endif
|
||||||
|
/* End handling \d-type escapes */
|
||||||
|
continue;
|
||||||
|
|
||||||
|
CLASS_END_CASES(meta)
|
||||||
|
/* Literals. */
|
||||||
|
if (meta < META_END) break;
|
||||||
|
/* Non-literals: end of class contents. */
|
||||||
|
goto END_PROCESSING;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* A literal character may be followed by a range meta. At parse time
|
||||||
|
there are checks for out-of-order characters, for ranges where the two
|
||||||
|
characters are equal, and for hyphens that cannot indicate a range. At
|
||||||
|
this point, therefore, no checking is needed. */
|
||||||
|
|
||||||
|
c = meta;
|
||||||
|
|
||||||
|
/* Remember if \r or \n were explicitly used */
|
||||||
|
|
||||||
|
if (c == CHAR_CR || c == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF;
|
||||||
|
|
||||||
|
/* Process a character range */
|
||||||
|
|
||||||
|
if (*pptr == META_RANGE_LITERAL || *pptr == META_RANGE_ESCAPED)
|
||||||
|
{
|
||||||
|
uint32_t d;
|
||||||
|
|
||||||
|
#ifdef EBCDIC
|
||||||
|
BOOL range_is_literal = (*pptr == META_RANGE_LITERAL);
|
||||||
|
#endif
|
||||||
|
++pptr;
|
||||||
|
d = *(pptr++);
|
||||||
|
if (d == META_BIGVALUE) d = *(pptr++);
|
||||||
|
|
||||||
|
/* Remember an explicit \r or \n, and add the range to the class. */
|
||||||
|
|
||||||
|
if (d == CHAR_CR || d == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF;
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (cranges != NULL) continue;
|
||||||
|
xclass_props |= XCLASS_HAS_8BIT_CHARS;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* In an EBCDIC environment, Perl treats alphabetic ranges specially
|
||||||
|
because there are holes in the encoding, and simply using the range
|
||||||
|
A-Z (for example) would include the characters in the holes. This
|
||||||
|
applies only to literal ranges; [\xC1-\xE9] is different to [A-Z]. */
|
||||||
|
|
||||||
|
#ifdef EBCDIC
|
||||||
|
if (range_is_literal &&
|
||||||
|
(cb->ctypes[c] & ctype_letter) != 0 &&
|
||||||
|
(cb->ctypes[d] & ctype_letter) != 0 &&
|
||||||
|
(c <= CHAR_z) == (d <= CHAR_z))
|
||||||
|
{
|
||||||
|
uint32_t uc = (d <= CHAR_z)? 0 : 64;
|
||||||
|
uint32_t C = c - uc;
|
||||||
|
uint32_t D = d - uc;
|
||||||
|
|
||||||
|
if (C <= CHAR_i)
|
||||||
|
{
|
||||||
|
add_to_class(options, xoptions, cb, C + uc,
|
||||||
|
((D < CHAR_i)? D : CHAR_i) + uc);
|
||||||
|
C = CHAR_j;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (C <= D && C <= CHAR_r)
|
||||||
|
{
|
||||||
|
add_to_class(options, xoptions, cb, C + uc,
|
||||||
|
((D < CHAR_r)? D : CHAR_r) + uc);
|
||||||
|
C = CHAR_s;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (C <= D)
|
||||||
|
add_to_class(options, xoptions, cb, C + uc, D + uc);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
/* Not an EBCDIC special range */
|
||||||
|
|
||||||
|
add_to_class(options, xoptions, cb, c, d);
|
||||||
|
#else
|
||||||
|
PCRE2_ASSERT(cranges != NULL);
|
||||||
|
#endif
|
||||||
|
continue;
|
||||||
|
} /* End of range handling */
|
||||||
|
|
||||||
|
/* Character ranges are ignored when class_ranges is present. */
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (cranges != NULL) continue;
|
||||||
|
xclass_props |= XCLASS_HAS_8BIT_CHARS;
|
||||||
|
#endif
|
||||||
|
/* Handle a single character. */
|
||||||
|
|
||||||
|
add_to_class(options, xoptions, cb, meta, meta);
|
||||||
|
#else
|
||||||
|
PCRE2_ASSERT(cranges != NULL);
|
||||||
|
#endif
|
||||||
|
} /* End of main class-processing loop */
|
||||||
|
|
||||||
|
END_PROCESSING:
|
||||||
|
|
||||||
|
#ifdef SUPPORT_WIDE_CHARS
|
||||||
|
PCRE2_ASSERT((xclass_props & XCLASS_HAS_PROPS) == 0 ||
|
||||||
|
(xclass_props & XCLASS_HIGH_ANY) == 0);
|
||||||
|
|
||||||
|
if (cranges != NULL)
|
||||||
|
{
|
||||||
|
uint32_t *range = (uint32_t*)(cranges + 1);
|
||||||
|
uint32_t *end = range + cranges->range_list_size;
|
||||||
|
|
||||||
|
while (range < end && range[0] < 256)
|
||||||
|
{
|
||||||
|
PCRE2_ASSERT((xclass_props & XCLASS_HAS_8BIT_CHARS) != 0);
|
||||||
|
/* Add range to bitset. If we are in UTF or UCP mode, then clear the
|
||||||
|
caseless bit, because the cranges handle caselessness (only) in this
|
||||||
|
condition; see the condition for PARSE_CLASS_CASELESS_UTF in
|
||||||
|
compile_optimize_class(). */
|
||||||
|
add_to_class(((options & (PCRE2_UTF|PCRE2_UCP)) != 0)?
|
||||||
|
(options & ~PCRE2_CASELESS) : options, xoptions, cb, range[0], range[1]);
|
||||||
|
|
||||||
|
if (range[1] > 255) break;
|
||||||
|
range += 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cranges->char_lists_size > 0)
|
||||||
|
{
|
||||||
|
/* The cranges structure is still used and freed later. */
|
||||||
|
PCRE2_ASSERT((xclass_props & XCLASS_HIGH_ANY) == 0);
|
||||||
|
xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_CHAR_LISTS;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if ((xclass_props & XCLASS_HIGH_ANY) != 0)
|
||||||
|
{
|
||||||
|
PCRE2_ASSERT(range + 2 == end && range[0] <= 256 &&
|
||||||
|
range[1] >= GET_MAX_CHAR_VALUE(utf));
|
||||||
|
should_flip_negation = TRUE;
|
||||||
|
range = end;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (range < end)
|
||||||
|
{
|
||||||
|
uint32_t range_start = range[0];
|
||||||
|
uint32_t range_end = range[1];
|
||||||
|
|
||||||
|
range += 2;
|
||||||
|
xclass_props |= XCLASS_REQUIRED;
|
||||||
|
|
||||||
|
if (range_start < 256) range_start = 256;
|
||||||
|
|
||||||
|
if (lengthptr != NULL)
|
||||||
|
{
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (utf)
|
||||||
|
{
|
||||||
|
*lengthptr += 1;
|
||||||
|
|
||||||
|
if (range_start < range_end)
|
||||||
|
*lengthptr += PRIV(ord2utf)(range_start, class_uchardata);
|
||||||
|
|
||||||
|
*lengthptr += PRIV(ord2utf)(range_end, class_uchardata);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
*lengthptr += range_start < range_end ? 3 : 2;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (utf)
|
||||||
|
{
|
||||||
|
if (range_start < range_end)
|
||||||
|
{
|
||||||
|
*class_uchardata++ = XCL_RANGE;
|
||||||
|
class_uchardata += PRIV(ord2utf)(range_start, class_uchardata);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
*class_uchardata++ = XCL_SINGLE;
|
||||||
|
|
||||||
|
class_uchardata += PRIV(ord2utf)(range_end, class_uchardata);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
/* Without UTF support, character values are constrained
|
||||||
|
by the bit length, and can only be > 256 for 16-bit and
|
||||||
|
32-bit libraries. */
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
|
if (range_start < range_end)
|
||||||
|
{
|
||||||
|
*class_uchardata++ = XCL_RANGE;
|
||||||
|
*class_uchardata++ = range_start;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
*class_uchardata++ = XCL_SINGLE;
|
||||||
|
|
||||||
|
*class_uchardata++ = range_end;
|
||||||
|
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lengthptr == NULL)
|
||||||
|
cb->cx->memctl.free(cranges, cb->cx->memctl.memory_data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif /* SUPPORT_WIDE_CHARS */
|
||||||
|
|
||||||
|
/* If there are characters with values > 255, or Unicode property settings
|
||||||
|
(\p or \P), we have to compile an extended class, with its own opcode,
|
||||||
|
unless there were no property settings and there was a negated special such
|
||||||
|
as \S in the class, and PCRE2_UCP is not set, because in that case all
|
||||||
|
characters > 255 are in or not in the class, so any that were explicitly
|
||||||
|
given as well can be ignored.
|
||||||
|
|
||||||
|
In the UCP case, if certain negated POSIX classes (ex: [:^ascii:]) were
|
||||||
|
present in a class, we either have to match or not match all wide
|
||||||
|
characters (depending on whether the whole class is or is not negated).
|
||||||
|
This requirement is indicated by match_all_or_no_wide_chars being true.
|
||||||
|
We do this by including an explicit range, which works in both cases.
|
||||||
|
This applies only in UTF and 16-bit and 32-bit non-UTF modes, since there
|
||||||
|
cannot be any wide characters in 8-bit non-UTF mode.
|
||||||
|
|
||||||
|
When there *are* properties in a positive UTF-8 or any 16-bit or 32-bit
|
||||||
|
class where \S etc is present without PCRE2_UCP, causing an extended class
|
||||||
|
to be compiled, we make sure that all characters > 255 are included by
|
||||||
|
forcing match_all_or_no_wide_chars to be true.
|
||||||
|
|
||||||
|
If, when generating an xclass, there are no characters < 256, we can omit
|
||||||
|
the bitmap in the actual compiled code. */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_WIDE_CHARS /* Defined for 16/32 bits, or 8-bit with Unicode */
|
||||||
|
if ((xclass_props & XCLASS_REQUIRED) != 0)
|
||||||
|
{
|
||||||
|
PCRE2_UCHAR *previous = code;
|
||||||
|
|
||||||
|
if ((xclass_props & XCLASS_HAS_CHAR_LISTS) == 0)
|
||||||
|
*class_uchardata++ = XCL_END; /* Marks the end of extra data */
|
||||||
|
*code++ = OP_XCLASS;
|
||||||
|
code += LINK_SIZE;
|
||||||
|
*code = negate_class? XCL_NOT:0;
|
||||||
|
if ((xclass_props & XCLASS_HAS_PROPS) != 0) *code |= XCL_HASPROP;
|
||||||
|
|
||||||
|
/* If the map is required, move up the extra data to make room for it;
|
||||||
|
otherwise just move the code pointer to the end of the extra data. */
|
||||||
|
|
||||||
|
if ((xclass_props & XCLASS_HAS_8BIT_CHARS) != 0 || has_bitmap != NULL)
|
||||||
|
{
|
||||||
|
if (negate_class)
|
||||||
|
{
|
||||||
|
uint32_t *classwords = cb->classbits.classwords;
|
||||||
|
for (int i = 0; i < 8; i++) classwords[i] = ~classwords[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (has_bitmap == NULL)
|
||||||
|
{
|
||||||
|
*code++ |= XCL_MAP;
|
||||||
|
(void)memmove(code + (32 / sizeof(PCRE2_UCHAR)), code,
|
||||||
|
CU2BYTES(class_uchardata - code));
|
||||||
|
memcpy(code, classbits, 32);
|
||||||
|
code = class_uchardata + (32 / sizeof(PCRE2_UCHAR));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
code = class_uchardata;
|
||||||
|
if ((xclass_props & XCLASS_HAS_8BIT_CHARS) != 0)
|
||||||
|
*has_bitmap = TRUE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else code = class_uchardata;
|
||||||
|
|
||||||
|
if ((xclass_props & XCLASS_HAS_CHAR_LISTS) != 0)
|
||||||
|
{
|
||||||
|
/* Char lists size is an even number, because all items are 16 or 32
|
||||||
|
bit values. The character list data is always aligned to 32 bits. */
|
||||||
|
size_t char_lists_size = cranges->char_lists_size;
|
||||||
|
PCRE2_ASSERT((char_lists_size & 0x1) == 0 &&
|
||||||
|
(cb->char_lists_size & 0x3) == 0);
|
||||||
|
|
||||||
|
if (lengthptr != NULL)
|
||||||
|
{
|
||||||
|
char_lists_size = CLIST_ALIGN_TO(char_lists_size, sizeof(uint32_t));
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
*lengthptr += 2 + LINK_SIZE;
|
||||||
|
#else
|
||||||
|
*lengthptr += 1 + LINK_SIZE;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
cb->char_lists_size += char_lists_size;
|
||||||
|
|
||||||
|
char_lists_size /= sizeof(PCRE2_UCHAR);
|
||||||
|
|
||||||
|
/* Storage space for character lists is included
|
||||||
|
in the maximum pattern size. */
|
||||||
|
if (*lengthptr > MAX_PATTERN_SIZE ||
|
||||||
|
MAX_PATTERN_SIZE - *lengthptr < char_lists_size)
|
||||||
|
{
|
||||||
|
*errorcodeptr = ERR20; /* Pattern is too large */
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
uint8_t *data;
|
||||||
|
|
||||||
|
PCRE2_ASSERT(cranges->char_lists_types <= XCL_TYPE_MASK);
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
/* Encode as high / low bytes. */
|
||||||
|
code[0] = (uint8_t)(XCL_LIST |
|
||||||
|
(cranges->char_lists_types >> 8));
|
||||||
|
code[1] = (uint8_t)cranges->char_lists_types;
|
||||||
|
code += 2;
|
||||||
|
#else
|
||||||
|
*code++ = (PCRE2_UCHAR)(XCL_LIST | cranges->char_lists_types);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Character lists are stored in backwards direction from
|
||||||
|
byte code start. The non-dfa/dfa matchers can access these
|
||||||
|
lists using the byte code start stored in match blocks.
|
||||||
|
Each list is aligned to 32 bit with an optional unused
|
||||||
|
16 bit value at the beginning of the character list. */
|
||||||
|
|
||||||
|
cb->char_lists_size += char_lists_size;
|
||||||
|
data = (uint8_t*)cb->start_code - cb->char_lists_size;
|
||||||
|
|
||||||
|
memcpy(data, (uint8_t*)(cranges + 1) + cranges->char_lists_start,
|
||||||
|
char_lists_size);
|
||||||
|
|
||||||
|
/* Since character lists total size is less than MAX_PATTERN_SIZE,
|
||||||
|
their starting offset fits into a value which size is LINK_SIZE. */
|
||||||
|
|
||||||
|
char_lists_size = cb->char_lists_size;
|
||||||
|
PUT(code, 0, (uint32_t)(char_lists_size >> 1));
|
||||||
|
code += LINK_SIZE;
|
||||||
|
|
||||||
|
#if defined PCRE2_DEBUG || defined SUPPORT_VALGRIND
|
||||||
|
if ((char_lists_size & 0x2) != 0)
|
||||||
|
{
|
||||||
|
/* In debug the unused 16 bit value is set
|
||||||
|
to a fixed value and marked unused. */
|
||||||
|
((uint16_t*)data)[-1] = 0x5555;
|
||||||
|
#ifdef SUPPORT_VALGRIND
|
||||||
|
VALGRIND_MAKE_MEM_NOACCESS(data - 2, 2);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
cb->char_lists_size =
|
||||||
|
CLIST_ALIGN_TO(char_lists_size, sizeof(uint32_t));
|
||||||
|
|
||||||
|
cb->cx->memctl.free(cranges, cb->cx->memctl.memory_data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Now fill in the complete length of the item */
|
||||||
|
|
||||||
|
PUT(previous, 1, (int)(code - previous));
|
||||||
|
goto DONE; /* End of class handling */
|
||||||
|
}
|
||||||
|
#endif /* SUPPORT_WIDE_CHARS */
|
||||||
|
|
||||||
|
/* If there are no characters > 255, or they are all to be included or
|
||||||
|
excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the
|
||||||
|
whole class was negated and whether there were negative specials such as \S
|
||||||
|
(non-UCP) in the class. Then copy the 32-byte map into the code vector,
|
||||||
|
negating it if necessary. */
|
||||||
|
|
||||||
|
if (negate_class)
|
||||||
|
{
|
||||||
|
uint32_t *classwords = cb->classbits.classwords;
|
||||||
|
|
||||||
|
for (int i = 0; i < 8; i++) classwords[i] = ~classwords[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((SELECT_VALUE8(!utf, 0) || negate_class != should_flip_negation) &&
|
||||||
|
cb->classbits.classwords[0] == ~(uint32_t)0)
|
||||||
|
{
|
||||||
|
const uint32_t *classwords = cb->classbits.classwords;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < 8; i++)
|
||||||
|
if (classwords[i] != ~(uint32_t)0) break;
|
||||||
|
|
||||||
|
if (i == 8)
|
||||||
|
{
|
||||||
|
*code++ = OP_ALLANY;
|
||||||
|
goto DONE; /* End of class handling */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
*code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
|
||||||
|
memcpy(code, classbits, 32);
|
||||||
|
code += 32 / sizeof(PCRE2_UCHAR);
|
||||||
|
|
||||||
|
DONE:
|
||||||
|
*pcode = code;
|
||||||
|
return pptr - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* ===================================================================*/
|
||||||
|
/* Here follows a block of ECLASS-compiling functions. You may well want to
|
||||||
|
read them from top to bottom; they are ordered from leafmost (at the top) to
|
||||||
|
outermost parser (at the bottom of the file). */
|
||||||
|
|
||||||
|
/* This function folds one operand using the negation operator.
|
||||||
|
The new, combined chunk of stack code is written out to *pop_info. */
|
||||||
|
|
||||||
|
static void
|
||||||
|
fold_negation(eclass_op_info *pop_info, PCRE2_SIZE *lengthptr,
|
||||||
|
BOOL preserve_classbits)
|
||||||
|
{
|
||||||
|
/* If the chunk of stack code is already composed of multiple ops, we won't
|
||||||
|
descend in and try and propagate the negation down the tree. (That would lead
|
||||||
|
to O(n^2) compile-time, which could be exploitable with a malicious regex -
|
||||||
|
although maybe that's not really too much of a worry in a library that offers
|
||||||
|
an exponential-time matching function!) */
|
||||||
|
|
||||||
|
if (pop_info->op_single_type == 0)
|
||||||
|
{
|
||||||
|
if (lengthptr != NULL)
|
||||||
|
*lengthptr += 1;
|
||||||
|
else
|
||||||
|
pop_info->code_start[pop_info->length] = ECL_NOT;
|
||||||
|
pop_info->length += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Otherwise, it's a nice single-op item, so we can easily fold in the negation
|
||||||
|
without needing to produce an ECL_NOT. */
|
||||||
|
|
||||||
|
else if (pop_info->op_single_type == ECL_ANY ||
|
||||||
|
pop_info->op_single_type == ECL_NONE)
|
||||||
|
{
|
||||||
|
pop_info->op_single_type = (pop_info->op_single_type == ECL_NONE)?
|
||||||
|
ECL_ANY : ECL_NONE;
|
||||||
|
if (lengthptr == NULL)
|
||||||
|
*(pop_info->code_start) = pop_info->op_single_type;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PCRE2_ASSERT(pop_info->op_single_type == ECL_XCLASS &&
|
||||||
|
pop_info->length >= 1 + LINK_SIZE + 1);
|
||||||
|
if (lengthptr == NULL)
|
||||||
|
pop_info->code_start[1 + LINK_SIZE] ^= XCL_NOT;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!preserve_classbits)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < 8; i++)
|
||||||
|
pop_info->bits.classwords[i] = ~pop_info->bits.classwords[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* This function folds together two operands using a binary operator.
|
||||||
|
The new, combined chunk of stack code is written out to *lhs_op_info. */
|
||||||
|
|
||||||
|
static void
|
||||||
|
fold_binary(int op, eclass_op_info *lhs_op_info, eclass_op_info *rhs_op_info,
|
||||||
|
PCRE2_SIZE *lengthptr)
|
||||||
|
{
|
||||||
|
switch (op)
|
||||||
|
{
|
||||||
|
/* ECL_AND truth table:
|
||||||
|
|
||||||
|
LHS RHS RESULT
|
||||||
|
----------------
|
||||||
|
ANY * RHS
|
||||||
|
* ANY LHS
|
||||||
|
NONE * NONE
|
||||||
|
* NONE NONE
|
||||||
|
X Y X & Y
|
||||||
|
*/
|
||||||
|
|
||||||
|
case ECL_AND:
|
||||||
|
if (rhs_op_info->op_single_type == ECL_ANY)
|
||||||
|
{
|
||||||
|
/* no-op: drop the RHS */
|
||||||
|
}
|
||||||
|
else if (lhs_op_info->op_single_type == ECL_ANY)
|
||||||
|
{
|
||||||
|
/* no-op: drop the LHS, and memmove the RHS into its place */
|
||||||
|
if (lengthptr == NULL)
|
||||||
|
memmove(lhs_op_info->code_start, rhs_op_info->code_start,
|
||||||
|
CU2BYTES(rhs_op_info->length));
|
||||||
|
lhs_op_info->length = rhs_op_info->length;
|
||||||
|
lhs_op_info->op_single_type = rhs_op_info->op_single_type;
|
||||||
|
}
|
||||||
|
else if (rhs_op_info->op_single_type == ECL_NONE)
|
||||||
|
{
|
||||||
|
/* the result is ECL_NONE: write into the LHS */
|
||||||
|
if (lengthptr == NULL)
|
||||||
|
lhs_op_info->code_start[0] = ECL_NONE;
|
||||||
|
lhs_op_info->length = 1;
|
||||||
|
lhs_op_info->op_single_type = ECL_NONE;
|
||||||
|
}
|
||||||
|
else if (lhs_op_info->op_single_type == ECL_NONE)
|
||||||
|
{
|
||||||
|
/* the result is ECL_NONE: drop the RHS */
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Both of LHS & RHS are either ECL_XCLASS, or compound operations. */
|
||||||
|
if (lengthptr != NULL)
|
||||||
|
*lengthptr += 1;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PCRE2_ASSERT(rhs_op_info->code_start ==
|
||||||
|
lhs_op_info->code_start + lhs_op_info->length);
|
||||||
|
rhs_op_info->code_start[rhs_op_info->length] = ECL_AND;
|
||||||
|
}
|
||||||
|
lhs_op_info->length += rhs_op_info->length + 1;
|
||||||
|
lhs_op_info->op_single_type = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 8; i++)
|
||||||
|
lhs_op_info->bits.classwords[i] &= rhs_op_info->bits.classwords[i];
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* ECL_OR truth table:
|
||||||
|
|
||||||
|
LHS RHS RESULT
|
||||||
|
----------------
|
||||||
|
ANY * ANY
|
||||||
|
* ANY ANY
|
||||||
|
NONE * RHS
|
||||||
|
* NONE LHS
|
||||||
|
X Y X | Y
|
||||||
|
*/
|
||||||
|
|
||||||
|
case ECL_OR:
|
||||||
|
if (rhs_op_info->op_single_type == ECL_NONE)
|
||||||
|
{
|
||||||
|
/* no-op: drop the RHS */
|
||||||
|
}
|
||||||
|
else if (lhs_op_info->op_single_type == ECL_NONE)
|
||||||
|
{
|
||||||
|
/* no-op: drop the LHS, and memmove the RHS into its place */
|
||||||
|
if (lengthptr == NULL)
|
||||||
|
memmove(lhs_op_info->code_start, rhs_op_info->code_start,
|
||||||
|
CU2BYTES(rhs_op_info->length));
|
||||||
|
lhs_op_info->length = rhs_op_info->length;
|
||||||
|
lhs_op_info->op_single_type = rhs_op_info->op_single_type;
|
||||||
|
}
|
||||||
|
else if (rhs_op_info->op_single_type == ECL_ANY)
|
||||||
|
{
|
||||||
|
/* the result is ECL_ANY: write into the LHS */
|
||||||
|
if (lengthptr == NULL)
|
||||||
|
lhs_op_info->code_start[0] = ECL_ANY;
|
||||||
|
lhs_op_info->length = 1;
|
||||||
|
lhs_op_info->op_single_type = ECL_ANY;
|
||||||
|
}
|
||||||
|
else if (lhs_op_info->op_single_type == ECL_ANY)
|
||||||
|
{
|
||||||
|
/* the result is ECL_ANY: drop the RHS */
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Both of LHS & RHS are either ECL_XCLASS, or compound operations. */
|
||||||
|
if (lengthptr != NULL)
|
||||||
|
*lengthptr += 1;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PCRE2_ASSERT(rhs_op_info->code_start ==
|
||||||
|
lhs_op_info->code_start + lhs_op_info->length);
|
||||||
|
rhs_op_info->code_start[rhs_op_info->length] = ECL_OR;
|
||||||
|
}
|
||||||
|
lhs_op_info->length += rhs_op_info->length + 1;
|
||||||
|
lhs_op_info->op_single_type = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 8; i++)
|
||||||
|
lhs_op_info->bits.classwords[i] |= rhs_op_info->bits.classwords[i];
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* ECL_XOR truth table:
|
||||||
|
|
||||||
|
LHS RHS RESULT
|
||||||
|
----------------
|
||||||
|
ANY * !RHS
|
||||||
|
* ANY !LHS
|
||||||
|
NONE * RHS
|
||||||
|
* NONE LHS
|
||||||
|
X Y X ^ Y
|
||||||
|
*/
|
||||||
|
|
||||||
|
case ECL_XOR:
|
||||||
|
if (rhs_op_info->op_single_type == ECL_NONE)
|
||||||
|
{
|
||||||
|
/* no-op: drop the RHS */
|
||||||
|
}
|
||||||
|
else if (lhs_op_info->op_single_type == ECL_NONE)
|
||||||
|
{
|
||||||
|
/* no-op: drop the LHS, and memmove the RHS into its place */
|
||||||
|
if (lengthptr == NULL)
|
||||||
|
memmove(lhs_op_info->code_start, rhs_op_info->code_start,
|
||||||
|
CU2BYTES(rhs_op_info->length));
|
||||||
|
lhs_op_info->length = rhs_op_info->length;
|
||||||
|
lhs_op_info->op_single_type = rhs_op_info->op_single_type;
|
||||||
|
}
|
||||||
|
else if (rhs_op_info->op_single_type == ECL_ANY)
|
||||||
|
{
|
||||||
|
/* the result is !LHS: fold in the negation, and drop the RHS */
|
||||||
|
/* Preserve the classbits, because we promise to deal with them later. */
|
||||||
|
fold_negation(lhs_op_info, lengthptr, TRUE);
|
||||||
|
}
|
||||||
|
else if (lhs_op_info->op_single_type == ECL_ANY)
|
||||||
|
{
|
||||||
|
/* the result is !RHS: drop the LHS, memmove the RHS into its place, and
|
||||||
|
fold in the negation */
|
||||||
|
if (lengthptr == NULL)
|
||||||
|
memmove(lhs_op_info->code_start, rhs_op_info->code_start,
|
||||||
|
CU2BYTES(rhs_op_info->length));
|
||||||
|
lhs_op_info->length = rhs_op_info->length;
|
||||||
|
lhs_op_info->op_single_type = rhs_op_info->op_single_type;
|
||||||
|
|
||||||
|
/* Preserve the classbits, because we promise to deal with them later. */
|
||||||
|
fold_negation(lhs_op_info, lengthptr, TRUE);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Both of LHS & RHS are either ECL_XCLASS, or compound operations. */
|
||||||
|
if (lengthptr != NULL)
|
||||||
|
*lengthptr += 1;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PCRE2_ASSERT(rhs_op_info->code_start ==
|
||||||
|
lhs_op_info->code_start + lhs_op_info->length);
|
||||||
|
rhs_op_info->code_start[rhs_op_info->length] = ECL_XOR;
|
||||||
|
}
|
||||||
|
lhs_op_info->length += rhs_op_info->length + 1;
|
||||||
|
lhs_op_info->op_single_type = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 8; i++)
|
||||||
|
lhs_op_info->bits.classwords[i] ^= rhs_op_info->bits.classwords[i];
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
PCRE2_DEBUG_UNREACHABLE();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Forward declaration: compile_class_operand() below calls
compile_eclass_nested() for nested extended classes, but that function is
only defined further down, after the operator-parsing functions. */
static BOOL
|
||||||
|
compile_eclass_nested(eclass_context *context, BOOL negated,
|
||||||
|
uint32_t **pptr, PCRE2_UCHAR **pcode,
|
||||||
|
eclass_op_info *pop_info, PCRE2_SIZE *lengthptr);
|
||||||
|
|
||||||
|
/* This function consumes a group of implicitly-unioned class elements.
|
||||||
|
These can be characters, ranges, properties, or nested classes, as long
|
||||||
|
as they are all joined by being placed adjacently. */
|
||||||
|
|
||||||
|
static BOOL
|
||||||
|
compile_class_operand(eclass_context *context, BOOL negated,
|
||||||
|
uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
|
||||||
|
PCRE2_SIZE *lengthptr)
|
||||||
|
{
|
||||||
|
uint32_t *ptr = *pptr;
|
||||||
|
uint32_t *prev_ptr;
|
||||||
|
PCRE2_UCHAR *code = *pcode;
|
||||||
|
PCRE2_UCHAR *code_start = code;
|
||||||
|
PCRE2_SIZE prev_length = (lengthptr != NULL)? *lengthptr : 0;
|
||||||
|
PCRE2_SIZE extra_length;
|
||||||
|
uint32_t meta = META_CODE(*ptr);
|
||||||
|
|
||||||
|
switch (meta)
|
||||||
|
{
|
||||||
|
case META_CLASS_EMPTY_NOT:
|
||||||
|
case META_CLASS_EMPTY:
|
||||||
|
++ptr;
|
||||||
|
pop_info->length = 1;
|
||||||
|
if ((meta == META_CLASS_EMPTY) == negated)
|
||||||
|
{
|
||||||
|
*code++ = pop_info->op_single_type = ECL_ANY;
|
||||||
|
memset(pop_info->bits.classbits, 0xff, 32);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
*code++ = pop_info->op_single_type = ECL_NONE;
|
||||||
|
memset(pop_info->bits.classbits, 0, 32);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case META_CLASS:
|
||||||
|
case META_CLASS_NOT:
|
||||||
|
if ((*ptr & CLASS_IS_ECLASS) != 0)
|
||||||
|
{
|
||||||
|
if (!compile_eclass_nested(context, negated, &ptr, &code,
|
||||||
|
pop_info, lengthptr))
|
||||||
|
return FALSE;
|
||||||
|
|
||||||
|
PCRE2_ASSERT(*ptr == META_CLASS_END);
|
||||||
|
ptr++;
|
||||||
|
goto DONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
ptr++;
|
||||||
|
/* Fall through */
|
||||||
|
|
||||||
|
default:
|
||||||
|
/* Scan forward characters, ranges, and properties.
|
||||||
|
For example: inside [a-z_ -- m] we don't have brackets around "a-z_" but
|
||||||
|
we still need to collect that fragment up into a "leaf" OP_CLASS. */
|
||||||
|
|
||||||
|
prev_ptr = ptr;
|
||||||
|
ptr = PRIV(compile_class_not_nested)(
|
||||||
|
context->options, context->xoptions, ptr, &code,
|
||||||
|
(meta != META_CLASS_NOT) == negated, &context->needs_bitmap,
|
||||||
|
context->errorcodeptr, context->cb, lengthptr);
|
||||||
|
if (ptr == NULL) return FALSE;
|
||||||
|
|
||||||
|
/* We must have a 100% guarantee that ptr increases when
|
||||||
|
compile_class_operand() returns, even on Release builds, so that we can
|
||||||
|
statically prove our loops terminate. */
|
||||||
|
if (ptr <= prev_ptr)
|
||||||
|
{
|
||||||
|
PCRE2_DEBUG_UNREACHABLE();
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If we fell through above, consume the closing ']'. */
|
||||||
|
if (meta == META_CLASS || meta == META_CLASS_NOT)
|
||||||
|
{
|
||||||
|
PCRE2_ASSERT(*ptr == META_CLASS_END);
|
||||||
|
ptr++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Regardless of whether (lengthptr == NULL), some data will still be written
|
||||||
|
out to *pcode, which we need: we have to peek at it, to transform the opcode
|
||||||
|
into the ECLASS version (since we need to hoist up the bitmaps). */
|
||||||
|
PCRE2_ASSERT(code > code_start);
|
||||||
|
extra_length = (lengthptr != NULL)? *lengthptr - prev_length : 0;
|
||||||
|
|
||||||
|
/* Easiest case: convert OP_ALLANY to ECL_ANY */
|
||||||
|
|
||||||
|
if (*code_start == OP_ALLANY)
|
||||||
|
{
|
||||||
|
PCRE2_ASSERT(code - code_start == 1 && extra_length == 0);
|
||||||
|
pop_info->length = 1;
|
||||||
|
*code_start = pop_info->op_single_type = ECL_ANY;
|
||||||
|
memset(pop_info->bits.classbits, 0xff, 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* For OP_CLASS and OP_NCLASS, we hoist out the bitmap and convert to
|
||||||
|
ECL_NONE / ECL_ANY respectively. */
|
||||||
|
|
||||||
|
else if (*code_start == OP_CLASS || *code_start == OP_NCLASS)
|
||||||
|
{
|
||||||
|
PCRE2_ASSERT(code - code_start == 1 + 32 / sizeof(PCRE2_UCHAR) &&
|
||||||
|
extra_length == 0);
|
||||||
|
pop_info->length = 1;
|
||||||
|
*code_start = pop_info->op_single_type =
|
||||||
|
(*code_start == OP_CLASS)? ECL_NONE : ECL_ANY;
|
||||||
|
memcpy(pop_info->bits.classbits, code_start + 1, 32);
|
||||||
|
/* Rewind the code pointer, but make sure we adjust *lengthptr, because we
|
||||||
|
do need to reserve that space (even though we only use it temporarily). */
|
||||||
|
if (lengthptr != NULL)
|
||||||
|
*lengthptr += code - (code_start + 1);
|
||||||
|
code = code_start + 1;
|
||||||
|
|
||||||
|
if (!context->needs_bitmap && *code_start == ECL_NONE)
|
||||||
|
{
|
||||||
|
uint32_t *classwords = pop_info->bits.classwords;
|
||||||
|
|
||||||
|
for (int i = 0; i < 8; i++)
|
||||||
|
if (classwords[i] != 0)
|
||||||
|
{
|
||||||
|
context->needs_bitmap = TRUE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
context->needs_bitmap = TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Finally, for OP_XCLASS we hoist out the bitmap (if any), and convert to
|
||||||
|
ECL_XCLASS. */
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PCRE2_ASSERT(*code_start == OP_XCLASS);
|
||||||
|
*code_start = pop_info->op_single_type = ECL_XCLASS;
|
||||||
|
|
||||||
|
PCRE2_ASSERT(code - code_start >= 1 + LINK_SIZE + 1);
|
||||||
|
|
||||||
|
memcpy(pop_info->bits.classbits, context->cb->classbits.classbits, 32);
|
||||||
|
pop_info->length = (code - code_start) + extra_length;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
} /* End of switch(meta) */
|
||||||
|
|
||||||
|
pop_info->code_start = (lengthptr == NULL)? code_start : NULL;
|
||||||
|
|
||||||
|
if (lengthptr != NULL)
|
||||||
|
{
|
||||||
|
*lengthptr += code - code_start;
|
||||||
|
code = code_start;
|
||||||
|
}
|
||||||
|
|
||||||
|
DONE:
|
||||||
|
PCRE2_ASSERT(lengthptr == NULL || (code == code_start));
|
||||||
|
|
||||||
|
*pptr = ptr;
|
||||||
|
*pcode = code;
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* This function consumes a group of implicitly-unioned class elements.
|
||||||
|
These can be characters, ranges, properties, or nested classes, as long
|
||||||
|
as they are all joined by being placed adjacently. */
|
||||||
|
|
||||||
|
static BOOL
|
||||||
|
compile_class_juxtaposition(eclass_context *context, BOOL negated,
|
||||||
|
uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
|
||||||
|
PCRE2_SIZE *lengthptr)
|
||||||
|
{
|
||||||
|
uint32_t *ptr = *pptr;
|
||||||
|
PCRE2_UCHAR *code = *pcode;
|
||||||
|
#ifdef PCRE2_DEBUG
|
||||||
|
PCRE2_UCHAR *start_code = *pcode;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* See compile_class_binary_loose() for comments on compile-time folding of
|
||||||
|
the "negated" flag. */
|
||||||
|
|
||||||
|
/* Because it's a non-empty class, there must be an operand at the start. */
|
||||||
|
if (!compile_class_operand(context, negated, &ptr, &code, pop_info, lengthptr))
|
||||||
|
return FALSE;
|
||||||
|
|
||||||
|
while (*ptr != META_CLASS_END &&
|
||||||
|
!(*ptr >= META_ECLASS_AND && *ptr <= META_ECLASS_NOT))
|
||||||
|
{
|
||||||
|
uint32_t op;
|
||||||
|
BOOL rhs_negated;
|
||||||
|
eclass_op_info rhs_op_info;
|
||||||
|
|
||||||
|
if (negated)
|
||||||
|
{
|
||||||
|
/* !(A juxtapose B) -> !A && !B */
|
||||||
|
op = ECL_AND;
|
||||||
|
rhs_negated = TRUE;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* A juxtapose B -> A || B */
|
||||||
|
op = ECL_OR;
|
||||||
|
rhs_negated = FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* An operand must follow the operator. */
|
||||||
|
if (!compile_class_operand(context, rhs_negated, &ptr, &code,
|
||||||
|
&rhs_op_info, lengthptr))
|
||||||
|
return FALSE;
|
||||||
|
|
||||||
|
/* Convert infix to postfix (RPN). */
|
||||||
|
fold_binary(op, pop_info, &rhs_op_info, lengthptr);
|
||||||
|
if (lengthptr == NULL)
|
||||||
|
code = pop_info->code_start + pop_info->length;
|
||||||
|
}
|
||||||
|
|
||||||
|
PCRE2_ASSERT(lengthptr == NULL || code == start_code);
|
||||||
|
|
||||||
|
*pptr = ptr;
|
||||||
|
*pcode = code;
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* This function consumes unary prefix operators. */
|
||||||
|
|
||||||
|
static BOOL
|
||||||
|
compile_class_unary(eclass_context *context, BOOL negated,
|
||||||
|
uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
|
||||||
|
PCRE2_SIZE *lengthptr)
|
||||||
|
{
|
||||||
|
uint32_t *ptr = *pptr;
|
||||||
|
#ifdef PCRE2_DEBUG
|
||||||
|
PCRE2_UCHAR *start_code = *pcode;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
while (*ptr == META_ECLASS_NOT)
|
||||||
|
{
|
||||||
|
++ptr;
|
||||||
|
negated = !negated;
|
||||||
|
}
|
||||||
|
|
||||||
|
*pptr = ptr;
|
||||||
|
/* Because it's a non-empty class, there must be an operand. */
|
||||||
|
if (!compile_class_juxtaposition(context, negated, pptr, pcode,
|
||||||
|
pop_info, lengthptr))
|
||||||
|
return FALSE;
|
||||||
|
|
||||||
|
PCRE2_ASSERT(lengthptr == NULL || *pcode == start_code);
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* This function consumes tightly-binding binary operators. */
|
||||||
|
|
||||||
|
static BOOL
|
||||||
|
compile_class_binary_tight(eclass_context *context, BOOL negated,
|
||||||
|
uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
|
||||||
|
PCRE2_SIZE *lengthptr)
|
||||||
|
{
|
||||||
|
uint32_t *ptr = *pptr;
|
||||||
|
PCRE2_UCHAR *code = *pcode;
|
||||||
|
#ifdef PCRE2_DEBUG
|
||||||
|
PCRE2_UCHAR *start_code = *pcode;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* See compile_class_binary_loose() for comments on compile-time folding of
|
||||||
|
the "negated" flag. */
|
||||||
|
|
||||||
|
/* Because it's a non-empty class, there must be an operand at the start. */
|
||||||
|
if (!compile_class_unary(context, negated, &ptr, &code, pop_info, lengthptr))
|
||||||
|
return FALSE;
|
||||||
|
|
||||||
|
while (*ptr == META_ECLASS_AND)
|
||||||
|
{
|
||||||
|
uint32_t op;
|
||||||
|
BOOL rhs_negated;
|
||||||
|
eclass_op_info rhs_op_info;
|
||||||
|
|
||||||
|
if (negated)
|
||||||
|
{
|
||||||
|
/* !(A && B) -> !A || !B */
|
||||||
|
op = ECL_OR;
|
||||||
|
rhs_negated = TRUE;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* A && B -> A && B */
|
||||||
|
op = ECL_AND;
|
||||||
|
rhs_negated = FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
++ptr;
|
||||||
|
|
||||||
|
/* An operand must follow the operator. */
|
||||||
|
if (!compile_class_unary(context, rhs_negated, &ptr, &code,
|
||||||
|
&rhs_op_info, lengthptr))
|
||||||
|
return FALSE;
|
||||||
|
|
||||||
|
/* Convert infix to postfix (RPN). */
|
||||||
|
fold_binary(op, pop_info, &rhs_op_info, lengthptr);
|
||||||
|
if (lengthptr == NULL)
|
||||||
|
code = pop_info->code_start + pop_info->length;
|
||||||
|
}
|
||||||
|
|
||||||
|
PCRE2_ASSERT(lengthptr == NULL || code == start_code);
|
||||||
|
|
||||||
|
*pptr = ptr;
|
||||||
|
*pcode = code;
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* This function consumes loosely-binding binary operators. */
|
||||||
|
|
||||||
|
static BOOL
|
||||||
|
compile_class_binary_loose(eclass_context *context, BOOL negated,
|
||||||
|
uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
|
||||||
|
PCRE2_SIZE *lengthptr)
|
||||||
|
{
|
||||||
|
uint32_t *ptr = *pptr;
|
||||||
|
PCRE2_UCHAR *code = *pcode;
|
||||||
|
#ifdef PCRE2_DEBUG
|
||||||
|
PCRE2_UCHAR *start_code = *pcode;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* We really want to fold the negation operator, if at all possible, so that
|
||||||
|
simple cases can be reduced down. In particular, in 8-bit no-UTF mode, we want
|
||||||
|
to produce a fully-folded expression, so that we can guarantee not to emit any
|
||||||
|
OP_ECLASS codes (in the same way that we never emit OP_XCLASS in this mode).
|
||||||
|
|
||||||
|
This has the consequence that with a little ingenuity, we can in fact avoid
|
||||||
|
emitting (nearly...) all cases of the "NOT" operator. Imagine that we have:
|
||||||
|
!(A ...
|
||||||
|
We have parsed the preceding "!", and we are about to parse the "A" operand. We
|
||||||
|
don't know yet whether there will even be a following binary operand! Both of
|
||||||
|
these are possibilities for what follows:
|
||||||
|
!(A && B)
|
||||||
|
!(A)
|
||||||
|
However, we can still fold the "!" into the "A" operand, because no matter what
|
||||||
|
the following binary operator will be, we can produce an expression which is
|
||||||
|
equivalent. */
|
||||||
|
|
||||||
|
/* Because it's a non-empty class, there must be an operand at the start. */
|
||||||
|
if (!compile_class_binary_tight(context, negated, &ptr, &code,
|
||||||
|
pop_info, lengthptr))
|
||||||
|
return FALSE;
|
||||||
|
|
||||||
|
while (*ptr >= META_ECLASS_OR && *ptr <= META_ECLASS_XOR)
|
||||||
|
{
|
||||||
|
uint32_t op;
|
||||||
|
BOOL op_neg;
|
||||||
|
BOOL rhs_negated;
|
||||||
|
eclass_op_info rhs_op_info;
|
||||||
|
|
||||||
|
if (negated)
|
||||||
|
{
|
||||||
|
/* The whole expression is being negated; we respond by unconditionally
|
||||||
|
negating the LHS A, before seeing what follows. And hooray! We can recover,
|
||||||
|
no matter what follows. */
|
||||||
|
/* !(A || B) -> !A && !B */
|
||||||
|
/* !(A -- B) -> !(A && !B) -> !A || B */
|
||||||
|
/* !(A XOR B) -> !(!A XOR !B) -> !A XNOR !B */
|
||||||
|
op = (*ptr == META_ECLASS_OR )? ECL_AND :
|
||||||
|
(*ptr == META_ECLASS_SUB)? ECL_OR :
|
||||||
|
/*ptr == META_ECLASS_XOR*/ ECL_XOR;
|
||||||
|
op_neg = (*ptr == META_ECLASS_XOR);
|
||||||
|
rhs_negated = *ptr != META_ECLASS_SUB;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* A || B -> A || B */
|
||||||
|
/* A -- B -> A && !B */
|
||||||
|
/* A XOR B -> A XOR B */
|
||||||
|
op = (*ptr == META_ECLASS_OR )? ECL_OR :
|
||||||
|
(*ptr == META_ECLASS_SUB)? ECL_AND :
|
||||||
|
/*ptr == META_ECLASS_XOR*/ ECL_XOR;
|
||||||
|
op_neg = FALSE;
|
||||||
|
rhs_negated = *ptr == META_ECLASS_SUB;
|
||||||
|
}
|
||||||
|
|
||||||
|
++ptr;
|
||||||
|
|
||||||
|
/* An operand must follow the operator. */
|
||||||
|
if (!compile_class_binary_tight(context, rhs_negated, &ptr, &code,
|
||||||
|
&rhs_op_info, lengthptr))
|
||||||
|
return FALSE;
|
||||||
|
|
||||||
|
/* Convert infix to postfix (RPN). */
|
||||||
|
fold_binary(op, pop_info, &rhs_op_info, lengthptr);
|
||||||
|
if (op_neg) fold_negation(pop_info, lengthptr, FALSE);
|
||||||
|
if (lengthptr == NULL)
|
||||||
|
code = pop_info->code_start + pop_info->length;
|
||||||
|
}
|
||||||
|
|
||||||
|
PCRE2_ASSERT(lengthptr == NULL || code == start_code);
|
||||||
|
|
||||||
|
*pptr = ptr;
|
||||||
|
*pcode = code;
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* This function converts the META codes in pptr into opcodes written to
|
||||||
|
pcode. The pptr must start at a META_CLASS or META_CLASS_NOT.
|
||||||
|
|
||||||
|
The class is compiled as a left-associative sequence of operator
|
||||||
|
applications.
|
||||||
|
|
||||||
|
The pptr will be left pointing at the matching META_CLASS_END. */
|
||||||
|
|
||||||
|
static BOOL
|
||||||
|
compile_eclass_nested(eclass_context *context, BOOL negated,
|
||||||
|
uint32_t **pptr, PCRE2_UCHAR **pcode,
|
||||||
|
eclass_op_info *pop_info, PCRE2_SIZE *lengthptr)
|
||||||
|
{
|
||||||
|
uint32_t *ptr = *pptr;
|
||||||
|
#ifdef PCRE2_DEBUG
|
||||||
|
PCRE2_UCHAR *start_code = *pcode;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* The CLASS_IS_ECLASS bit must be set since it is a nested class. */
|
||||||
|
PCRE2_ASSERT(*ptr == (META_CLASS | CLASS_IS_ECLASS) ||
|
||||||
|
*ptr == (META_CLASS_NOT | CLASS_IS_ECLASS));
|
||||||
|
|
||||||
|
if (*ptr++ == (META_CLASS_NOT | CLASS_IS_ECLASS))
|
||||||
|
negated = !negated;
|
||||||
|
|
||||||
|
(*pptr)++;
|
||||||
|
|
||||||
|
/* Because it's a non-empty class, there must be an operand at the start. */
|
||||||
|
if (!compile_class_binary_loose(context, negated, pptr, pcode,
|
||||||
|
pop_info, lengthptr))
|
||||||
|
return FALSE;
|
||||||
|
|
||||||
|
PCRE2_ASSERT(**pptr == META_CLASS_END);
|
||||||
|
PCRE2_ASSERT(lengthptr == NULL || *pcode == start_code);
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
BOOL
|
||||||
|
PRIV(compile_class_nested)(uint32_t options, uint32_t xoptions,
|
||||||
|
uint32_t **pptr, PCRE2_UCHAR **pcode, int *errorcodeptr,
|
||||||
|
compile_block *cb, PCRE2_SIZE *lengthptr)
|
||||||
|
{
|
||||||
|
eclass_context context;
|
||||||
|
eclass_op_info op_info;
|
||||||
|
PCRE2_SIZE previous_length = (lengthptr != NULL)? *lengthptr : 0;
|
||||||
|
PCRE2_UCHAR *code = *pcode;
|
||||||
|
PCRE2_UCHAR *previous;
|
||||||
|
BOOL allbitsone = TRUE;
|
||||||
|
|
||||||
|
context.needs_bitmap = FALSE;
|
||||||
|
context.options = options;
|
||||||
|
context.xoptions = xoptions;
|
||||||
|
context.errorcodeptr = errorcodeptr;
|
||||||
|
context.cb = cb;
|
||||||
|
|
||||||
|
previous = code;
|
||||||
|
*code++ = OP_ECLASS;
|
||||||
|
code += LINK_SIZE;
|
||||||
|
*code++ = 0; /* Flags, currently zero. */
|
||||||
|
if (!compile_eclass_nested(&context, FALSE, pptr, &code, &op_info, lengthptr))
|
||||||
|
return FALSE;
|
||||||
|
|
||||||
|
if (lengthptr != NULL)
|
||||||
|
{
|
||||||
|
*lengthptr += code - previous;
|
||||||
|
code = previous;
|
||||||
|
/* (*lengthptr - previous_length) now holds the amount of buffer that
|
||||||
|
we require to make the call to compile_class_nested() with
|
||||||
|
lengthptr = NULL, and including the (1+LINK_SIZE+1) that we write out
|
||||||
|
before that call. */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Do some useful counting of what's in the bitmap. */
|
||||||
|
for (int i = 0; i < 8; i++)
|
||||||
|
if (op_info.bits.classwords[i] != 0xffffffff)
|
||||||
|
{
|
||||||
|
allbitsone = FALSE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* After constant-folding the extended class syntax, it may turn out to be
|
||||||
|
a simple class after all. In that case, we can unwrap it from the
|
||||||
|
OP_ECLASS container - and in fact, we must do so, because in 8-bit
|
||||||
|
no-Unicode mode the matcher is compiled without support for OP_ECLASS. */
|
||||||
|
|
||||||
|
#ifndef SUPPORT_WIDE_CHARS
|
||||||
|
PCRE2_ASSERT(op_info.op_single_type != 0);
|
||||||
|
#else
|
||||||
|
if (op_info.op_single_type != 0)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
/* Rewind back over the OP_ECLASS. */
|
||||||
|
code = previous;
|
||||||
|
|
||||||
|
/* If the bits are all ones, and the "high characters" are all matched
|
||||||
|
too, we use a special-cased encoding of OP_ALLANY. */
|
||||||
|
|
||||||
|
if (op_info.op_single_type == ECL_ANY && allbitsone)
|
||||||
|
{
|
||||||
|
/* Advancing code means rewinding lengthptr, at this point. */
|
||||||
|
if (lengthptr != NULL) *lengthptr -= 1;
|
||||||
|
*code++ = OP_ALLANY;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If the high bits are all matched / all not-matched, then we emit an
|
||||||
|
OP_NCLASS/OP_CLASS respectively. */
|
||||||
|
|
||||||
|
else if (op_info.op_single_type == ECL_ANY ||
|
||||||
|
op_info.op_single_type == ECL_NONE)
|
||||||
|
{
|
||||||
|
PCRE2_SIZE required_len = 1 + (32 / sizeof(PCRE2_UCHAR));
|
||||||
|
|
||||||
|
if (lengthptr != NULL)
|
||||||
|
{
|
||||||
|
if (required_len > (*lengthptr - previous_length))
|
||||||
|
*lengthptr = previous_length + required_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Advancing code means rewinding lengthptr, at this point. */
|
||||||
|
if (lengthptr != NULL) *lengthptr -= required_len;
|
||||||
|
*code++ = (op_info.op_single_type == ECL_ANY)? OP_NCLASS : OP_CLASS;
|
||||||
|
memcpy(code, op_info.bits.classbits, 32);
|
||||||
|
code += 32 / sizeof(PCRE2_UCHAR);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Otherwise, we have an ECL_XCLASS, so we have the OP_XCLASS data
|
||||||
|
there, but, we pulled out its bitmap into op_info, so now we have to
|
||||||
|
put that back into the OP_XCLASS. */
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
#ifndef SUPPORT_WIDE_CHARS
|
||||||
|
PCRE2_DEBUG_UNREACHABLE();
|
||||||
|
#else
|
||||||
|
BOOL need_map = context.needs_bitmap;
|
||||||
|
PCRE2_SIZE required_len;
|
||||||
|
|
||||||
|
PCRE2_ASSERT(op_info.op_single_type == ECL_XCLASS);
|
||||||
|
required_len = op_info.length + (need_map? 32/sizeof(PCRE2_UCHAR) : 0);
|
||||||
|
|
||||||
|
if (lengthptr != NULL)
|
||||||
|
{
|
||||||
|
/* Don't unconditionally request all the space we need - we may
|
||||||
|
already have asked for more during processing of the ECLASS. */
|
||||||
|
if (required_len > (*lengthptr - previous_length))
|
||||||
|
*lengthptr = previous_length + required_len;
|
||||||
|
|
||||||
|
/* The code we write out here won't be ignored, even during the
|
||||||
|
(lengthptr != NULL) phase, because if there's a following quantifier
|
||||||
|
it will peek backwards. So we do have to write out a (truncated)
|
||||||
|
OP_XCLASS, even on this branch. */
|
||||||
|
*lengthptr -= 1 + LINK_SIZE + 1;
|
||||||
|
*code++ = OP_XCLASS;
|
||||||
|
PUT(code, 0, 1 + LINK_SIZE + 1);
|
||||||
|
code += LINK_SIZE;
|
||||||
|
*code++ = 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PCRE2_UCHAR *rest;
|
||||||
|
PCRE2_SIZE rest_len;
|
||||||
|
PCRE2_UCHAR flags;
|
||||||
|
|
||||||
|
/* 1 unit: OP_XCLASS | LINK_SIZE units | 1 unit: flags | ...rest */
|
||||||
|
PCRE2_ASSERT(op_info.length >= 1 + LINK_SIZE + 1);
|
||||||
|
rest = op_info.code_start + 1 + LINK_SIZE + 1;
|
||||||
|
rest_len = (op_info.code_start + op_info.length) - rest;
|
||||||
|
|
||||||
|
/* First read any data we use, before memmove splats it. */
|
||||||
|
flags = op_info.code_start[1 + LINK_SIZE];
|
||||||
|
PCRE2_ASSERT((flags & XCL_MAP) == 0);
|
||||||
|
|
||||||
|
/* Next do the memmove before any writes. */
|
||||||
|
memmove(code + 1 + LINK_SIZE + 1 + (need_map? 32/sizeof(PCRE2_UCHAR) : 0),
|
||||||
|
rest, CU2BYTES(rest_len));
|
||||||
|
|
||||||
|
/* Finally write the header data. */
|
||||||
|
*code++ = OP_XCLASS;
|
||||||
|
PUT(code, 0, (int)required_len);
|
||||||
|
code += LINK_SIZE;
|
||||||
|
*code++ = flags | (need_map? XCL_MAP : 0);
|
||||||
|
if (need_map)
|
||||||
|
{
|
||||||
|
memcpy(code, op_info.bits.classbits, 32);
|
||||||
|
code += 32 / sizeof(PCRE2_UCHAR);
|
||||||
|
}
|
||||||
|
code += rest_len;
|
||||||
|
}
|
||||||
|
#endif /* SUPPORT_WIDE_CHARS */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Otherwise, we're going to keep the OP_ECLASS. However, again we need
|
||||||
|
to do some adjustment to insert the bitmap if we have one. */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_WIDE_CHARS
|
||||||
|
else
|
||||||
|
{
|
||||||
|
BOOL need_map = context.needs_bitmap;
|
||||||
|
PCRE2_SIZE required_len =
|
||||||
|
1 + LINK_SIZE + 1 + (need_map? 32/sizeof(PCRE2_UCHAR) : 0) + op_info.length;
|
||||||
|
|
||||||
|
if (lengthptr != NULL)
|
||||||
|
{
|
||||||
|
if (required_len > (*lengthptr - previous_length))
|
||||||
|
*lengthptr = previous_length + required_len;
|
||||||
|
|
||||||
|
/* As for the XCLASS branch above, we do have to write out a dummy
|
||||||
|
OP_ECLASS, because of the backwards peek by the quantifier code. Write
|
||||||
|
out a (truncated) OP_ECLASS, even on this branch. */
|
||||||
|
*lengthptr -= 1 + LINK_SIZE + 1;
|
||||||
|
*code++ = OP_ECLASS;
|
||||||
|
PUT(code, 0, 1 + LINK_SIZE + 1);
|
||||||
|
code += LINK_SIZE;
|
||||||
|
*code++ = 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (need_map)
|
||||||
|
{
|
||||||
|
PCRE2_UCHAR *map_start = previous + 1 + LINK_SIZE + 1;
|
||||||
|
previous[1 + LINK_SIZE] |= ECL_MAP;
|
||||||
|
memmove(map_start + 32/sizeof(PCRE2_UCHAR), map_start,
|
||||||
|
CU2BYTES(code - map_start));
|
||||||
|
memcpy(map_start, op_info.bits.classbits, 32);
|
||||||
|
code += 32 / sizeof(PCRE2_UCHAR);
|
||||||
|
}
|
||||||
|
PUT(previous, 1, (int)(code - previous));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif /* SUPPORT_WIDE_CHARS */
|
||||||
|
|
||||||
|
*pcode = code;
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre2_compile_class.c */
|
||||||
252
3rd/pcre2/src/pcre2_config.c
Normal file
252
3rd/pcre2/src/pcre2_config.c
Normal file
@@ -0,0 +1,252 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
#include "config.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Save the configured link size, which is in bytes. In 16-bit and 32-bit modes
|
||||||
|
its value gets changed by pcre2_intmodedep.h (included by pcre2_internal.h) to
|
||||||
|
be in code units. */
|
||||||
|
|
||||||
|
static int configured_link_size = LINK_SIZE;
|
||||||
|
|
||||||
|
#include "pcre2_internal.h"
|
||||||
|
|
||||||
|
/* These macros are the standard way of turning unquoted text into C strings.
|
||||||
|
They allow macros like PCRE2_MAJOR to be defined without quotes, which is
|
||||||
|
convenient for user programs that want to test their values. */
|
||||||
|
|
||||||
|
#define STRING(a) # a
|
||||||
|
#define XSTRING(s) STRING(s)
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Return info about what features are configured *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* If where is NULL, the length of memory required is returned.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
what what information is required
|
||||||
|
where where to put the information
|
||||||
|
|
||||||
|
Returns: 0 if a numerical value is returned
|
||||||
|
>= 0 if a string value
|
||||||
|
PCRE2_ERROR_BADOPTION if "what" not recognized
|
||||||
|
or JIT target requested when JIT not enabled
|
||||||
|
*/
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_config(uint32_t what, void *where)
|
||||||
|
{
|
||||||
|
if (where == NULL) /* Requests a length */
|
||||||
|
{
|
||||||
|
switch(what)
|
||||||
|
{
|
||||||
|
default:
|
||||||
|
return PCRE2_ERROR_BADOPTION;
|
||||||
|
|
||||||
|
case PCRE2_CONFIG_BSR:
|
||||||
|
case PCRE2_CONFIG_COMPILED_WIDTHS:
|
||||||
|
case PCRE2_CONFIG_DEPTHLIMIT:
|
||||||
|
case PCRE2_CONFIG_HEAPLIMIT:
|
||||||
|
case PCRE2_CONFIG_JIT:
|
||||||
|
case PCRE2_CONFIG_LINKSIZE:
|
||||||
|
case PCRE2_CONFIG_MATCHLIMIT:
|
||||||
|
case PCRE2_CONFIG_NEVER_BACKSLASH_C:
|
||||||
|
case PCRE2_CONFIG_NEWLINE:
|
||||||
|
case PCRE2_CONFIG_PARENSLIMIT:
|
||||||
|
case PCRE2_CONFIG_STACKRECURSE: /* Obsolete */
|
||||||
|
case PCRE2_CONFIG_TABLES_LENGTH:
|
||||||
|
case PCRE2_CONFIG_UNICODE:
|
||||||
|
return sizeof(uint32_t);
|
||||||
|
|
||||||
|
/* These are handled below */
|
||||||
|
|
||||||
|
case PCRE2_CONFIG_JITTARGET:
|
||||||
|
case PCRE2_CONFIG_UNICODE_VERSION:
|
||||||
|
case PCRE2_CONFIG_VERSION:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (what)
|
||||||
|
{
|
||||||
|
default:
|
||||||
|
return PCRE2_ERROR_BADOPTION;
|
||||||
|
|
||||||
|
case PCRE2_CONFIG_BSR:
|
||||||
|
#ifdef BSR_ANYCRLF
|
||||||
|
*((uint32_t *)where) = PCRE2_BSR_ANYCRLF;
|
||||||
|
#else
|
||||||
|
*((uint32_t *)where) = PCRE2_BSR_UNICODE;
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE2_CONFIG_COMPILED_WIDTHS:
|
||||||
|
*((uint32_t *)where) = 0
|
||||||
|
#ifdef SUPPORT_PCRE2_8
|
||||||
|
+ 1
|
||||||
|
#endif
|
||||||
|
#ifdef SUPPORT_PCRE2_16
|
||||||
|
+ 2
|
||||||
|
#endif
|
||||||
|
#ifdef SUPPORT_PCRE2_32
|
||||||
|
+ 4
|
||||||
|
#endif
|
||||||
|
;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE2_CONFIG_DEPTHLIMIT:
|
||||||
|
*((uint32_t *)where) = MATCH_LIMIT_DEPTH;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE2_CONFIG_HEAPLIMIT:
|
||||||
|
*((uint32_t *)where) = HEAP_LIMIT;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE2_CONFIG_JIT:
|
||||||
|
#ifdef SUPPORT_JIT
|
||||||
|
*((uint32_t *)where) = 1;
|
||||||
|
#else
|
||||||
|
*((uint32_t *)where) = 0;
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE2_CONFIG_JITTARGET:
|
||||||
|
#ifdef SUPPORT_JIT
|
||||||
|
{
|
||||||
|
const char *v = PRIV(jit_get_target)();
|
||||||
|
return (int)(1 + ((where == NULL)?
|
||||||
|
strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
return PCRE2_ERROR_BADOPTION;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
case PCRE2_CONFIG_LINKSIZE:
|
||||||
|
*((uint32_t *)where) = (uint32_t)configured_link_size;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE2_CONFIG_MATCHLIMIT:
|
||||||
|
*((uint32_t *)where) = MATCH_LIMIT;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE2_CONFIG_NEWLINE:
|
||||||
|
*((uint32_t *)where) = NEWLINE_DEFAULT;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE2_CONFIG_NEVER_BACKSLASH_C:
|
||||||
|
#ifdef NEVER_BACKSLASH_C
|
||||||
|
*((uint32_t *)where) = 1;
|
||||||
|
#else
|
||||||
|
*((uint32_t *)where) = 0;
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE2_CONFIG_PARENSLIMIT:
|
||||||
|
*((uint32_t *)where) = PARENS_NEST_LIMIT;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* This is now obsolete. The stack is no longer used via recursion for
|
||||||
|
handling backtracking in pcre2_match(). */
|
||||||
|
|
||||||
|
case PCRE2_CONFIG_STACKRECURSE:
|
||||||
|
*((uint32_t *)where) = 0;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE2_CONFIG_TABLES_LENGTH:
|
||||||
|
*((uint32_t *)where) = TABLES_LENGTH;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE2_CONFIG_UNICODE_VERSION:
|
||||||
|
{
|
||||||
|
#if defined SUPPORT_UNICODE
|
||||||
|
const char *v = PRIV(unicode_version);
|
||||||
|
#else
|
||||||
|
const char *v = "Unicode not supported";
|
||||||
|
#endif
|
||||||
|
return (int)(1 + ((where == NULL)?
|
||||||
|
strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE2_CONFIG_UNICODE:
|
||||||
|
#if defined SUPPORT_UNICODE
|
||||||
|
*((uint32_t *)where) = 1;
|
||||||
|
#else
|
||||||
|
*((uint32_t *)where) = 0;
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* The hackery in setting "v" below is to cope with the case when
|
||||||
|
PCRE2_PRERELEASE is set to an empty string (which it is for real releases).
|
||||||
|
If the second alternative is used in this case, it does not leave a space
|
||||||
|
before the date. On the other hand, if all four macros are put into a single
|
||||||
|
XSTRING when PCRE2_PRERELEASE is not empty, an unwanted space is inserted.
|
||||||
|
There are problems using an "obvious" approach like this:
|
||||||
|
|
||||||
|
XSTRING(PCRE2_MAJOR) "." XSTRING(PCRE2_MINOR)
|
||||||
|
XSTRING(PCRE2_PRERELEASE) " " XSTRING(PCRE2_DATE)
|
||||||
|
|
||||||
|
because, when PCRE2_PRERELEASE is empty, this leads to an attempted expansion
|
||||||
|
of STRING(). The C standard states: "If (before argument substitution) any
|
||||||
|
argument consists of no preprocessing tokens, the behavior is undefined." It
|
||||||
|
turns out the gcc treats this case as a single empty string - which is what
|
||||||
|
we really want - but Visual C grumbles about the lack of an argument for the
|
||||||
|
macro. Unfortunately, both are within their rights. As there seems to be no
|
||||||
|
way to test for a macro's value being empty at compile time, we have to
|
||||||
|
resort to a runtime test. */
|
||||||
|
|
||||||
|
case PCRE2_CONFIG_VERSION:
|
||||||
|
{
|
||||||
|
const char *v = (XSTRING(Z PCRE2_PRERELEASE)[1] == 0)?
|
||||||
|
XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) :
|
||||||
|
XSTRING(PCRE2_MAJOR.PCRE2_MINOR) XSTRING(PCRE2_PRERELEASE PCRE2_DATE);
|
||||||
|
return (int)(1 + ((where == NULL)?
|
||||||
|
strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre2_config.c */
|
||||||
556
3rd/pcre2/src/pcre2_context.c
Normal file
556
3rd/pcre2/src/pcre2_context.c
Normal file
@@ -0,0 +1,556 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
#include "config.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "pcre2_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Default malloc/free functions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* Ignore the "user data" argument in each case. */
|
||||||
|
|
||||||
|
static void *default_malloc(size_t size, void *data)
{
/* Plain malloc(); the caller's data pointer plays no part. */
void *block = malloc(size);
(void)data;
return block;
}
|
||||||
|
|
||||||
|
|
||||||
|
static void default_free(void *block, void *data)
{
/* Plain free(); the caller's data pointer plays no part. */
free(block);
(void)data;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Get a block and save memory control *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This internal function is called to get a block of memory in which the
|
||||||
|
memory control data is to be stored at the start for future use.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
size amount of memory required
|
||||||
|
memctl pointer to a memctl block or NULL
|
||||||
|
|
||||||
|
Returns: pointer to memory or NULL on failure
|
||||||
|
*/
|
||||||
|
|
||||||
|
extern void *
|
||||||
|
PRIV(memctl_malloc)(size_t size, pcre2_memctl *memctl)
|
||||||
|
{
|
||||||
|
pcre2_memctl *newmemctl;
|
||||||
|
void *yield = (memctl == NULL)? malloc(size) :
|
||||||
|
memctl->malloc(size, memctl->memory_data);
|
||||||
|
if (yield == NULL) return NULL;
|
||||||
|
newmemctl = (pcre2_memctl *)yield;
|
||||||
|
if (memctl == NULL)
|
||||||
|
{
|
||||||
|
newmemctl->malloc = default_malloc;
|
||||||
|
newmemctl->free = default_free;
|
||||||
|
newmemctl->memory_data = NULL;
|
||||||
|
}
|
||||||
|
else *newmemctl = *memctl;
|
||||||
|
return yield;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Create and initialize contexts *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* Initializing for compile and match contexts is done in separate, private
|
||||||
|
functions so that these can be called from functions such as pcre2_compile()
|
||||||
|
when an external context is not supplied. The initializing functions have an
|
||||||
|
option to set up default memory management. */
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
pcre2_general_context_create(void *(*private_malloc)(size_t, void *),
  void (*private_free)(void *, void *), void *memory_data)
{
/* A NULL function pointer selects the corresponding default function. */
void *(*alloc_fn)(size_t, void *) =
  (private_malloc != NULL)? private_malloc : default_malloc;
void (*release_fn)(void *, void *) =
  (private_free != NULL)? private_free : default_free;

/* The context block itself comes from the chosen allocator. */
pcre2_general_context *created =
  alloc_fn(sizeof(pcre2_real_general_context), memory_data);

if (created != NULL)
  {
  created->memctl.malloc = alloc_fn;
  created->memctl.free = release_fn;
  created->memctl.memory_data = memory_data;
  }
return created;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* A default compile context is set up to save having to initialize at run time
|
||||||
|
when no context is supplied to the compile function. */
|
||||||
|
|
||||||
|
pcre2_compile_context PRIV(default_compile_context) = {
|
||||||
|
{ default_malloc, default_free, NULL }, /* Default memory handling */
|
||||||
|
NULL, /* Stack guard */
|
||||||
|
NULL, /* Stack guard data */
|
||||||
|
PRIV(default_tables), /* Character tables */
|
||||||
|
PCRE2_UNSET, /* Max pattern length */
|
||||||
|
PCRE2_UNSET, /* Max pattern compiled length */
|
||||||
|
BSR_DEFAULT, /* Backslash R default */
|
||||||
|
NEWLINE_DEFAULT, /* Newline convention */
|
||||||
|
PARENS_NEST_LIMIT, /* As it says */
|
||||||
|
0, /* Extra options */
|
||||||
|
MAX_VARLOOKBEHIND, /* As it says */
|
||||||
|
PCRE2_OPTIMIZATION_ALL /* All optimizations enabled */
|
||||||
|
};
|
||||||
|
|
||||||
|
/* The create function copies the default into the new memory, but must
|
||||||
|
override the default memory handling functions if a gcontext was provided. */
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
pcre2_compile_context_create(pcre2_general_context *gcontext)
{
pcre2_memctl *supplied = (pcre2_memctl *)gcontext;
pcre2_compile_context *created = PRIV(memctl_malloc)(
  sizeof(pcre2_real_compile_context), supplied);

if (created != NULL)
  {
  /* Start from the built-in defaults, then put back the caller's memory
  handling, which the structure copy has just overwritten. */
  *created = PRIV(default_compile_context);
  if (supplied != NULL) *((pcre2_memctl *)created) = *supplied;
  }
return created;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* A default match context is set up to save having to initialize at run time
|
||||||
|
when no context is supplied to a match function. */
|
||||||
|
|
||||||
|
pcre2_match_context PRIV(default_match_context) = {
|
||||||
|
{ default_malloc, default_free, NULL },
|
||||||
|
#ifdef SUPPORT_JIT
|
||||||
|
NULL, /* JIT callback */
|
||||||
|
NULL, /* JIT callback data */
|
||||||
|
#endif
|
||||||
|
NULL, /* Callout function */
|
||||||
|
NULL, /* Callout data */
|
||||||
|
NULL, /* Substitute callout function */
|
||||||
|
NULL, /* Substitute callout data */
|
||||||
|
NULL, /* Substitute case callout function */
|
||||||
|
NULL, /* Substitute case callout data */
|
||||||
|
PCRE2_UNSET, /* Offset limit */
|
||||||
|
HEAP_LIMIT,
|
||||||
|
MATCH_LIMIT,
|
||||||
|
MATCH_LIMIT_DEPTH };
|
||||||
|
|
||||||
|
/* The create function copies the default into the new memory, but must
|
||||||
|
override the default memory handling functions if a gcontext was provided. */
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
pcre2_match_context_create(pcre2_general_context *gcontext)
{
pcre2_memctl *supplied = (pcre2_memctl *)gcontext;
pcre2_match_context *created = PRIV(memctl_malloc)(
  sizeof(pcre2_real_match_context), supplied);

if (created != NULL)
  {
  /* Start from the built-in defaults, then put back the caller's memory
  handling, which the structure copy has just overwritten. */
  *created = PRIV(default_match_context);
  if (supplied != NULL) *((pcre2_memctl *)created) = *supplied;
  }
return created;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* A default convert context is set up to save having to initialize at run time
|
||||||
|
when no context is supplied to the convert function. */
|
||||||
|
|
||||||
|
pcre2_convert_context PRIV(default_convert_context) = {
|
||||||
|
{ default_malloc, default_free, NULL }, /* Default memory handling */
|
||||||
|
#ifdef _WIN32
|
||||||
|
CHAR_BACKSLASH, /* Default path separator */
|
||||||
|
CHAR_GRAVE_ACCENT /* Default escape character */
|
||||||
|
#else /* Not Windows */
|
||||||
|
CHAR_SLASH, /* Default path separator */
|
||||||
|
CHAR_BACKSLASH /* Default escape character */
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
/* The create function copies the default into the new memory, but must
|
||||||
|
override the default memory handling functions if a gcontext was provided. */
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN pcre2_convert_context * PCRE2_CALL_CONVENTION
pcre2_convert_context_create(pcre2_general_context *gcontext)
{
pcre2_memctl *supplied = (pcre2_memctl *)gcontext;
pcre2_convert_context *created = PRIV(memctl_malloc)(
  sizeof(pcre2_real_convert_context), supplied);

if (created != NULL)
  {
  /* Start from the built-in defaults, then put back the caller's memory
  handling, which the structure copy has just overwritten. */
  *created = PRIV(default_convert_context);
  if (supplied != NULL) *((pcre2_memctl *)created) = *supplied;
  }
return created;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Context copy functions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* Duplicate a general context. The new block is obtained from the allocator
recorded in the context being copied; NULL is returned if that fails. */

PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
pcre2_general_context_copy(pcre2_general_context *gcontext)
{
const size_t nbytes = sizeof(pcre2_real_general_context);
pcre2_general_context *duplicate =
  gcontext->memctl.malloc(nbytes, gcontext->memctl.memory_data);

if (duplicate != NULL) memcpy(duplicate, gcontext, nbytes);
return duplicate;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Duplicate a compile context. The new block is obtained from the allocator
recorded in the context being copied; NULL is returned if that fails. */

PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
pcre2_compile_context_copy(pcre2_compile_context *ccontext)
{
const size_t nbytes = sizeof(pcre2_real_compile_context);
pcre2_compile_context *duplicate =
  ccontext->memctl.malloc(nbytes, ccontext->memctl.memory_data);

if (duplicate != NULL) memcpy(duplicate, ccontext, nbytes);
return duplicate;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Duplicate a match context. The new block is obtained from the allocator
recorded in the context being copied; NULL is returned if that fails. */

PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
pcre2_match_context_copy(pcre2_match_context *mcontext)
{
const size_t nbytes = sizeof(pcre2_real_match_context);
pcre2_match_context *duplicate =
  mcontext->memctl.malloc(nbytes, mcontext->memctl.memory_data);

if (duplicate != NULL) memcpy(duplicate, mcontext, nbytes);
return duplicate;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Duplicate a convert context. The new block is obtained from the allocator
recorded in the context being copied; NULL is returned if that fails. */

PCRE2_EXP_DEFN pcre2_convert_context * PCRE2_CALL_CONVENTION
pcre2_convert_context_copy(pcre2_convert_context *ccontext)
{
const size_t nbytes = sizeof(pcre2_real_convert_context);
pcre2_convert_context *duplicate =
  ccontext->memctl.malloc(nbytes, ccontext->memctl.memory_data);

if (duplicate != NULL) memcpy(duplicate, ccontext, nbytes);
return duplicate;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Context free functions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_general_context_free(pcre2_general_context *gcontext)
|
||||||
|
{
|
||||||
|
if (gcontext != NULL)
|
||||||
|
gcontext->memctl.free(gcontext, gcontext->memctl.memory_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_compile_context_free(pcre2_compile_context *ccontext)
|
||||||
|
{
|
||||||
|
if (ccontext != NULL)
|
||||||
|
ccontext->memctl.free(ccontext, ccontext->memctl.memory_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_match_context_free(pcre2_match_context *mcontext)
|
||||||
|
{
|
||||||
|
if (mcontext != NULL)
|
||||||
|
mcontext->memctl.free(mcontext, mcontext->memctl.memory_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_convert_context_free(pcre2_convert_context *ccontext)
|
||||||
|
{
|
||||||
|
if (ccontext != NULL)
|
||||||
|
ccontext->memctl.free(ccontext, ccontext->memctl.memory_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Set values in contexts *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* All these functions return 0 for success or PCRE2_ERROR_BADDATA if invalid
|
||||||
|
data is given. Only some of the functions are able to test the validity of the
|
||||||
|
data. */
|
||||||
|
|
||||||
|
|
||||||
|
/* ------------ Compile context ------------ */
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_set_character_tables(pcre2_compile_context *ccontext,
|
||||||
|
const uint8_t *tables)
|
||||||
|
{
|
||||||
|
ccontext->tables = tables;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_set_bsr(pcre2_compile_context *ccontext, uint32_t value)
|
||||||
|
{
|
||||||
|
switch(value)
|
||||||
|
{
|
||||||
|
case PCRE2_BSR_ANYCRLF:
|
||||||
|
case PCRE2_BSR_UNICODE:
|
||||||
|
ccontext->bsr_convention = value;
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
default:
|
||||||
|
return PCRE2_ERROR_BADDATA;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, PCRE2_SIZE length)
|
||||||
|
{
|
||||||
|
ccontext->max_pattern_length = length;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_set_max_pattern_compiled_length(pcre2_compile_context *ccontext, PCRE2_SIZE length)
|
||||||
|
{
|
||||||
|
ccontext->max_pattern_compiled_length = length;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline)
|
||||||
|
{
|
||||||
|
switch(newline)
|
||||||
|
{
|
||||||
|
case PCRE2_NEWLINE_CR:
|
||||||
|
case PCRE2_NEWLINE_LF:
|
||||||
|
case PCRE2_NEWLINE_CRLF:
|
||||||
|
case PCRE2_NEWLINE_ANY:
|
||||||
|
case PCRE2_NEWLINE_ANYCRLF:
|
||||||
|
case PCRE2_NEWLINE_NUL:
|
||||||
|
ccontext->newline_convention = newline;
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
default:
|
||||||
|
return PCRE2_ERROR_BADDATA;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_set_max_varlookbehind(pcre2_compile_context *ccontext, uint32_t limit)
|
||||||
|
{
|
||||||
|
ccontext->max_varlookbehind = limit;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, uint32_t limit)
|
||||||
|
{
|
||||||
|
ccontext->parens_nest_limit = limit;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_set_compile_extra_options(pcre2_compile_context *ccontext, uint32_t options)
|
||||||
|
{
|
||||||
|
ccontext->extra_options = options;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
|
||||||
|
int (*guard)(uint32_t, void *), void *user_data)
|
||||||
|
{
|
||||||
|
ccontext->stack_guard = guard;
|
||||||
|
ccontext->stack_guard_data = user_data;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_set_optimize(pcre2_compile_context *ccontext, uint32_t directive)
|
||||||
|
{
|
||||||
|
if (ccontext == NULL)
|
||||||
|
return PCRE2_ERROR_NULL;
|
||||||
|
|
||||||
|
switch (directive)
|
||||||
|
{
|
||||||
|
case PCRE2_OPTIMIZATION_NONE:
|
||||||
|
ccontext->optimization_flags = 0;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE2_OPTIMIZATION_FULL:
|
||||||
|
ccontext->optimization_flags = PCRE2_OPTIMIZATION_ALL;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
if (directive >= PCRE2_AUTO_POSSESS && directive <= PCRE2_START_OPTIMIZE_OFF)
|
||||||
|
{
|
||||||
|
/* Even directive numbers starting from 64 switch a bit on;
|
||||||
|
* Odd directive numbers starting from 65 switch a bit off */
|
||||||
|
if ((directive & 1) != 0)
|
||||||
|
ccontext->optimization_flags &= ~(1u << ((directive >> 1) - 32));
|
||||||
|
else
|
||||||
|
ccontext->optimization_flags |= 1u << ((directive >> 1) - 32);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return PCRE2_ERROR_BADOPTION;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ------------ Match context ------------ */
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_set_callout(pcre2_match_context *mcontext,
|
||||||
|
int (*callout)(pcre2_callout_block *, void *), void *callout_data)
|
||||||
|
{
|
||||||
|
mcontext->callout = callout;
|
||||||
|
mcontext->callout_data = callout_data;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_set_substitute_callout(pcre2_match_context *mcontext,
|
||||||
|
int (*substitute_callout)(pcre2_substitute_callout_block *, void *),
|
||||||
|
void *substitute_callout_data)
|
||||||
|
{
|
||||||
|
mcontext->substitute_callout = substitute_callout;
|
||||||
|
mcontext->substitute_callout_data = substitute_callout_data;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_set_substitute_case_callout(pcre2_match_context *mcontext,
|
||||||
|
PCRE2_SIZE (*substitute_case_callout)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *,
|
||||||
|
PCRE2_SIZE, int, void *),
|
||||||
|
void *substitute_case_callout_data)
|
||||||
|
{
|
||||||
|
mcontext->substitute_case_callout = substitute_case_callout;
|
||||||
|
mcontext->substitute_case_callout_data = substitute_case_callout_data;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_set_heap_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||||
|
{
|
||||||
|
mcontext->heap_limit = limit;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||||
|
{
|
||||||
|
mcontext->match_limit = limit;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_set_depth_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||||
|
{
|
||||||
|
mcontext->depth_limit = limit;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_set_offset_limit(pcre2_match_context *mcontext, PCRE2_SIZE limit)
|
||||||
|
{
|
||||||
|
mcontext->offset_limit = limit;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* These functions became obsolete at release 10.30. The first is kept as a
|
||||||
|
synonym for backwards compatibility. The second now does nothing. Exclude both
|
||||||
|
from coverage reports. */
|
||||||
|
|
||||||
|
/* LCOV_EXCL_START */
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||||
|
{
|
||||||
|
return pcre2_set_depth_limit(mcontext, limit);
|
||||||
|
}
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_set_recursion_memory_management(pcre2_match_context *mcontext,
|
||||||
|
void *(*mymalloc)(size_t, void *), void (*myfree)(void *, void *),
|
||||||
|
void *mydata)
|
||||||
|
{
|
||||||
|
(void)mcontext;
|
||||||
|
(void)mymalloc;
|
||||||
|
(void)myfree;
|
||||||
|
(void)mydata;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* LCOV_EXCL_STOP */
|
||||||
|
|
||||||
|
|
||||||
|
/* ------------ Convert context ------------ */
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_set_glob_separator(pcre2_convert_context *ccontext, uint32_t separator)
|
||||||
|
{
|
||||||
|
if (separator != CHAR_SLASH && separator != CHAR_BACKSLASH &&
|
||||||
|
separator != CHAR_DOT) return PCRE2_ERROR_BADDATA;
|
||||||
|
ccontext->glob_separator = separator;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_set_glob_escape(pcre2_convert_context *ccontext, uint32_t escape)
|
||||||
|
{
|
||||||
|
if (escape > 255 || (escape != 0 && !ispunct(escape)))
|
||||||
|
return PCRE2_ERROR_BADDATA;
|
||||||
|
ccontext->glob_escape = escape;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre2_context.c */
|
||||||
|
|
||||||
1191
3rd/pcre2/src/pcre2_convert.c
Normal file
1191
3rd/pcre2/src/pcre2_convert.c
Normal file
@@ -0,0 +1,1191 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
#include "config.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "pcre2_internal.h"
|
||||||
|
|
||||||
|
/* Option bits that name an input pattern syntax (glob, POSIX basic, POSIX
extended). */

#define TYPE_OPTIONS \
  (PCRE2_CONVERT_GLOB|PCRE2_CONVERT_POSIX_BASIC|PCRE2_CONVERT_POSIX_EXTENDED)

/* Every PCRE2_CONVERT_xxx option bit named in this file. */

#define ALL_OPTIONS \
  (PCRE2_CONVERT_UTF|PCRE2_CONVERT_NO_UTF_CHECK| \
   PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR| \
   PCRE2_CONVERT_GLOB_NO_STARSTAR| \
   TYPE_OPTIONS)

#define DUMMY_BUFFER_SIZE 100

/* Generated pattern fragments. Each one is built by concatenating the
single-character STR_xxx string macros. */

#define STR_BACKSLASH_A STR_BACKSLASH STR_A
#define STR_BACKSLASH_z STR_BACKSLASH STR_z
#define STR_COLON_RIGHT_SQUARE_BRACKET STR_COLON STR_RIGHT_SQUARE_BRACKET

#define STR_DOT_STAR_LOOKBEHIND \
  STR_DOT STR_ASTERISK STR_LEFT_PARENTHESIS STR_QUESTION_MARK \
  STR_LESS_THAN_SIGN STR_EQUALS_SIGN

#define STR_LOOKAHEAD_NOT_DOT \
  STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_EXCLAMATION_MARK \
  STR_BACKSLASH STR_DOT STR_RIGHT_PARENTHESIS

#define STR_QUERY_s \
  STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_s STR_RIGHT_PARENTHESIS

#define STR_STAR_NUL \
  STR_LEFT_PARENTHESIS STR_ASTERISK STR_N STR_U STR_L STR_RIGHT_PARENTHESIS

/* States for POSIX processing. The numeric order is significant:
convert_posix() treats every state >= POSIX_CLASS_NOT_STARTED as "inside a
bracket expression" and every state < POSIX_NOT_BRACKET as "at the start of
the regex". */

enum {
  POSIX_START_REGEX = 0,
  POSIX_ANCHORED,
  POSIX_NOT_BRACKET,
  POSIX_CLASS_NOT_STARTED,
  POSIX_CLASS_STARTING,
  POSIX_CLASS_STARTED
};
|
||||||
|
|
||||||
|
/* Macro to add a character string to the output buffer, checking for overflow.

It must be expanded inside a function that has `p` (the current write
position) and `endp` (the write limit) in scope. When the string does not fit,
the macro makes that function return PCRE2_ERROR_NOMEMORY; the characters
copied before the limit was reached stay in the buffer.

The body is wrapped in do { } while (0) so that an invocation followed by a
semicolon is exactly one statement. With a bare { } block, a use such as
"if (x) PUTCHARS(a); else ..." would not compile. The argument is
parenthesized so that any expression may be passed. */

#define PUTCHARS(string) \
  do \
    { \
    for (const char *s = (string); *s != 0; s++) \
      { \
      if (p >= endp) return PCRE2_ERROR_NOMEMORY; \
      *p++ = *s; \
      } \
    } \
  while (0)
|
||||||
|
|
||||||
|
/* Literals that must be escaped: \ ? * + | . ^ $ { } [ ] ( ) */
|
||||||
|
|
||||||
|
static const char *pcre2_escaped_literals =
|
||||||
|
STR_BACKSLASH STR_QUESTION_MARK STR_ASTERISK STR_PLUS
|
||||||
|
STR_VERTICAL_LINE STR_DOT STR_CIRCUMFLEX_ACCENT STR_DOLLAR_SIGN
|
||||||
|
STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET
|
||||||
|
STR_LEFT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
|
||||||
|
STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS;
|
||||||
|
|
||||||
|
/* Recognized escaped metacharacters in POSIX basic patterns. */
|
||||||
|
|
||||||
|
static const char *posix_meta_escapes =
|
||||||
|
STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS
|
||||||
|
STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET
|
||||||
|
STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Convert a POSIX pattern *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function handles both basic and extended POSIX patterns.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
pattype the pattern type
|
||||||
|
pattern the pattern
|
||||||
|
plength length in code units
|
||||||
|
utf TRUE if UTF
|
||||||
|
use_buffer where to put the output
|
||||||
|
use_length length of use_buffer
|
||||||
|
bufflenptr where to put the used length
|
||||||
|
dummyrun TRUE if a dummy run
|
||||||
|
ccontext the convert context
|
||||||
|
|
||||||
|
Returns: 0 => success
|
||||||
|
!0 => error code
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int
|
||||||
|
convert_posix(uint32_t pattype, PCRE2_SPTR pattern, PCRE2_SIZE plength,
|
||||||
|
BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,
|
||||||
|
PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)
|
||||||
|
{
|
||||||
|
PCRE2_SPTR posix = pattern;
|
||||||
|
PCRE2_UCHAR *p = use_buffer;
|
||||||
|
PCRE2_UCHAR *pp = p;
|
||||||
|
PCRE2_UCHAR *endp = p + use_length - 1; /* Allow for trailing zero */
|
||||||
|
PCRE2_SIZE convlength = 0;
|
||||||
|
|
||||||
|
uint32_t bracount = 0;
|
||||||
|
uint32_t posix_state = POSIX_START_REGEX;
|
||||||
|
uint32_t lastspecial = 0;
|
||||||
|
BOOL extended = (pattype & PCRE2_CONVERT_POSIX_EXTENDED) != 0;
|
||||||
|
BOOL nextisliteral = FALSE;
|
||||||
|
|
||||||
|
(void)utf; /* Not used when Unicode not supported */
|
||||||
|
(void)ccontext; /* Not currently used */
|
||||||
|
|
||||||
|
/* Initialize default for error offset as end of input. */
|
||||||
|
|
||||||
|
*bufflenptr = plength;
|
||||||
|
PUTCHARS(STR_STAR_NUL);
|
||||||
|
|
||||||
|
/* Now scan the input. */
|
||||||
|
|
||||||
|
while (plength > 0)
|
||||||
|
{
|
||||||
|
uint32_t c, sc;
|
||||||
|
int clength = 1;
|
||||||
|
|
||||||
|
/* Add in the length of the last item, then, if in the dummy run, pull the
|
||||||
|
pointer back to the start of the (temporary) buffer and then remember the
|
||||||
|
start of the next item. */
|
||||||
|
|
||||||
|
convlength += p - pp;
|
||||||
|
if (dummyrun) p = use_buffer;
|
||||||
|
pp = p;
|
||||||
|
|
||||||
|
/* Pick up the next character */
|
||||||
|
|
||||||
|
#ifndef SUPPORT_UNICODE
|
||||||
|
c = *posix;
|
||||||
|
#else
|
||||||
|
GETCHARLENTEST(c, posix, clength);
|
||||||
|
#endif
|
||||||
|
posix += clength;
|
||||||
|
plength -= clength;
|
||||||
|
|
||||||
|
sc = nextisliteral? 0 : c;
|
||||||
|
nextisliteral = FALSE;
|
||||||
|
|
||||||
|
/* Handle a character within a class. */
|
||||||
|
|
||||||
|
if (posix_state >= POSIX_CLASS_NOT_STARTED)
|
||||||
|
{
|
||||||
|
if (c == CHAR_RIGHT_SQUARE_BRACKET)
|
||||||
|
{
|
||||||
|
PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
|
||||||
|
posix_state = POSIX_NOT_BRACKET;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Not the end of the class */
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
switch (posix_state)
|
||||||
|
{
|
||||||
|
case POSIX_CLASS_STARTED:
|
||||||
|
if (c <= 127 && islower(c)) break; /* Remain in started state */
|
||||||
|
posix_state = POSIX_CLASS_NOT_STARTED;
|
||||||
|
if (c == CHAR_COLON && plength > 0 &&
|
||||||
|
*posix == CHAR_RIGHT_SQUARE_BRACKET)
|
||||||
|
{
|
||||||
|
PUTCHARS(STR_COLON_RIGHT_SQUARE_BRACKET);
|
||||||
|
plength--;
|
||||||
|
posix++;
|
||||||
|
continue; /* With next character after :] */
|
||||||
|
}
|
||||||
|
/* Fall through */
|
||||||
|
|
||||||
|
case POSIX_CLASS_NOT_STARTED:
|
||||||
|
if (c == CHAR_LEFT_SQUARE_BRACKET)
|
||||||
|
posix_state = POSIX_CLASS_STARTING;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case POSIX_CLASS_STARTING:
|
||||||
|
if (c == CHAR_COLON) posix_state = POSIX_CLASS_STARTED;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (c == CHAR_BACKSLASH) PUTCHARS(STR_BACKSLASH);
|
||||||
|
if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
|
||||||
|
memcpy(p, posix - clength, CU2BYTES(clength));
|
||||||
|
p += clength;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Handle a character not within a class. */
|
||||||
|
|
||||||
|
else switch(sc)
|
||||||
|
{
|
||||||
|
case CHAR_LEFT_SQUARE_BRACKET:
|
||||||
|
PUTCHARS(STR_LEFT_SQUARE_BRACKET);
|
||||||
|
|
||||||
|
#ifdef NEVER
|
||||||
|
/* We could handle special cases [[:<:]] and [[:>:]] (which PCRE does
|
||||||
|
support) but they are not part of POSIX 1003.1. */
|
||||||
|
|
||||||
|
if (plength >= 6)
|
||||||
|
{
|
||||||
|
if (posix[0] == CHAR_LEFT_SQUARE_BRACKET &&
|
||||||
|
posix[1] == CHAR_COLON &&
|
||||||
|
(posix[2] == CHAR_LESS_THAN_SIGN ||
|
||||||
|
posix[2] == CHAR_GREATER_THAN_SIGN) &&
|
||||||
|
posix[3] == CHAR_COLON &&
|
||||||
|
posix[4] == CHAR_RIGHT_SQUARE_BRACKET &&
|
||||||
|
posix[5] == CHAR_RIGHT_SQUARE_BRACKET)
|
||||||
|
{
|
||||||
|
if (p + 6 > endp) return PCRE2_ERROR_NOMEMORY;
|
||||||
|
memcpy(p, posix, CU2BYTES(6));
|
||||||
|
p += 6;
|
||||||
|
posix += 6;
|
||||||
|
plength -= 6;
|
||||||
|
continue; /* With next character */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Handle start of "normal" character classes */
|
||||||
|
|
||||||
|
posix_state = POSIX_CLASS_NOT_STARTED;
|
||||||
|
|
||||||
|
/* Handle ^ and ] as first characters */
|
||||||
|
|
||||||
|
if (plength > 0)
|
||||||
|
{
|
||||||
|
if (*posix == CHAR_CIRCUMFLEX_ACCENT)
|
||||||
|
{
|
||||||
|
posix++;
|
||||||
|
plength--;
|
||||||
|
PUTCHARS(STR_CIRCUMFLEX_ACCENT);
|
||||||
|
}
|
||||||
|
if (plength > 0 && *posix == CHAR_RIGHT_SQUARE_BRACKET)
|
||||||
|
{
|
||||||
|
posix++;
|
||||||
|
plength--;
|
||||||
|
PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case CHAR_BACKSLASH:
|
||||||
|
if (plength == 0) return PCRE2_ERROR_END_BACKSLASH;
|
||||||
|
if (extended) nextisliteral = TRUE; else
|
||||||
|
{
|
||||||
|
if (*posix < 127 && strchr(posix_meta_escapes, *posix) != NULL)
|
||||||
|
{
|
||||||
|
if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH);
|
||||||
|
if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
|
||||||
|
lastspecial = *p++ = *posix++;
|
||||||
|
plength--;
|
||||||
|
}
|
||||||
|
else nextisliteral = TRUE;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case CHAR_RIGHT_PARENTHESIS:
|
||||||
|
if (!extended || bracount == 0) goto ESCAPE_LITERAL;
|
||||||
|
bracount--;
|
||||||
|
goto COPY_SPECIAL;
|
||||||
|
|
||||||
|
case CHAR_LEFT_PARENTHESIS:
|
||||||
|
bracount++;
|
||||||
|
/* Fall through */
|
||||||
|
|
||||||
|
case CHAR_QUESTION_MARK:
|
||||||
|
case CHAR_PLUS:
|
||||||
|
case CHAR_LEFT_CURLY_BRACKET:
|
||||||
|
case CHAR_RIGHT_CURLY_BRACKET:
|
||||||
|
case CHAR_VERTICAL_LINE:
|
||||||
|
if (!extended) goto ESCAPE_LITERAL;
|
||||||
|
/* Fall through */
|
||||||
|
|
||||||
|
case CHAR_DOT:
|
||||||
|
case CHAR_DOLLAR_SIGN:
|
||||||
|
posix_state = POSIX_NOT_BRACKET;
|
||||||
|
COPY_SPECIAL:
|
||||||
|
lastspecial = c;
|
||||||
|
if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
|
||||||
|
*p++ = c;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case CHAR_ASTERISK:
|
||||||
|
if (lastspecial != CHAR_ASTERISK)
|
||||||
|
{
|
||||||
|
if (!extended && (posix_state < POSIX_NOT_BRACKET ||
|
||||||
|
lastspecial == CHAR_LEFT_PARENTHESIS))
|
||||||
|
goto ESCAPE_LITERAL;
|
||||||
|
goto COPY_SPECIAL;
|
||||||
|
}
|
||||||
|
break; /* Ignore second and subsequent asterisks */
|
||||||
|
|
||||||
|
case CHAR_CIRCUMFLEX_ACCENT:
|
||||||
|
if (extended) goto COPY_SPECIAL;
|
||||||
|
if (posix_state == POSIX_START_REGEX ||
|
||||||
|
lastspecial == CHAR_LEFT_PARENTHESIS)
|
||||||
|
{
|
||||||
|
posix_state = POSIX_ANCHORED;
|
||||||
|
goto COPY_SPECIAL;
|
||||||
|
}
|
||||||
|
/* Fall through */
|
||||||
|
|
||||||
|
default:
|
||||||
|
if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL)
|
||||||
|
{
|
||||||
|
ESCAPE_LITERAL:
|
||||||
|
PUTCHARS(STR_BACKSLASH);
|
||||||
|
}
|
||||||
|
lastspecial = 0xff; /* Indicates nothing special */
|
||||||
|
if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
|
||||||
|
memcpy(p, posix - clength, CU2BYTES(clength));
|
||||||
|
p += clength;
|
||||||
|
posix_state = POSIX_NOT_BRACKET;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (posix_state >= POSIX_CLASS_NOT_STARTED)
|
||||||
|
return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
|
||||||
|
convlength += p - pp; /* Final segment */
|
||||||
|
*bufflenptr = convlength;
|
||||||
|
*p++ = 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Convert a glob pattern *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* Context for writing the output into a buffer. */
|
||||||
|
|
||||||
|
typedef struct pcre2_output_context {
|
||||||
|
PCRE2_UCHAR *output; /* current output position */
|
||||||
|
PCRE2_SPTR output_end; /* output end */
|
||||||
|
PCRE2_SIZE output_size; /* size of the output */
|
||||||
|
uint8_t out_str[8]; /* string copied to the output */
|
||||||
|
} pcre2_output_context;
|
||||||
|
|
||||||
|
|
||||||
|
/* Write a character into the output.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
out output context
|
||||||
|
chr the next character
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void
|
||||||
|
convert_glob_write(pcre2_output_context *out, PCRE2_UCHAR chr)
|
||||||
|
{
|
||||||
|
out->output_size++;
|
||||||
|
|
||||||
|
if (out->output < out->output_end)
|
||||||
|
*out->output++ = chr;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Write a string into the output.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
out output context
|
||||||
|
length length of out->out_str
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void
|
||||||
|
convert_glob_write_str(pcre2_output_context *out, PCRE2_SIZE length)
|
||||||
|
{
|
||||||
|
uint8_t *out_str = out->out_str;
|
||||||
|
PCRE2_UCHAR *output = out->output;
|
||||||
|
PCRE2_SPTR output_end = out->output_end;
|
||||||
|
PCRE2_SIZE output_size = out->output_size;
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
output_size++;
|
||||||
|
|
||||||
|
if (output < output_end)
|
||||||
|
*output++ = *out_str++;
|
||||||
|
}
|
||||||
|
while (--length != 0);
|
||||||
|
|
||||||
|
out->output = output;
|
||||||
|
out->output_size = output_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Prints the separator into the output.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
out output context
|
||||||
|
separator glob separator
|
||||||
|
with_escape backslash is needed before separator
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void
|
||||||
|
convert_glob_print_separator(pcre2_output_context *out,
|
||||||
|
PCRE2_UCHAR separator, BOOL with_escape)
|
||||||
|
{
|
||||||
|
if (with_escape)
|
||||||
|
convert_glob_write(out, CHAR_BACKSLASH);
|
||||||
|
|
||||||
|
convert_glob_write(out, separator);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Prints a wildcard into the output.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
out output context
|
||||||
|
separator glob separator
|
||||||
|
with_escape backslash is needed before separator
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void
|
||||||
|
convert_glob_print_wildcard(pcre2_output_context *out,
|
||||||
|
PCRE2_UCHAR separator, BOOL with_escape)
|
||||||
|
{
|
||||||
|
out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;
|
||||||
|
out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT;
|
||||||
|
convert_glob_write_str(out, 2);
|
||||||
|
|
||||||
|
convert_glob_print_separator(out, separator, with_escape);
|
||||||
|
|
||||||
|
convert_glob_write(out, CHAR_RIGHT_SQUARE_BRACKET);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Parse a posix class.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
from starting point of scanning the range
|
||||||
|
pattern_end end of pattern
|
||||||
|
out output context
|
||||||
|
|
||||||
|
Returns: >0 => class index
|
||||||
|
0 => malformed class
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int
|
||||||
|
convert_glob_parse_class(PCRE2_SPTR *from, PCRE2_SPTR pattern_end,
|
||||||
|
pcre2_output_context *out)
|
||||||
|
{
|
||||||
|
static const char *posix_classes = "alnum:alpha:ascii:blank:cntrl:digit:"
|
||||||
|
"graph:lower:print:punct:space:upper:word:xdigit:";
|
||||||
|
PCRE2_SPTR start = *from + 1;
|
||||||
|
PCRE2_SPTR pattern = start;
|
||||||
|
const char *class_ptr;
|
||||||
|
PCRE2_UCHAR c;
|
||||||
|
int class_index;
|
||||||
|
|
||||||
|
while (TRUE)
|
||||||
|
{
|
||||||
|
if (pattern >= pattern_end) return 0;
|
||||||
|
|
||||||
|
c = *pattern++;
|
||||||
|
|
||||||
|
if (c < CHAR_a || c > CHAR_z) break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (c != CHAR_COLON || pattern >= pattern_end ||
|
||||||
|
*pattern != CHAR_RIGHT_SQUARE_BRACKET)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
class_ptr = posix_classes;
|
||||||
|
class_index = 1;
|
||||||
|
|
||||||
|
while (TRUE)
|
||||||
|
{
|
||||||
|
if (*class_ptr == CHAR_NUL) return 0;
|
||||||
|
|
||||||
|
pattern = start;
|
||||||
|
|
||||||
|
while (*pattern == (PCRE2_UCHAR) *class_ptr)
|
||||||
|
{
|
||||||
|
if (*pattern == CHAR_COLON)
|
||||||
|
{
|
||||||
|
pattern += 2;
|
||||||
|
start -= 2;
|
||||||
|
|
||||||
|
do convert_glob_write(out, *start++); while (start < pattern);
|
||||||
|
|
||||||
|
*from = pattern;
|
||||||
|
return class_index;
|
||||||
|
}
|
||||||
|
pattern++;
|
||||||
|
class_ptr++;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (*class_ptr != CHAR_COLON) class_ptr++;
|
||||||
|
class_ptr++;
|
||||||
|
class_index++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Checks whether the character is in the class.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
class_index class index
|
||||||
|
c character
|
||||||
|
|
||||||
|
Returns: !0 => character is found in the class
|
||||||
|
0 => otherwise
|
||||||
|
*/
|
||||||
|
|
||||||
|
static BOOL
|
||||||
|
convert_glob_char_in_class(int class_index, PCRE2_UCHAR c)
|
||||||
|
{
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
|
if (c > 0xff)
|
||||||
|
{
|
||||||
|
/* ctype functions are not sane for c > 0xff */
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
switch (class_index)
|
||||||
|
{
|
||||||
|
case 1: return isalnum(c);
|
||||||
|
case 2: return isalpha(c);
|
||||||
|
case 3: return 1;
|
||||||
|
case 4: return c == CHAR_HT || c == CHAR_SPACE;
|
||||||
|
case 5: return iscntrl(c);
|
||||||
|
case 6: return isdigit(c);
|
||||||
|
case 7: return isgraph(c);
|
||||||
|
case 8: return islower(c);
|
||||||
|
case 9: return isprint(c);
|
||||||
|
case 10: return ispunct(c);
|
||||||
|
case 11: return isspace(c);
|
||||||
|
case 12: return isupper(c);
|
||||||
|
case 13: return isalnum(c) || c == CHAR_UNDERSCORE;
|
||||||
|
default: return isxdigit(c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parse a range of characters.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
from starting point of scanning the range
|
||||||
|
pattern_end end of pattern
|
||||||
|
out output context
|
||||||
|
separator glob separator
|
||||||
|
with_escape backslash is needed before separator
|
||||||
|
|
||||||
|
Returns: 0 => success
|
||||||
|
!0 => error code
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int
|
||||||
|
convert_glob_parse_range(PCRE2_SPTR *from, PCRE2_SPTR pattern_end,
|
||||||
|
pcre2_output_context *out, BOOL utf, PCRE2_UCHAR separator,
|
||||||
|
BOOL with_escape, PCRE2_UCHAR escape, BOOL no_wildsep)
|
||||||
|
{
|
||||||
|
BOOL is_negative = FALSE;
|
||||||
|
BOOL separator_seen = FALSE;
|
||||||
|
BOOL has_prev_c;
|
||||||
|
PCRE2_SPTR pattern = *from;
|
||||||
|
PCRE2_SPTR char_start = NULL;
|
||||||
|
uint32_t c, prev_c;
|
||||||
|
int len, class_index;
|
||||||
|
|
||||||
|
(void)utf; /* Avoid compiler warning. */
|
||||||
|
|
||||||
|
if (pattern >= pattern_end)
|
||||||
|
{
|
||||||
|
*from = pattern;
|
||||||
|
return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (*pattern == CHAR_EXCLAMATION_MARK
|
||||||
|
|| *pattern == CHAR_CIRCUMFLEX_ACCENT)
|
||||||
|
{
|
||||||
|
pattern++;
|
||||||
|
|
||||||
|
if (pattern >= pattern_end)
|
||||||
|
{
|
||||||
|
*from = pattern;
|
||||||
|
return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
|
||||||
|
}
|
||||||
|
|
||||||
|
is_negative = TRUE;
|
||||||
|
|
||||||
|
out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;
|
||||||
|
out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT;
|
||||||
|
len = 2;
|
||||||
|
|
||||||
|
if (!no_wildsep)
|
||||||
|
{
|
||||||
|
if (with_escape)
|
||||||
|
{
|
||||||
|
out->out_str[len] = CHAR_BACKSLASH;
|
||||||
|
len++;
|
||||||
|
}
|
||||||
|
out->out_str[len] = (uint8_t) separator;
|
||||||
|
}
|
||||||
|
|
||||||
|
convert_glob_write_str(out, len + 1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
convert_glob_write(out, CHAR_LEFT_SQUARE_BRACKET);
|
||||||
|
|
||||||
|
has_prev_c = FALSE;
|
||||||
|
prev_c = 0;
|
||||||
|
|
||||||
|
if (*pattern == CHAR_RIGHT_SQUARE_BRACKET)
|
||||||
|
{
|
||||||
|
out->out_str[0] = CHAR_BACKSLASH;
|
||||||
|
out->out_str[1] = CHAR_RIGHT_SQUARE_BRACKET;
|
||||||
|
convert_glob_write_str(out, 2);
|
||||||
|
has_prev_c = TRUE;
|
||||||
|
prev_c = CHAR_RIGHT_SQUARE_BRACKET;
|
||||||
|
pattern++;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (pattern < pattern_end)
|
||||||
|
{
|
||||||
|
char_start = pattern;
|
||||||
|
GETCHARINCTEST(c, pattern);
|
||||||
|
|
||||||
|
if (c == CHAR_RIGHT_SQUARE_BRACKET)
|
||||||
|
{
|
||||||
|
convert_glob_write(out, c);
|
||||||
|
|
||||||
|
if (!is_negative && !no_wildsep && separator_seen)
|
||||||
|
{
|
||||||
|
out->out_str[0] = CHAR_LEFT_PARENTHESIS;
|
||||||
|
out->out_str[1] = CHAR_QUESTION_MARK;
|
||||||
|
out->out_str[2] = CHAR_LESS_THAN_SIGN;
|
||||||
|
out->out_str[3] = CHAR_EXCLAMATION_MARK;
|
||||||
|
convert_glob_write_str(out, 4);
|
||||||
|
|
||||||
|
convert_glob_print_separator(out, separator, with_escape);
|
||||||
|
convert_glob_write(out, CHAR_RIGHT_PARENTHESIS);
|
||||||
|
}
|
||||||
|
|
||||||
|
*from = pattern;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pattern >= pattern_end) break;
|
||||||
|
|
||||||
|
if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
|
||||||
|
{
|
||||||
|
*from = pattern;
|
||||||
|
class_index = convert_glob_parse_class(from, pattern_end, out);
|
||||||
|
|
||||||
|
if (class_index != 0)
|
||||||
|
{
|
||||||
|
pattern = *from;
|
||||||
|
|
||||||
|
has_prev_c = FALSE;
|
||||||
|
prev_c = 0;
|
||||||
|
|
||||||
|
if (!is_negative &&
|
||||||
|
convert_glob_char_in_class (class_index, separator))
|
||||||
|
separator_seen = TRUE;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (c == CHAR_MINUS && has_prev_c &&
|
||||||
|
*pattern != CHAR_RIGHT_SQUARE_BRACKET)
|
||||||
|
{
|
||||||
|
convert_glob_write(out, CHAR_MINUS);
|
||||||
|
|
||||||
|
char_start = pattern;
|
||||||
|
GETCHARINCTEST(c, pattern);
|
||||||
|
|
||||||
|
if (pattern >= pattern_end) break;
|
||||||
|
|
||||||
|
if (escape != 0 && c == escape)
|
||||||
|
{
|
||||||
|
char_start = pattern;
|
||||||
|
GETCHARINCTEST(c, pattern);
|
||||||
|
}
|
||||||
|
else if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
|
||||||
|
{
|
||||||
|
*from = pattern;
|
||||||
|
return PCRE2_ERROR_CONVERT_SYNTAX;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (prev_c > c)
|
||||||
|
{
|
||||||
|
*from = pattern;
|
||||||
|
return PCRE2_ERROR_CONVERT_SYNTAX;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (prev_c < separator && separator < c) separator_seen = TRUE;
|
||||||
|
|
||||||
|
has_prev_c = FALSE;
|
||||||
|
prev_c = 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (escape != 0 && c == escape)
|
||||||
|
{
|
||||||
|
char_start = pattern;
|
||||||
|
GETCHARINCTEST(c, pattern);
|
||||||
|
|
||||||
|
if (pattern >= pattern_end) break;
|
||||||
|
}
|
||||||
|
|
||||||
|
has_prev_c = TRUE;
|
||||||
|
prev_c = c;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (c == CHAR_LEFT_SQUARE_BRACKET || c == CHAR_RIGHT_SQUARE_BRACKET ||
|
||||||
|
c == CHAR_BACKSLASH || c == CHAR_MINUS)
|
||||||
|
convert_glob_write(out, CHAR_BACKSLASH);
|
||||||
|
|
||||||
|
if (c == separator) separator_seen = TRUE;
|
||||||
|
|
||||||
|
do convert_glob_write(out, *char_start++); while (char_start < pattern);
|
||||||
|
}
|
||||||
|
|
||||||
|
*from = pattern;
|
||||||
|
return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Prints a (*COMMIT) into the output.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
out output context
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void
|
||||||
|
convert_glob_print_commit(pcre2_output_context *out)
|
||||||
|
{
|
||||||
|
out->out_str[0] = CHAR_LEFT_PARENTHESIS;
|
||||||
|
out->out_str[1] = CHAR_ASTERISK;
|
||||||
|
out->out_str[2] = CHAR_C;
|
||||||
|
out->out_str[3] = CHAR_O;
|
||||||
|
out->out_str[4] = CHAR_M;
|
||||||
|
out->out_str[5] = CHAR_M;
|
||||||
|
out->out_str[6] = CHAR_I;
|
||||||
|
out->out_str[7] = CHAR_T;
|
||||||
|
convert_glob_write_str(out, 8);
|
||||||
|
convert_glob_write(out, CHAR_RIGHT_PARENTHESIS);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Bash glob converter.

Walks the glob pattern one code unit at a time and writes an equivalent
regular expression through a pcre2_output_context. (The comments below assume
that convert_glob_write_str(out, n) emits the first n entries of out->out_str;
that helper is defined elsewhere in this file.)

Arguments:
  options        the option bits; only PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR
                   and PCRE2_CONVERT_GLOB_NO_STARSTAR are examined here
  pattern        the pattern
  plength        length in code units
  utf            TRUE if UTF
  use_buffer     where to put the output
  use_length     length of use_buffer
  bufflenptr     on success, receives the output length excluding the
                   terminating NUL; on error, receives an offset into the
                   input pattern
  dummyrun       TRUE if a dummy run
  ccontext       the convert context (supplies glob separator and escape)

Returns:         0 => success
                !0 => error code
*/

static int
convert_glob(uint32_t options, PCRE2_SPTR pattern, PCRE2_SIZE plength,
  BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,
  PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)
{
pcre2_output_context out;
PCRE2_SPTR pattern_start = pattern;
PCRE2_SPTR pattern_end = pattern + plength;
PCRE2_UCHAR separator = ccontext->glob_separator;
PCRE2_UCHAR escape = ccontext->glob_escape;
PCRE2_UCHAR c;
BOOL no_wildsep = (options & PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR) != 0;
BOOL no_starstar = (options & PCRE2_CONVERT_GLOB_NO_STARSTAR) != 0;
BOOL in_atomic = FALSE;       /* TRUE while an emitted "(?>" is still open */
BOOL after_starstar = FALSE;  /* Set once a non-trailing "**" has been seen */
BOOL no_slash_z = FALSE;      /* TRUE => do not append \z at the end */
BOOL with_escape, is_start, after_separator;
int result = 0;

(void)utf; /* Avoid compiler warning. */

#ifdef SUPPORT_UNICODE
if (utf && (separator >= 128 || escape >= 128))
  {
  /* Currently only ASCII characters are supported. */
  *bufflenptr = 0;
  return PCRE2_ERROR_CONVERT_SYNTAX;
  }
#endif

/* Remember whether the separator is one of the characters listed in
pcre2_escaped_literals; the flag is passed on to the separator and wildcard
printing helpers. */

with_escape = strchr(pcre2_escaped_literals, separator) != NULL;

/* Set up the output context over the caller's buffer. */
out.output = use_buffer;
out.output_end = use_buffer + use_length;
out.output_size = 0;

/* The converted pattern always starts with "(?s)". */

out.out_str[0] = CHAR_LEFT_PARENTHESIS;
out.out_str[1] = CHAR_QUESTION_MARK;
out.out_str[2] = CHAR_s;
out.out_str[3] = CHAR_RIGHT_PARENTHESIS;
convert_glob_write_str(&out, 4);

/* A leading \A is emitted unless the pattern opens with '*' while
NO_WILD_SEPARATOR is set, or opens with "**" while star-star is enabled. */

is_start = TRUE;

if (pattern < pattern_end && pattern[0] == CHAR_ASTERISK)
  {
  if (no_wildsep)
    is_start = FALSE;
  else if (!no_starstar && pattern + 1 < pattern_end &&
           pattern[1] == CHAR_ASTERISK)
    is_start = FALSE;
  }

if (is_start)
  {
  out.out_str[0] = CHAR_BACKSLASH;
  out.out_str[1] = CHAR_A;
  convert_glob_write_str(&out, 2);
  }

while (pattern < pattern_end)
  {
  c = *pattern++;

  if (c == CHAR_ASTERISK)
    {
    /* From here on is_start means "this star is the first pattern char". */
    is_start = pattern == pattern_start + 1;

    /* Any star closes an atomic group opened by a previous star. */
    if (in_atomic)
      {
      convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
      in_atomic = FALSE;
      }

    /* Two or more consecutive stars, with star-star handling enabled. */

    if (!no_starstar && pattern < pattern_end && *pattern == CHAR_ASTERISK)
      {
      /* pattern[-1] is the first star, so pattern[-2] is the character in
      front of it; is_start short-circuits when there is no such character. */
      after_separator = is_start || (pattern[-2] == separator);

      do pattern++; while (pattern < pattern_end &&
             *pattern == CHAR_ASTERISK);

      /* The star run reaches the end of the pattern: nothing more is
      emitted and the final \z is suppressed. */

      if (pattern >= pattern_end)
        {
        no_slash_z = TRUE;
        break;
        }

      after_starstar = TRUE;

      /* Step over an escape character that directly precedes a separator,
      so that the tests on *pattern below see the separator itself. */

      if (after_separator && escape != 0 && *pattern == escape &&
          pattern + 1 < pattern_end && pattern[1] == separator)
        pattern++;

      if (is_start)
        {
        if (*pattern != separator) continue;

        /* Leading star run followed by a separator: emit "(?:\A|" then the
        separator then ")", and consume the separator. */

        out.out_str[0] = CHAR_LEFT_PARENTHESIS;
        out.out_str[1] = CHAR_QUESTION_MARK;
        out.out_str[2] = CHAR_COLON;
        out.out_str[3] = CHAR_BACKSLASH;
        out.out_str[4] = CHAR_A;
        out.out_str[5] = CHAR_VERTICAL_LINE;
        convert_glob_write_str(&out, 6);

        convert_glob_print_separator(&out, separator, with_escape);
        convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);

        pattern++;
        continue;
        }

      convert_glob_print_commit(&out);

      /* Not enclosed by separators on both sides: emit ".*?" only. */

      if (!after_separator || *pattern != separator)
        {
        out.out_str[0] = CHAR_DOT;
        out.out_str[1] = CHAR_ASTERISK;
        out.out_str[2] = CHAR_QUESTION_MARK;
        convert_glob_write_str(&out, 3);
        continue;
        }

      /* Separator on both sides: emit "(?:.*?" then the separator then
      ")??", and consume the trailing separator. */

      out.out_str[0] = CHAR_LEFT_PARENTHESIS;
      out.out_str[1] = CHAR_QUESTION_MARK;
      out.out_str[2] = CHAR_COLON;
      out.out_str[3] = CHAR_DOT;
      out.out_str[4] = CHAR_ASTERISK;
      out.out_str[5] = CHAR_QUESTION_MARK;

      convert_glob_write_str(&out, 6);

      convert_glob_print_separator(&out, separator, with_escape);

      out.out_str[0] = CHAR_RIGHT_PARENTHESIS;
      out.out_str[1] = CHAR_QUESTION_MARK;
      out.out_str[2] = CHAR_QUESTION_MARK;
      convert_glob_write_str(&out, 3);

      pattern++;
      continue;
      }

    /* Reached only when star-star handling is disabled or there is a single
    star: a run of stars is collapsed and treated as one. */

    if (pattern < pattern_end && *pattern == CHAR_ASTERISK)
      {
      do pattern++; while (pattern < pattern_end &&
             *pattern == CHAR_ASTERISK);
      }

    if (no_wildsep)
      {
      /* Trailing star under NO_WILD_SEPARATOR: emit nothing and suppress
      the final \z. */

      if (pattern >= pattern_end)
        {
        no_slash_z = TRUE;
        break;
        }

      /* Start check must be after the end check. */
      if (is_start) continue;
      }

    /* A star that is not the first pattern character is preceded either by
    an atomic group opener "(?>" (once a star-star has been seen) or by
    (*COMMIT). */

    if (!is_start)
      {
      if (after_starstar)
        {
        out.out_str[0] = CHAR_LEFT_PARENTHESIS;
        out.out_str[1] = CHAR_QUESTION_MARK;
        out.out_str[2] = CHAR_GREATER_THAN_SIGN;
        convert_glob_write_str(&out, 3);
        in_atomic = TRUE;
        }
      else
        convert_glob_print_commit(&out);
      }

    if (no_wildsep)
      convert_glob_write(&out, CHAR_DOT);
    else
      convert_glob_print_wildcard(&out, separator, with_escape);

    /* The quantifier is "*?" in general, but "*+" when the star is the last
    thing in the pattern. */

    out.out_str[0] = CHAR_ASTERISK;
    out.out_str[1] = CHAR_QUESTION_MARK;
    if (pattern >= pattern_end)
      out.out_str[1] = CHAR_PLUS;
    convert_glob_write_str(&out, 2);
    continue;
    }

  /* '?' becomes a single wildcard item. */

  if (c == CHAR_QUESTION_MARK)
    {
    if (no_wildsep)
      convert_glob_write(&out, CHAR_DOT);
    else
      convert_glob_print_wildcard(&out, separator, with_escape);
    continue;
    }

  /* '[' is handed to the range parser, which advances pattern itself. */

  if (c == CHAR_LEFT_SQUARE_BRACKET)
    {
    result = convert_glob_parse_range(&pattern, pattern_end,
      &out, utf, separator, with_escape, escape, no_wildsep);
    if (result != 0) break;
    continue;
    }

  /* An escape character makes the following code unit a literal; an escape
  at the very end of the pattern is a syntax error. */

  if (escape != 0 && c == escape)
    {
    if (pattern >= pattern_end)
      {
      result = PCRE2_ERROR_CONVERT_SYNTAX;
      break;
      }
    c = *pattern++;
    }

  /* Literal: backslash-escape it in the output if it is listed in
  pcre2_escaped_literals. */

  if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL)
    convert_glob_write(&out, CHAR_BACKSLASH);

  convert_glob_write(&out, c);
  }

if (result == 0)
  {
  if (!no_slash_z)
    {
    out.out_str[0] = CHAR_BACKSLASH;
    out.out_str[1] = CHAR_z;
    convert_glob_write_str(&out, 2);
    }

  if (in_atomic)
    convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);

  convert_glob_write(&out, CHAR_NUL);

  /* NOTE(review): presumably output_size counts every unit requested while
  out.output only advances when there was room, so a mismatch in a real run
  means the buffer was too small - confirm against convert_glob_write(). */

  if (!dummyrun && out.output_size != (PCRE2_SIZE) (out.output - use_buffer))
    result = PCRE2_ERROR_NOMEMORY;
  }

/* On error, report how far into the input pattern processing had got. */

if (result != 0)
  {
  *bufflenptr = pattern - pattern_start;
  return result;
  }

/* On success, report the output length without the terminating NUL. */

*bufflenptr = out.output_size - 1;
return 0;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Convert pattern *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This is the external-facing function for converting other forms of pattern
|
||||||
|
into PCRE2 regular expression patterns. On error, the bufflenptr argument is
|
||||||
|
used to return an offset in the original pattern.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
pattern the input pattern
|
||||||
|
plength length of input, or PCRE2_ZERO_TERMINATED
|
||||||
|
options options bits
|
||||||
|
buffptr pointer to pointer to output buffer
|
||||||
|
bufflenptr pointer to length of output buffer
|
||||||
|
ccontext convert context or NULL
|
||||||
|
|
||||||
|
Returns: 0 for success, else an error code (+ve or -ve)
|
||||||
|
*/
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE plength, uint32_t options,
|
||||||
|
PCRE2_UCHAR **buffptr, PCRE2_SIZE *bufflenptr,
|
||||||
|
pcre2_convert_context *ccontext)
|
||||||
|
{
|
||||||
|
int rc;
|
||||||
|
PCRE2_UCHAR dummy_buffer[DUMMY_BUFFER_SIZE];
|
||||||
|
PCRE2_UCHAR *use_buffer = dummy_buffer;
|
||||||
|
PCRE2_SIZE use_length = DUMMY_BUFFER_SIZE;
|
||||||
|
BOOL utf = (options & PCRE2_CONVERT_UTF) != 0;
|
||||||
|
uint32_t pattype = options & TYPE_OPTIONS;
|
||||||
|
|
||||||
|
if (pattern == NULL || bufflenptr == NULL) return PCRE2_ERROR_NULL;
|
||||||
|
|
||||||
|
if ((options & ~ALL_OPTIONS) != 0 || /* Undefined bit set */
|
||||||
|
(pattype & (~pattype+1)) != pattype || /* More than one type set */
|
||||||
|
pattype == 0) /* No type set */
|
||||||
|
{
|
||||||
|
*bufflenptr = 0; /* Error offset */
|
||||||
|
return PCRE2_ERROR_BADOPTION;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (plength == PCRE2_ZERO_TERMINATED) plength = PRIV(strlen)(pattern);
|
||||||
|
if (ccontext == NULL) ccontext =
|
||||||
|
(pcre2_convert_context *)(&PRIV(default_convert_context));
|
||||||
|
|
||||||
|
/* Check UTF if required. */
|
||||||
|
|
||||||
|
#ifndef SUPPORT_UNICODE
|
||||||
|
if (utf)
|
||||||
|
{
|
||||||
|
*bufflenptr = 0; /* Error offset */
|
||||||
|
return PCRE2_ERROR_UNICODE_NOT_SUPPORTED;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
if (utf && (options & PCRE2_CONVERT_NO_UTF_CHECK) == 0)
|
||||||
|
{
|
||||||
|
PCRE2_SIZE erroroffset;
|
||||||
|
rc = PRIV(valid_utf)(pattern, plength, &erroroffset);
|
||||||
|
if (rc != 0)
|
||||||
|
{
|
||||||
|
*bufflenptr = erroroffset;
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* If buffptr is not NULL, and what it points to is not NULL, we are being
|
||||||
|
provided with a buffer and a length, so set them as the buffer to use. */
|
||||||
|
|
||||||
|
if (buffptr != NULL && *buffptr != NULL)
|
||||||
|
{
|
||||||
|
use_buffer = *buffptr;
|
||||||
|
use_length = *bufflenptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Call an individual converter, either just once (if a buffer was provided or
|
||||||
|
just the length is needed), or twice (if a memory allocation is required). */
|
||||||
|
|
||||||
|
for (int i = 0; i < 2; i++)
|
||||||
|
{
|
||||||
|
PCRE2_UCHAR *allocated;
|
||||||
|
BOOL dummyrun = buffptr == NULL || *buffptr == NULL;
|
||||||
|
|
||||||
|
switch(pattype)
|
||||||
|
{
|
||||||
|
case PCRE2_CONVERT_GLOB:
|
||||||
|
rc = convert_glob(options & ~PCRE2_CONVERT_GLOB, pattern, plength, utf,
|
||||||
|
use_buffer, use_length, bufflenptr, dummyrun, ccontext);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE2_CONVERT_POSIX_BASIC:
|
||||||
|
case PCRE2_CONVERT_POSIX_EXTENDED:
|
||||||
|
rc = convert_posix(pattype, pattern, plength, utf, use_buffer, use_length,
|
||||||
|
bufflenptr, dummyrun, ccontext);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
goto EXIT;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rc != 0 || /* Error */
|
||||||
|
buffptr == NULL || /* Just the length is required */
|
||||||
|
*buffptr != NULL) /* Buffer was provided or allocated */
|
||||||
|
return rc;
|
||||||
|
|
||||||
|
/* Allocate memory for the buffer, with hidden space for an allocator at
|
||||||
|
the start. The next time round the loop runs the conversion for real. */
|
||||||
|
|
||||||
|
allocated = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
|
||||||
|
(*bufflenptr + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)ccontext);
|
||||||
|
if (allocated == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||||
|
*buffptr = (PCRE2_UCHAR *)(((char *)allocated) + sizeof(pcre2_memctl));
|
||||||
|
|
||||||
|
use_buffer = *buffptr;
|
||||||
|
use_length = *bufflenptr + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Something went terribly wrong. Trigger an assert and return an error */
|
||||||
|
PCRE2_DEBUG_UNREACHABLE();
|
||||||
|
|
||||||
|
EXIT:
|
||||||
|
|
||||||
|
*bufflenptr = 0; /* Error offset */
|
||||||
|
return PCRE2_ERROR_INTERNAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Free converted pattern *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This frees a converted pattern that was put in newly-allocated memory.
|
||||||
|
|
||||||
|
Argument: the converted pattern
|
||||||
|
Returns: nothing
|
||||||
|
*/
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_converted_pattern_free(PCRE2_UCHAR *converted)
|
||||||
|
{
|
||||||
|
if (converted != NULL)
|
||||||
|
{
|
||||||
|
pcre2_memctl *memctl =
|
||||||
|
(pcre2_memctl *)((char *)converted - sizeof(pcre2_memctl));
|
||||||
|
memctl->free(memctl, memctl->memory_data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre2_convert.c */
|
||||||
4110
3rd/pcre2/src/pcre2_dfa_match.c
Normal file
4110
3rd/pcre2/src/pcre2_dfa_match.c
Normal file
@@ -0,0 +1,4110 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module contains the external function pcre2_dfa_match(), which is an
|
||||||
|
alternative matching function that uses a sort of DFA algorithm (not a true
|
||||||
|
FSM). This is NOT Perl-compatible, but it has advantages in certain
|
||||||
|
applications. */
|
||||||
|
|
||||||
|
|
||||||
|
/* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved
|
||||||
|
the performance of his patterns greatly. I could not use it as it stood, as it
|
||||||
|
was not thread safe, and made assumptions about pattern sizes. Also, it caused
|
||||||
|
test 7 to loop, and test 9 to crash with a segfault.
|
||||||
|
|
||||||
|
The issue is the check for duplicate states, which is done by a simple linear
|
||||||
|
search up the state list. (Grep for "duplicate" below to find the code.) For
|
||||||
|
many patterns, there will never be many states active at one time, so a simple
|
||||||
|
linear search is fine. In patterns that have many active states, it might be a
|
||||||
|
bottleneck. The suggested code used an indexing scheme to remember which states
|
||||||
|
had previously been used for each character, and avoided the linear search when
|
||||||
|
it knew there was no chance of a duplicate. This was implemented when adding
|
||||||
|
states to the state lists.
|
||||||
|
|
||||||
|
I wrote some thread-safe, not-limited code to try something similar at the time
|
||||||
|
of checking for duplicates (instead of when adding states), using index vectors
|
||||||
|
on the stack. It did give a 13% improvement with one specially constructed
|
||||||
|
pattern for certain subject strings, but on other strings and on many of the
|
||||||
|
simpler patterns in the test suite it did worse. The major problem, I think,
|
||||||
|
was the extra time to initialize the index. This had to be done for each call
|
||||||
|
of internal_dfa_match(). (The supplied patch used a static vector, initialized
|
||||||
|
only once - I suspect this was the cause of the problems with the tests.)
|
||||||
|
|
||||||
|
Overall, I concluded that the gains in some cases did not outweigh the losses
|
||||||
|
in others, so I abandoned this code. */
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
#include "config.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define NLBLOCK mb /* Block containing newline information */
|
||||||
|
#define PSSTART start_subject /* Field containing processed string start */
|
||||||
|
#define PSEND end_subject /* Field containing processed string end */
|
||||||
|
|
||||||
|
#include "pcre2_internal.h"
|
||||||
|
|
||||||
|
#define PUBLIC_DFA_MATCH_OPTIONS \
|
||||||
|
(PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
|
||||||
|
PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
|
||||||
|
PCRE2_PARTIAL_SOFT|PCRE2_DFA_SHORTEST|PCRE2_DFA_RESTART| \
|
||||||
|
PCRE2_COPY_MATCHED_SUBJECT)
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Code parameters and static tables *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
|
||||||
|
into others, under special conditions. A gap of 20 between the blocks should be
|
||||||
|
enough. The resulting opcodes don't have to be less than 256 because they are
|
||||||
|
never stored, so we push them well clear of the normal opcodes. */
|
||||||
|
|
||||||
|
#define OP_PROP_EXTRA 300
|
||||||
|
#define OP_EXTUNI_EXTRA 320
|
||||||
|
#define OP_ANYNL_EXTRA 340
|
||||||
|
#define OP_HSPACE_EXTRA 360
|
||||||
|
#define OP_VSPACE_EXTRA 380
|
||||||
|
|
||||||
|
|
||||||
|
/* This table identifies those opcodes that are followed immediately by a
character that is to be tested in some way. This makes it possible to
centralize the loading of these characters. In the case of Type * etc, the
"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
small value. Non-zero values in the table are the offsets from the opcode where
the character is to be found. ***NOTE*** If the start of this table is
modified, the three tables that follow must also be modified.

There is one entry per opcode, in opcode order; a zero entry means that no
character follows the opcode. A STATIC_ASSERT after the tables checks that
sizeof(coptable) equals OP_TABLE_LENGTH. Entries of 1+IMM2_SIZE belong to the
upto/minupto/exact forms (presumably the IMM2_SIZE units hold the repeat
count - confirm against the opcode layout in pcre2_internal.h). */

static const uint8_t coptable[] = {
  0,                             /* End */
  0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b */
  0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w */
  0, 0, 0,                       /* Any, AllAny, Anybyte */
  0, 0,                          /* \P, \p */
  0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v */
  0,                             /* \X */
  0, 0, 0, 0, 0, 0,              /* \Z, \z, $, $M, ^, ^M */
  1,                             /* Char */
  1,                             /* Chari */
  1,                             /* not */
  1,                             /* noti */
  /* Positive single-char repeats */
  1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ?? */
  1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto, minupto */
  1+IMM2_SIZE,                   /* exact */
  1, 1, 1, 1+IMM2_SIZE,          /* *+, ++, ?+, upto+ */
  1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I */
  1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto I, minupto I */
  1+IMM2_SIZE,                   /* exact I */
  1, 1, 1, 1+IMM2_SIZE,          /* *+I, ++I, ?+I, upto+I */
  /* Negative single-char repeats - only for chars < 256 */
  1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ?? */
  1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto, minupto */
  1+IMM2_SIZE,                   /* NOT exact */
  1, 1, 1, 1+IMM2_SIZE,          /* NOT *+, ++, ?+, upto+ */
  1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I */
  1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto I, minupto I */
  1+IMM2_SIZE,                   /* NOT exact I */
  1, 1, 1, 1+IMM2_SIZE,          /* NOT *+I, ++I, ?+I, upto+I */
  /* Positive type repeats */
  1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ?? */
  1+IMM2_SIZE, 1+IMM2_SIZE,      /* Type upto, minupto */
  1+IMM2_SIZE,                   /* Type exact */
  1, 1, 1, 1+IMM2_SIZE,          /* Type *+, ++, ?+, upto+ */
  /* Character class & ref repeats */
  0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ?? */
  0, 0,                          /* CRRANGE, CRMINRANGE */
  0, 0, 0, 0,                    /* Possessive *+, ++, ?+, CRPOSRANGE */
  0,                             /* CLASS */
  0,                             /* NCLASS */
  0,                             /* XCLASS - variable length */
  0,                             /* ECLASS - variable length */
  0,                             /* REF */
  0,                             /* REFI */
  0,                             /* DNREF */
  0,                             /* DNREFI */
  0,                             /* RECURSE */
  0,                             /* CALLOUT */
  0,                             /* CALLOUT_STR */
  0,                             /* Alt */
  0,                             /* Ket */
  0,                             /* KetRmax */
  0,                             /* KetRmin */
  0,                             /* KetRpos */
  0, 0,                          /* Reverse, Vreverse */
  0,                             /* Assert */
  0,                             /* Assert not */
  0,                             /* Assert behind */
  0,                             /* Assert behind not */
  0,                             /* NA assert */
  0,                             /* NA assert behind */
  0,                             /* Assert scan substring */
  0,                             /* ONCE */
  0,                             /* SCRIPT_RUN */
  0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
  0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
  0, 0,                          /* CREF, DNCREF */
  0, 0,                          /* RREF, DNRREF */
  0, 0,                          /* FALSE, TRUE */
  0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO */
  0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG */
  0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG */
  0, 0,                          /* COMMIT, COMMIT_ARG */
  0, 0, 0,                       /* FAIL, ACCEPT, ASSERT_ACCEPT */
  0, 0, 0,                       /* CLOSE, SKIPZERO, DEFINE */
  0, 0,                          /* \B and \b in UCP mode */
};
|
||||||
|
|
||||||
|
/* This table identifies those opcodes that inspect a character. It is used to
remember the fact that a character could have been inspected when the end of
the subject is reached. ***NOTE*** If the start of this table is modified, the
two tables that follow must also be modified.

There is one entry per opcode, in the same order as coptable: 1 marks an
opcode that inspects a character and 0 one that does not. A STATIC_ASSERT
after the tables checks that sizeof(poptable) equals OP_TABLE_LENGTH. */

static const uint8_t poptable[] = {
  0,                             /* End */
  0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b */
  1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w */
  1, 1, 1,                       /* Any, AllAny, Anybyte */
  1, 1,                          /* \P, \p */
  1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v */
  1,                             /* \X */
  0, 0, 0, 0, 0, 0,              /* \Z, \z, $, $M, ^, ^M */
  1,                             /* Char */
  1,                             /* Chari */
  1,                             /* not */
  1,                             /* noti */
  /* Positive single-char repeats */
  1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ?? */
  1, 1, 1,                       /* upto, minupto, exact */
  1, 1, 1, 1,                    /* *+, ++, ?+, upto+ */
  1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I */
  1, 1, 1,                       /* upto I, minupto I, exact I */
  1, 1, 1, 1,                    /* *+I, ++I, ?+I, upto+I */
  /* Negative single-char repeats - only for chars < 256 */
  1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ?? */
  1, 1, 1,                       /* NOT upto, minupto, exact */
  1, 1, 1, 1,                    /* NOT *+, ++, ?+, upto+ */
  1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I */
  1, 1, 1,                       /* NOT upto I, minupto I, exact I */
  1, 1, 1, 1,                    /* NOT *+I, ++I, ?+I, upto+I */
  /* Positive type repeats */
  1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ?? */
  1, 1, 1,                       /* Type upto, minupto, exact */
  1, 1, 1, 1,                    /* Type *+, ++, ?+, upto+ */
  /* Character class & ref repeats */
  1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ?? */
  1, 1,                          /* CRRANGE, CRMINRANGE */
  1, 1, 1, 1,                    /* Possessive *+, ++, ?+, CRPOSRANGE */
  1,                             /* CLASS */
  1,                             /* NCLASS */
  1,                             /* XCLASS - variable length */
  1,                             /* ECLASS - variable length */
  0,                             /* REF */
  0,                             /* REFI */
  0,                             /* DNREF */
  0,                             /* DNREFI */
  0,                             /* RECURSE */
  0,                             /* CALLOUT */
  0,                             /* CALLOUT_STR */
  0,                             /* Alt */
  0,                             /* Ket */
  0,                             /* KetRmax */
  0,                             /* KetRmin */
  0,                             /* KetRpos */
  0, 0,                          /* Reverse, Vreverse */
  0,                             /* Assert */
  0,                             /* Assert not */
  0,                             /* Assert behind */
  0,                             /* Assert behind not */
  0,                             /* NA assert */
  0,                             /* NA assert behind */
  0,                             /* Assert scan substring */
  0,                             /* ONCE */
  0,                             /* SCRIPT_RUN */
  0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
  0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
  0, 0,                          /* CREF, DNCREF */
  0, 0,                          /* RREF, DNRREF */
  0, 0,                          /* FALSE, TRUE */
  0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO */
  0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG */
  0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG */
  0, 0,                          /* COMMIT, COMMIT_ARG */
  0, 0, 0,                       /* FAIL, ACCEPT, ASSERT_ACCEPT */
  0, 0, 0,                       /* CLOSE, SKIPZERO, DEFINE */
  1, 1,                          /* \B and \b in UCP mode */
};
|
||||||
|
|
||||||
|
/* Compile-time check that these tables have the correct size. */
|
||||||
|
STATIC_ASSERT(sizeof(coptable) == OP_TABLE_LENGTH, coptable);
|
||||||
|
STATIC_ASSERT(sizeof(poptable) == OP_TABLE_LENGTH, poptable);
|
||||||
|
|
||||||
|
/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
and \w. Both have one entry per opcode from the start of the opcode list up to
OP_ALLANY: six leading zeros (End, \A, \G, \K, \B, \b), then one entry each
for \D, \d, \S, \s, \W, \w, then OP_ANY and OP_ALLANY.

NOTE(review): presumably toptable1 is a ctype bit mask applied to the
character's type bits and toptable2 is the value the masked result is compared
with, which would explain why the negated forms (\D, \S, \W) and their
positive counterparts share a mask but differ in toptable2 - confirm at the
point of use. */

static const uint8_t toptable1[] = {
  0, 0, 0, 0, 0, 0,
  ctype_digit, ctype_digit,
  ctype_space, ctype_space,
  ctype_word,  ctype_word,
  0, 0                            /* OP_ANY, OP_ALLANY */
};

static const uint8_t toptable2[] = {
  0, 0, 0, 0, 0, 0,
  ctype_digit, 0,
  ctype_space, 0,
  ctype_word,  0,
  1, 1                            /* OP_ANY, OP_ALLANY */
};
|
||||||
|
|
||||||
|
|
||||||
|
/* Structure for holding data about a particular state, which is in effect the
current data for an active path through the match tree. It must consist
entirely of ints because the working vector we are passed, and which we put
these structures in, is a vector of ints. The INTS_PER_STATEBLOCK macro that
follows gives the size of one state in ints. */

typedef struct stateblock {
  int offset;                     /* Offset to opcode (-ve has meaning) */
  int count;                      /* Count for repeats */
  int data;                       /* Some use extra data */
} stateblock;
|
||||||
|
|
||||||
|
#define INTS_PER_STATEBLOCK (int)(sizeof(stateblock)/sizeof(int))
|
||||||
|
|
||||||
|
|
||||||
|
/* Before version 10.32 the recursive calls of internal_dfa_match() were passed
|
||||||
|
local working space and output vectors that were created on the stack. This has
|
||||||
|
caused issues for some patterns, especially in small-stack environments such as
|
||||||
|
Windows. A new scheme is now in use which sets up a vector on the stack, but if
|
||||||
|
this is too small, heap memory is used, up to the heap_limit. The main
|
||||||
|
parameters are all numbers of ints because the workspace is a vector of ints.
|
||||||
|
|
||||||
|
The size of the starting stack vector, DFA_START_RWS_SIZE, is in bytes, and is
|
||||||
|
defined in pcre2_internal.h so as to be available to pcre2test when it is
|
||||||
|
finding the minimum heap requirement for a match. */
|
||||||
|
|
||||||
|
#define OVEC_UNIT (sizeof(PCRE2_SIZE)/sizeof(int))
|
||||||
|
|
||||||
|
#define RWS_BASE_SIZE (DFA_START_RWS_SIZE/sizeof(int)) /* Stack vector */
|
||||||
|
#define RWS_RSIZE 1000 /* Work size for recursion */
|
||||||
|
#define RWS_OVEC_RSIZE (1000*OVEC_UNIT) /* Ovector for recursion */
|
||||||
|
#define RWS_OVEC_OSIZE (2*OVEC_UNIT) /* Ovector in other cases */
|
||||||
|
|
||||||
|
/* This structure is at the start of each workspace block. The next pointer
allows blocks to be chained together; size and free are both measured in ints,
not bytes. */

typedef struct RWS_anchor {
  struct RWS_anchor *next;        /* Next workspace block, if any */
  uint32_t size;                  /* Number of ints */
  uint32_t free;                  /* Number of ints */
} RWS_anchor;
|
||||||
|
|
||||||
|
#define RWS_ANCHOR_SIZE (sizeof(RWS_anchor)/sizeof(int))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Process a callout *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function is called to perform a callout.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
code current code pointer
|
||||||
|
offsets points to current capture offsets
|
||||||
|
current_subject start of current subject match
|
||||||
|
ptr current position in subject
|
||||||
|
mb the match block
|
||||||
|
extracode extra code offset when called from condition
|
||||||
|
lengthptr where to return the callout length
|
||||||
|
|
||||||
|
Returns: the return from the callout
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int
|
||||||
|
do_callout_dfa(PCRE2_SPTR code, PCRE2_SIZE *offsets, PCRE2_SPTR current_subject,
|
||||||
|
PCRE2_SPTR ptr, dfa_match_block *mb, PCRE2_SIZE extracode,
|
||||||
|
PCRE2_SIZE *lengthptr)
|
||||||
|
{
|
||||||
|
pcre2_callout_block *cb = mb->cb;
|
||||||
|
|
||||||
|
*lengthptr = (code[extracode] == OP_CALLOUT)?
|
||||||
|
(PCRE2_SIZE)PRIV(OP_lengths)[OP_CALLOUT] :
|
||||||
|
(PCRE2_SIZE)GET(code, 1 + 2*LINK_SIZE + extracode);
|
||||||
|
|
||||||
|
if (mb->callout == NULL) return 0; /* No callout provided */
|
||||||
|
|
||||||
|
/* Fixed fields in the callout block are set once and for all at the start of
|
||||||
|
matching. */
|
||||||
|
|
||||||
|
cb->offset_vector = offsets;
|
||||||
|
cb->start_match = (PCRE2_SIZE)(current_subject - mb->start_subject);
|
||||||
|
cb->current_position = (PCRE2_SIZE)(ptr - mb->start_subject);
|
||||||
|
cb->pattern_position = GET(code, 1 + extracode);
|
||||||
|
cb->next_item_length = GET(code, 1 + LINK_SIZE + extracode);
|
||||||
|
|
||||||
|
if (code[extracode] == OP_CALLOUT)
|
||||||
|
{
|
||||||
|
cb->callout_number = code[1 + 2*LINK_SIZE + extracode];
|
||||||
|
cb->callout_string_offset = 0;
|
||||||
|
cb->callout_string = NULL;
|
||||||
|
cb->callout_string_length = 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cb->callout_number = 0;
|
||||||
|
cb->callout_string_offset = GET(code, 1 + 3*LINK_SIZE + extracode);
|
||||||
|
cb->callout_string = code + (1 + 4*LINK_SIZE + extracode) + 1;
|
||||||
|
cb->callout_string_length = *lengthptr - (1 + 4*LINK_SIZE) - 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
return (mb->callout)(cb, mb->callout_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Expand local workspace memory *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function is called when internal_dfa_match() is about to be called
|
||||||
|
recursively and there is insufficient working space left in the current
|
||||||
|
workspace block. If there's an existing next block, use it; otherwise get a new
|
||||||
|
block unless the heap limit is reached.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
rwsptr pointer to block pointer (updated)
|
||||||
|
ovecsize space needed for an ovector
|
||||||
|
mb the match block
|
||||||
|
|
||||||
|
Returns: 0 rwsptr has been updated
|
||||||
|
!0 an error code
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int
|
||||||
|
more_workspace(RWS_anchor **rwsptr, unsigned int ovecsize, dfa_match_block *mb)
|
||||||
|
{
|
||||||
|
RWS_anchor *rws = *rwsptr;
|
||||||
|
RWS_anchor *new;
|
||||||
|
|
||||||
|
if (rws->next != NULL)
|
||||||
|
{
|
||||||
|
new = rws->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Sizes in the RWS_anchor blocks are in units of sizeof(int), but
|
||||||
|
mb->heap_limit and mb->heap_used are in kibibytes. Play carefully, to avoid
|
||||||
|
overflow. */
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
uint32_t newsize = (rws->size >= UINT32_MAX/(sizeof(int)*2))? UINT32_MAX/sizeof(int) : rws->size * 2;
|
||||||
|
uint32_t newsizeK = newsize/(1024/sizeof(int));
|
||||||
|
|
||||||
|
if (newsizeK + mb->heap_used > mb->heap_limit)
|
||||||
|
newsizeK = (uint32_t)(mb->heap_limit - mb->heap_used);
|
||||||
|
newsize = newsizeK*(1024/sizeof(int));
|
||||||
|
|
||||||
|
if (newsize < RWS_RSIZE + ovecsize + RWS_ANCHOR_SIZE)
|
||||||
|
return PCRE2_ERROR_HEAPLIMIT;
|
||||||
|
new = mb->memctl.malloc(newsize*sizeof(int), mb->memctl.memory_data);
|
||||||
|
if (new == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||||
|
mb->heap_used += newsizeK;
|
||||||
|
new->next = NULL;
|
||||||
|
new->size = newsize;
|
||||||
|
rws->next = new;
|
||||||
|
}
|
||||||
|
|
||||||
|
new->free = new->size - RWS_ANCHOR_SIZE;
|
||||||
|
*rwsptr = new;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Match a Regular Expression - DFA engine *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This internal function applies a compiled pattern to a subject string,
|
||||||
|
starting at a given point, using a DFA engine. This function is called from the
|
||||||
|
external one, possibly multiple times if the pattern is not anchored. The
|
||||||
|
function calls itself recursively for some kinds of subpattern.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
mb the match_data block with fixed information
|
||||||
|
this_start_code the opening bracket of this subexpression's code
|
||||||
|
current_subject where we currently are in the subject string
|
||||||
|
start_offset start offset in the subject string
|
||||||
|
offsets vector to contain the matching string offsets
|
||||||
|
offsetcount size of same
|
||||||
|
workspace vector of workspace
|
||||||
|
wscount size of same
|
||||||
|
rlevel function call recursion level
|
||||||
|
|
||||||
|
Returns: > 0 => number of match offset pairs placed in offsets
|
||||||
|
= 0 => offsets overflowed; longest matches are present
|
||||||
|
-1 => failed to match
|
||||||
|
< -1 => some kind of unexpected problem
|
||||||
|
|
||||||
|
The following macros are used for adding states to the two state vectors (one
|
||||||
|
for the current character, one for the following character). */
|
||||||
|
|
||||||
|
/* Each macro appends one state to the relevant vector. The counter is always
incremented, even when the vector is already full, in which case the enclosing
function returns PCRE2_ERROR_DFA_WSSIZE. Each macro expands to a single
statement and must be followed by a semicolon. */

#define ADD_ACTIVE(x,y) \
  do \
    { \
    if (active_count++ >= wscount) return PCRE2_ERROR_DFA_WSSIZE; \
    next_active_state->offset = (x); \
    next_active_state->count  = (y); \
    next_active_state++; \
    } \
  while (0)

#define ADD_ACTIVE_DATA(x,y,z) \
  do \
    { \
    if (active_count++ >= wscount) return PCRE2_ERROR_DFA_WSSIZE; \
    next_active_state->offset = (x); \
    next_active_state->count  = (y); \
    next_active_state->data   = (z); \
    next_active_state++; \
    } \
  while (0)

#define ADD_NEW(x,y) \
  do \
    { \
    if (new_count++ >= wscount) return PCRE2_ERROR_DFA_WSSIZE; \
    next_new_state->offset = (x); \
    next_new_state->count  = (y); \
    next_new_state++; \
    } \
  while (0)

#define ADD_NEW_DATA(x,y,z) \
  do \
    { \
    if (new_count++ >= wscount) return PCRE2_ERROR_DFA_WSSIZE; \
    next_new_state->offset = (x); \
    next_new_state->count  = (y); \
    next_new_state->data   = (z); \
    next_new_state++; \
    } \
  while (0)
|
||||||
|
|
||||||
|
/* And now, here is the code */
|
||||||
|
|
||||||
|
static int
|
||||||
|
internal_dfa_match(
|
||||||
|
dfa_match_block *mb,
|
||||||
|
PCRE2_SPTR this_start_code,
|
||||||
|
PCRE2_SPTR current_subject,
|
||||||
|
PCRE2_SIZE start_offset,
|
||||||
|
PCRE2_SIZE *offsets,
|
||||||
|
uint32_t offsetcount,
|
||||||
|
int *workspace,
|
||||||
|
int wscount,
|
||||||
|
uint32_t rlevel,
|
||||||
|
int *RWS)
|
||||||
|
{
|
||||||
|
stateblock *active_states, *new_states, *temp_states;
|
||||||
|
stateblock *next_active_state, *next_new_state;
|
||||||
|
const uint8_t *ctypes, *lcc, *fcc;
|
||||||
|
PCRE2_SPTR ptr;
|
||||||
|
PCRE2_SPTR end_code;
|
||||||
|
dfa_recursion_info new_recursive;
|
||||||
|
int active_count, new_count, match_count;
|
||||||
|
|
||||||
|
/* Some fields in the mb block are frequently referenced, so we load them into
|
||||||
|
independent variables in the hope that this will perform better. */
|
||||||
|
|
||||||
|
PCRE2_SPTR start_subject = mb->start_subject;
|
||||||
|
PCRE2_SPTR end_subject = mb->end_subject;
|
||||||
|
PCRE2_SPTR start_code = mb->start_code;
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
|
||||||
|
BOOL utf_or_ucp = utf || (mb->poptions & PCRE2_UCP) != 0;
|
||||||
|
#else
|
||||||
|
BOOL utf = FALSE;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
BOOL reset_could_continue = FALSE;
|
||||||
|
|
||||||
|
if (mb->match_call_count++ >= mb->match_limit) return PCRE2_ERROR_MATCHLIMIT;
|
||||||
|
if (rlevel++ > mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT;
|
||||||
|
offsetcount &= (uint32_t)(-2); /* Round down */
|
||||||
|
|
||||||
|
wscount -= 2;
|
||||||
|
wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) /
|
||||||
|
(2 * INTS_PER_STATEBLOCK);
|
||||||
|
|
||||||
|
ctypes = mb->tables + ctypes_offset;
|
||||||
|
lcc = mb->tables + lcc_offset;
|
||||||
|
fcc = mb->tables + fcc_offset;
|
||||||
|
|
||||||
|
match_count = PCRE2_ERROR_NOMATCH; /* A negative number */
|
||||||
|
|
||||||
|
active_states = (stateblock *)(workspace + 2);
|
||||||
|
next_new_state = new_states = active_states + wscount;
|
||||||
|
new_count = 0;
|
||||||
|
|
||||||
|
/* The first thing in any (sub) pattern is a bracket of some sort. Push all
|
||||||
|
the alternative states onto the list, and find out where the end is. This
|
||||||
|
makes it possible to use this function recursively, when we want to stop at a
|
||||||
|
matching internal ket rather than at the end.
|
||||||
|
|
||||||
|
If we are dealing with a backward assertion we have to find out the maximum
|
||||||
|
amount to move back, and set up each alternative appropriately. */
|
||||||
|
|
||||||
|
if (*this_start_code == OP_ASSERTBACK || *this_start_code == OP_ASSERTBACK_NOT)
|
||||||
|
{
|
||||||
|
size_t max_back = 0;
|
||||||
|
size_t gone_back;
|
||||||
|
|
||||||
|
end_code = this_start_code;
|
||||||
|
do
|
||||||
|
{
|
||||||
|
size_t back = (size_t)GET2(end_code, 2+LINK_SIZE);
|
||||||
|
if (back > max_back) max_back = back;
|
||||||
|
end_code += GET(end_code, 1);
|
||||||
|
}
|
||||||
|
while (*end_code == OP_ALT);
|
||||||
|
|
||||||
|
/* If we can't go back the amount required for the longest lookbehind
|
||||||
|
pattern, go back as far as we can; some alternatives may still be viable. */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
/* In character mode we have to step back character by character */
|
||||||
|
|
||||||
|
if (utf)
|
||||||
|
{
|
||||||
|
for (gone_back = 0; gone_back < max_back; gone_back++)
|
||||||
|
{
|
||||||
|
if (current_subject <= start_subject) break;
|
||||||
|
current_subject--;
|
||||||
|
ACROSSCHAR(current_subject > start_subject, current_subject,
|
||||||
|
current_subject--);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* In byte-mode we can do this quickly. */
|
||||||
|
|
||||||
|
{
|
||||||
|
size_t current_offset = (size_t)(current_subject - start_subject);
|
||||||
|
gone_back = (current_offset < max_back)? current_offset : max_back;
|
||||||
|
current_subject -= gone_back;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Save the earliest consulted character */
|
||||||
|
|
||||||
|
if (current_subject < mb->start_used_ptr)
|
||||||
|
mb->start_used_ptr = current_subject;
|
||||||
|
|
||||||
|
/* Now we can process the individual branches. There will be an OP_REVERSE at
|
||||||
|
the start of each branch, except when the length of the branch is zero. */
|
||||||
|
|
||||||
|
end_code = this_start_code;
|
||||||
|
do
|
||||||
|
{
|
||||||
|
uint32_t revlen = (end_code[1+LINK_SIZE] == OP_REVERSE)? 1 + IMM2_SIZE : 0;
|
||||||
|
size_t back = (revlen == 0)? 0 : (size_t)GET2(end_code, 2+LINK_SIZE);
|
||||||
|
if (back <= gone_back)
|
||||||
|
{
|
||||||
|
int bstate = (int)(end_code - start_code + 1 + LINK_SIZE + revlen);
|
||||||
|
ADD_NEW_DATA(-bstate, 0, (int)(gone_back - back));
|
||||||
|
}
|
||||||
|
end_code += GET(end_code, 1);
|
||||||
|
}
|
||||||
|
while (*end_code == OP_ALT);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This is the code for a "normal" subpattern (not a backward assertion). The
|
||||||
|
start of a whole pattern is always one of these. If we are at the top level,
|
||||||
|
we may be asked to restart matching from the same point that we reached for a
|
||||||
|
previous partial match. We still have to scan through the top-level branches to
|
||||||
|
find the end state. */
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
end_code = this_start_code;
|
||||||
|
|
||||||
|
/* Restarting */
|
||||||
|
|
||||||
|
if (rlevel == 1 && (mb->moptions & PCRE2_DFA_RESTART) != 0)
|
||||||
|
{
|
||||||
|
do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT);
|
||||||
|
new_count = workspace[1];
|
||||||
|
if (!workspace[0])
|
||||||
|
memcpy(new_states, active_states, (size_t)new_count * sizeof(stateblock));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Not restarting */
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int length = 1 + LINK_SIZE +
|
||||||
|
((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
|
||||||
|
*this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
|
||||||
|
? IMM2_SIZE:0);
|
||||||
|
do
|
||||||
|
{
|
||||||
|
ADD_NEW((int)(end_code - start_code + length), 0);
|
||||||
|
end_code += GET(end_code, 1);
|
||||||
|
length = 1 + LINK_SIZE;
|
||||||
|
}
|
||||||
|
while (*end_code == OP_ALT);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
workspace[0] = 0; /* Bit indicating which vector is current */
|
||||||
|
|
||||||
|
/* Loop for scanning the subject */
|
||||||
|
|
||||||
|
ptr = current_subject;
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
int i, j;
|
||||||
|
int clen, dlen;
|
||||||
|
uint32_t c, d;
|
||||||
|
BOOL partial_newline = FALSE;
|
||||||
|
BOOL could_continue = reset_could_continue;
|
||||||
|
reset_could_continue = FALSE;
|
||||||
|
|
||||||
|
if (ptr > mb->last_used_ptr) mb->last_used_ptr = ptr;
|
||||||
|
|
||||||
|
/* Make the new state list into the active state list and empty the
|
||||||
|
new state list. */
|
||||||
|
|
||||||
|
temp_states = active_states;
|
||||||
|
active_states = new_states;
|
||||||
|
new_states = temp_states;
|
||||||
|
active_count = new_count;
|
||||||
|
new_count = 0;
|
||||||
|
|
||||||
|
workspace[0] ^= 1; /* Remember for the restarting feature */
|
||||||
|
workspace[1] = active_count;
|
||||||
|
|
||||||
|
/* Set the pointers for adding new states */
|
||||||
|
|
||||||
|
next_active_state = active_states + active_count;
|
||||||
|
next_new_state = new_states;
|
||||||
|
|
||||||
|
/* Load the current character from the subject outside the loop, as many
|
||||||
|
different states may want to look at it, and we assume that at least one
|
||||||
|
will. */
|
||||||
|
|
||||||
|
if (ptr < end_subject)
|
||||||
|
{
|
||||||
|
clen = 1; /* Number of data items in the character */
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
GETCHARLENTEST(c, ptr, clen);
|
||||||
|
#else
|
||||||
|
c = *ptr;
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
clen = 0; /* This indicates the end of the subject */
|
||||||
|
c = NOTACHAR; /* This value should never actually be used */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Scan up the active states and act on each one. The result of an action
|
||||||
|
may be to add more states to the currently active list (e.g. on hitting a
|
||||||
|
parenthesis) or it may be to put states on the new list, for considering
|
||||||
|
when we move the character pointer on. */
|
||||||
|
|
||||||
|
for (i = 0; i < active_count; i++)
|
||||||
|
{
|
||||||
|
stateblock *current_state = active_states + i;
|
||||||
|
BOOL caseless = FALSE;
|
||||||
|
PCRE2_SPTR code;
|
||||||
|
uint32_t codevalue;
|
||||||
|
int state_offset = current_state->offset;
|
||||||
|
int rrc;
|
||||||
|
int count;
|
||||||
|
|
||||||
|
/* A negative offset is a special case meaning "hold off going to this
|
||||||
|
(negated) state until the number of characters in the data field have
|
||||||
|
been skipped". If the could_continue flag was passed over from a previous
|
||||||
|
state, arrange for it to be passed on. */
|
||||||
|
|
||||||
|
if (state_offset < 0)
|
||||||
|
{
|
||||||
|
if (current_state->data > 0)
|
||||||
|
{
|
||||||
|
ADD_NEW_DATA(state_offset, current_state->count,
|
||||||
|
current_state->data - 1);
|
||||||
|
if (could_continue) reset_could_continue = TRUE;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
current_state->offset = state_offset = -state_offset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check for a duplicate state with the same count, and skip if found.
|
||||||
|
See the note at the head of this module about the possibility of improving
|
||||||
|
performance here. */
|
||||||
|
|
||||||
|
for (j = 0; j < i; j++)
|
||||||
|
{
|
||||||
|
if (active_states[j].offset == state_offset &&
|
||||||
|
active_states[j].count == current_state->count)
|
||||||
|
goto NEXT_ACTIVE_STATE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The state offset is the offset to the opcode */
|
||||||
|
|
||||||
|
code = start_code + state_offset;
|
||||||
|
codevalue = *code;
|
||||||
|
|
||||||
|
/* If this opcode inspects a character, but we are at the end of the
|
||||||
|
subject, remember the fact for use when testing for a partial match. */
|
||||||
|
|
||||||
|
if (clen == 0 && poptable[codevalue] != 0)
|
||||||
|
could_continue = TRUE;
|
||||||
|
|
||||||
|
/* If this opcode is followed by an inline character, load it. It is
|
||||||
|
tempting to test for the presence of a subject character here, but that
|
||||||
|
is wrong, because sometimes zero repetitions of the subject are
|
||||||
|
permitted.
|
||||||
|
|
||||||
|
We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
|
||||||
|
argument that is not a data character - but is always one byte long because
|
||||||
|
the values are small. We have to take special action to deal with \P, \p,
|
||||||
|
\H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
|
||||||
|
these ones to new opcodes. */
|
||||||
|
|
||||||
|
if (coptable[codevalue] > 0)
|
||||||
|
{
|
||||||
|
dlen = 1;
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
d = code[coptable[codevalue]];
|
||||||
|
if (codevalue >= OP_TYPESTAR)
|
||||||
|
{
|
||||||
|
switch(d)
|
||||||
|
{
|
||||||
|
case OP_ANYBYTE: return PCRE2_ERROR_DFA_UITEM;
|
||||||
|
case OP_NOTPROP:
|
||||||
|
case OP_PROP: codevalue += OP_PROP_EXTRA; break;
|
||||||
|
case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
|
||||||
|
case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
|
||||||
|
case OP_NOT_HSPACE:
|
||||||
|
case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
|
||||||
|
case OP_NOT_VSPACE:
|
||||||
|
case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
|
||||||
|
default: break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
dlen = 0; /* Not strictly necessary, but compilers moan */
|
||||||
|
d = NOTACHAR; /* if these variables are not set. */
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Now process the individual opcodes */
|
||||||
|
|
||||||
|
switch (codevalue)
|
||||||
|
{
|
||||||
|
/* ========================================================================== */
|
||||||
|
/* Reached a closing bracket. If not at the end of the pattern, carry
|
||||||
|
on with the next opcode. For repeating opcodes, also add the repeat
|
||||||
|
state. Note that KETRPOS will always be encountered at the end of the
|
||||||
|
subpattern, because the possessive subpattern repeats are always handled
|
||||||
|
using recursive calls. Thus, it never adds any new states.
|
||||||
|
|
||||||
|
At the end of the (sub)pattern, unless we have an empty string and
|
||||||
|
PCRE2_NOTEMPTY is set, or PCRE2_NOTEMPTY_ATSTART is set and we are at the
|
||||||
|
start of the subject, save the match data, shifting up all previous
|
||||||
|
matches so we always have the longest first. */
|
||||||
|
|
||||||
|
case OP_KET:
|
||||||
|
case OP_KETRMIN:
|
||||||
|
case OP_KETRMAX:
|
||||||
|
case OP_KETRPOS:
|
||||||
|
if (code != end_code)
|
||||||
|
{
|
||||||
|
ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
|
||||||
|
if (codevalue != OP_KET)
|
||||||
|
{
|
||||||
|
ADD_ACTIVE(state_offset - (int)GET(code, 1), 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (ptr > current_subject ||
|
||||||
|
((mb->moptions & PCRE2_NOTEMPTY) == 0 &&
|
||||||
|
((mb->moptions & PCRE2_NOTEMPTY_ATSTART) == 0 ||
|
||||||
|
current_subject > start_subject + mb->start_offset)))
|
||||||
|
{
|
||||||
|
if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
|
||||||
|
else if (match_count > 0 && ++match_count * 2 > (int)offsetcount)
|
||||||
|
match_count = 0;
|
||||||
|
count = ((match_count == 0)? (int)offsetcount : match_count * 2) - 2;
|
||||||
|
if (count > 0) (void)memmove(offsets + 2, offsets,
|
||||||
|
(size_t)count * sizeof(PCRE2_SIZE));
|
||||||
|
if (offsetcount >= 2)
|
||||||
|
{
|
||||||
|
offsets[0] = (PCRE2_SIZE)(current_subject - start_subject);
|
||||||
|
offsets[1] = (PCRE2_SIZE)(ptr - start_subject);
|
||||||
|
}
|
||||||
|
if ((mb->moptions & PCRE2_DFA_SHORTEST) != 0) return match_count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* ========================================================================== */
|
||||||
|
/* These opcodes add to the current list of states without looking
|
||||||
|
at the current character. */
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_ALT:
|
||||||
|
do { code += GET(code, 1); } while (*code == OP_ALT);
|
||||||
|
ADD_ACTIVE((int)(code - start_code), 0);
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_BRA:
|
||||||
|
case OP_SBRA:
|
||||||
|
do
|
||||||
|
{
|
||||||
|
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
|
||||||
|
code += GET(code, 1);
|
||||||
|
}
|
||||||
|
while (*code == OP_ALT);
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_CBRA:
|
||||||
|
case OP_SCBRA:
|
||||||
|
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE), 0);
|
||||||
|
code += GET(code, 1);
|
||||||
|
while (*code == OP_ALT)
|
||||||
|
{
|
||||||
|
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
|
||||||
|
code += GET(code, 1);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_BRAZERO:
|
||||||
|
case OP_BRAMINZERO:
|
||||||
|
ADD_ACTIVE(state_offset + 1, 0);
|
||||||
|
code += 1 + GET(code, 2);
|
||||||
|
while (*code == OP_ALT) code += GET(code, 1);
|
||||||
|
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_SKIPZERO:
|
||||||
|
code += 1 + GET(code, 2);
|
||||||
|
while (*code == OP_ALT) code += GET(code, 1);
|
||||||
|
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_CIRC:
|
||||||
|
if (ptr == start_subject && (mb->moptions & PCRE2_NOTBOL) == 0)
|
||||||
|
{ ADD_ACTIVE(state_offset + 1, 0); }
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_CIRCM:
|
||||||
|
if ((ptr == start_subject && (mb->moptions & PCRE2_NOTBOL) == 0) ||
|
||||||
|
((ptr != end_subject || (mb->poptions & PCRE2_ALT_CIRCUMFLEX) != 0 )
|
||||||
|
&& WAS_NEWLINE(ptr)))
|
||||||
|
{ ADD_ACTIVE(state_offset + 1, 0); }
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_EOD:
|
||||||
|
if (ptr >= end_subject)
|
||||||
|
{
|
||||||
|
if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||||
|
return PCRE2_ERROR_PARTIAL;
|
||||||
|
else { ADD_ACTIVE(state_offset + 1, 0); }
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_SOD:
|
||||||
|
if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); }
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_SOM:
|
||||||
|
if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); }
|
||||||
|
break;
|
||||||
|
|
||||||
|
|
||||||
|
/* ========================================================================== */
|
||||||
|
/* These opcodes inspect the next subject character, and sometimes
|
||||||
|
the previous one as well, but do not have an argument. The variable
|
||||||
|
clen contains the length of the current character and is zero if we are
|
||||||
|
at the end of the subject. */
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_ANY:
|
||||||
|
if (clen > 0 && !IS_NEWLINE(ptr))
|
||||||
|
{
|
||||||
|
if (ptr + 1 >= mb->end_subject &&
|
||||||
|
(mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
|
||||||
|
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||||
|
NLBLOCK->nllen == 2 &&
|
||||||
|
c == NLBLOCK->nl[0])
|
||||||
|
{
|
||||||
|
could_continue = partial_newline = TRUE;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ADD_NEW(state_offset + 1, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_ALLANY:
|
||||||
|
if (clen > 0)
|
||||||
|
{ ADD_NEW(state_offset + 1, 0); }
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_EODN:
|
||||||
|
if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - mb->nllen))
|
||||||
|
{
|
||||||
|
if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||||
|
return PCRE2_ERROR_PARTIAL;
|
||||||
|
ADD_ACTIVE(state_offset + 1, 0);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_DOLL:
|
||||||
|
if ((mb->moptions & PCRE2_NOTEOL) == 0)
|
||||||
|
{
|
||||||
|
if (clen == 0 && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||||
|
could_continue = TRUE;
|
||||||
|
else if (clen == 0 ||
|
||||||
|
((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
|
||||||
|
(ptr == end_subject - mb->nllen)
|
||||||
|
))
|
||||||
|
{ ADD_ACTIVE(state_offset + 1, 0); }
|
||||||
|
else if (ptr + 1 >= mb->end_subject &&
|
||||||
|
(mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0 &&
|
||||||
|
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||||
|
NLBLOCK->nllen == 2 &&
|
||||||
|
c == NLBLOCK->nl[0])
|
||||||
|
{
|
||||||
|
if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||||
|
{
|
||||||
|
reset_could_continue = TRUE;
|
||||||
|
ADD_NEW_DATA(-(state_offset + 1), 0, 1);
|
||||||
|
}
|
||||||
|
else could_continue = partial_newline = TRUE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_DOLLM:
|
||||||
|
if ((mb->moptions & PCRE2_NOTEOL) == 0)
|
||||||
|
{
|
||||||
|
if (clen == 0 && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||||
|
could_continue = TRUE;
|
||||||
|
else if (clen == 0 ||
|
||||||
|
((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
|
||||||
|
{ ADD_ACTIVE(state_offset + 1, 0); }
|
||||||
|
else if (ptr + 1 >= mb->end_subject &&
|
||||||
|
(mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0 &&
|
||||||
|
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||||
|
NLBLOCK->nllen == 2 &&
|
||||||
|
c == NLBLOCK->nl[0])
|
||||||
|
{
|
||||||
|
if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||||
|
{
|
||||||
|
reset_could_continue = TRUE;
|
||||||
|
ADD_NEW_DATA(-(state_offset + 1), 0, 1);
|
||||||
|
}
|
||||||
|
else could_continue = partial_newline = TRUE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (IS_NEWLINE(ptr))
|
||||||
|
{ ADD_ACTIVE(state_offset + 1, 0); }
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
|
||||||
|
case OP_DIGIT:
|
||||||
|
case OP_WHITESPACE:
|
||||||
|
case OP_WORDCHAR:
|
||||||
|
if (clen > 0 && c < 256 &&
|
||||||
|
((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)
|
||||||
|
{ ADD_NEW(state_offset + 1, 0); }
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_NOT_DIGIT:
|
||||||
|
case OP_NOT_WHITESPACE:
|
||||||
|
case OP_NOT_WORDCHAR:
|
||||||
|
if (clen > 0 && (c >= 256 ||
|
||||||
|
((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0))
|
||||||
|
{ ADD_NEW(state_offset + 1, 0); }
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_WORD_BOUNDARY:
|
||||||
|
case OP_NOT_WORD_BOUNDARY:
|
||||||
|
case OP_NOT_UCP_WORD_BOUNDARY:
|
||||||
|
case OP_UCP_WORD_BOUNDARY:
|
||||||
|
{
|
||||||
|
int left_word, right_word;
|
||||||
|
|
||||||
|
if (ptr > start_subject)
|
||||||
|
{
|
||||||
|
PCRE2_SPTR temp = ptr - 1;
|
||||||
|
if (temp < mb->start_used_ptr) mb->start_used_ptr = temp;
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
if (utf) { BACKCHAR(temp); }
|
||||||
|
#endif
|
||||||
|
GETCHARTEST(d, temp);
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (codevalue == OP_UCP_WORD_BOUNDARY ||
|
||||||
|
codevalue == OP_NOT_UCP_WORD_BOUNDARY)
|
||||||
|
{
|
||||||
|
int chartype = UCD_CHARTYPE(d);
|
||||||
|
int category = PRIV(ucp_gentype)[chartype];
|
||||||
|
left_word = (category == ucp_L || category == ucp_N ||
|
||||||
|
chartype == ucp_Mn || chartype == ucp_Pc);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
|
||||||
|
}
|
||||||
|
else left_word = FALSE;
|
||||||
|
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
if (ptr >= mb->last_used_ptr)
|
||||||
|
{
|
||||||
|
PCRE2_SPTR temp = ptr + 1;
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
if (utf) { FORWARDCHARTEST(temp, mb->end_subject); }
|
||||||
|
#endif
|
||||||
|
mb->last_used_ptr = temp;
|
||||||
|
}
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (codevalue == OP_UCP_WORD_BOUNDARY ||
|
||||||
|
codevalue == OP_NOT_UCP_WORD_BOUNDARY)
|
||||||
|
{
|
||||||
|
int chartype = UCD_CHARTYPE(c);
|
||||||
|
int category = PRIV(ucp_gentype)[chartype];
|
||||||
|
right_word = (category == ucp_L || category == ucp_N ||
|
||||||
|
chartype == ucp_Mn || chartype == ucp_Pc);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
|
||||||
|
}
|
||||||
|
else right_word = FALSE;
|
||||||
|
|
||||||
|
if ((left_word == right_word) ==
|
||||||
|
(codevalue == OP_NOT_WORD_BOUNDARY ||
|
||||||
|
codevalue == OP_NOT_UCP_WORD_BOUNDARY))
|
||||||
|
{ ADD_ACTIVE(state_offset + 1, 0); }
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
/* Check the next character by Unicode property. We will get here only
|
||||||
|
if the support is in the binary; otherwise a compile-time error occurs.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
case OP_PROP:
|
||||||
|
case OP_NOTPROP:
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
BOOL OK;
|
||||||
|
int chartype;
|
||||||
|
const uint32_t *cp;
|
||||||
|
const ucd_record * prop = GET_UCD(c);
|
||||||
|
switch(code[1])
|
||||||
|
{
|
||||||
|
case PT_LAMP:
|
||||||
|
chartype = prop->chartype;
|
||||||
|
OK = chartype == ucp_Lu || chartype == ucp_Ll ||
|
||||||
|
chartype == ucp_Lt;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_GC:
|
||||||
|
OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_PC:
|
||||||
|
OK = prop->chartype == code[2];
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_SC:
|
||||||
|
OK = prop->script == code[2];
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_SCX:
|
||||||
|
OK = (prop->script == code[2] ||
|
||||||
|
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[2]) != 0);
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* These are specials for combination cases. */
|
||||||
|
|
||||||
|
case PT_ALNUM:
|
||||||
|
chartype = prop->chartype;
|
||||||
|
OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||||
|
PRIV(ucp_gentype)[chartype] == ucp_N;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||||
|
which means that Perl space and POSIX space are now identical. PCRE
|
||||||
|
was changed at release 8.34. */
|
||||||
|
|
||||||
|
case PT_SPACE: /* Perl space */
|
||||||
|
case PT_PXSPACE: /* POSIX space */
|
||||||
|
switch(c)
|
||||||
|
{
|
||||||
|
HSPACE_CASES:
|
||||||
|
VSPACE_CASES:
|
||||||
|
OK = TRUE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_WORD:
|
||||||
|
chartype = prop->chartype;
|
||||||
|
OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||||
|
PRIV(ucp_gentype)[chartype] == ucp_N ||
|
||||||
|
chartype == ucp_Mn || chartype == ucp_Pc;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_CLIST:
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
|
if (c > MAX_UTF_CODE_POINT)
|
||||||
|
{
|
||||||
|
OK = FALSE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
cp = PRIV(ucd_caseless_sets) + code[2];
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
if (c < *cp) { OK = FALSE; break; }
|
||||||
|
if (c == *cp++) { OK = TRUE; break; }
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_UCNC:
|
||||||
|
OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
|
||||||
|
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
|
||||||
|
c >= 0xe000;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_BIDICL:
|
||||||
|
OK = UCD_BIDICLASS(c) == code[2];
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_BOOL:
|
||||||
|
OK = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||||
|
UCD_BPROPS_PROP(prop), code[2]) != 0;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Should never occur, but keep compilers from grumbling. */
|
||||||
|
|
||||||
|
default:
|
||||||
|
OK = codevalue != OP_PROP;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* ========================================================================== */
|
||||||
|
/* These opcodes likewise inspect the subject character, but have an
|
||||||
|
argument that is not a data character. It is one of these opcodes:
|
||||||
|
OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE,
|
||||||
|
OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
|
||||||
|
|
||||||
|
case OP_TYPEPLUS:
|
||||||
|
case OP_TYPEMINPLUS:
|
||||||
|
case OP_TYPEPOSPLUS:
|
||||||
|
count = current_state->count; /* Already matched */
|
||||||
|
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
if (d == OP_ANY && ptr + 1 >= mb->end_subject &&
|
||||||
|
(mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
|
||||||
|
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||||
|
NLBLOCK->nllen == 2 &&
|
||||||
|
c == NLBLOCK->nl[0])
|
||||||
|
{
|
||||||
|
could_continue = partial_newline = TRUE;
|
||||||
|
}
|
||||||
|
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||||
|
(c < 256 &&
|
||||||
|
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
||||||
|
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||||
|
{
|
||||||
|
if (count > 0 && codevalue == OP_TYPEPOSPLUS)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
count++;
|
||||||
|
ADD_NEW(state_offset, count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_TYPEQUERY:
|
||||||
|
case OP_TYPEMINQUERY:
|
||||||
|
case OP_TYPEPOSQUERY:
|
||||||
|
ADD_ACTIVE(state_offset + 2, 0);
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
if (d == OP_ANY && ptr + 1 >= mb->end_subject &&
|
||||||
|
(mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
|
||||||
|
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||||
|
NLBLOCK->nllen == 2 &&
|
||||||
|
c == NLBLOCK->nl[0])
|
||||||
|
{
|
||||||
|
could_continue = partial_newline = TRUE;
|
||||||
|
}
|
||||||
|
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||||
|
(c < 256 &&
|
||||||
|
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
||||||
|
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||||
|
{
|
||||||
|
if (codevalue == OP_TYPEPOSQUERY)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
ADD_NEW(state_offset + 2, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_TYPESTAR:
|
||||||
|
case OP_TYPEMINSTAR:
|
||||||
|
case OP_TYPEPOSSTAR:
|
||||||
|
ADD_ACTIVE(state_offset + 2, 0);
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
if (d == OP_ANY && ptr + 1 >= mb->end_subject &&
|
||||||
|
(mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
|
||||||
|
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||||
|
NLBLOCK->nllen == 2 &&
|
||||||
|
c == NLBLOCK->nl[0])
|
||||||
|
{
|
||||||
|
could_continue = partial_newline = TRUE;
|
||||||
|
}
|
||||||
|
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||||
|
(c < 256 &&
|
||||||
|
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
||||||
|
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||||
|
{
|
||||||
|
if (codevalue == OP_TYPEPOSSTAR)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
ADD_NEW(state_offset, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_TYPEEXACT:
|
||||||
|
count = current_state->count; /* Number already matched */
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
if (d == OP_ANY && ptr + 1 >= mb->end_subject &&
|
||||||
|
(mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
|
||||||
|
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||||
|
NLBLOCK->nllen == 2 &&
|
||||||
|
c == NLBLOCK->nl[0])
|
||||||
|
{
|
||||||
|
could_continue = partial_newline = TRUE;
|
||||||
|
}
|
||||||
|
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||||
|
(c < 256 &&
|
||||||
|
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
||||||
|
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||||
|
{
|
||||||
|
if (++count >= (int)GET2(code, 1))
|
||||||
|
{ ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
|
||||||
|
else
|
||||||
|
{ ADD_NEW(state_offset, count); }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_TYPEUPTO:
|
||||||
|
case OP_TYPEMINUPTO:
|
||||||
|
case OP_TYPEPOSUPTO:
|
||||||
|
ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0);
|
||||||
|
count = current_state->count; /* Number already matched */
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
if (d == OP_ANY && ptr + 1 >= mb->end_subject &&
|
||||||
|
(mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
|
||||||
|
NLBLOCK->nltype == NLTYPE_FIXED &&
|
||||||
|
NLBLOCK->nllen == 2 &&
|
||||||
|
c == NLBLOCK->nl[0])
|
||||||
|
{
|
||||||
|
could_continue = partial_newline = TRUE;
|
||||||
|
}
|
||||||
|
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
|
||||||
|
(c < 256 &&
|
||||||
|
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
|
||||||
|
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
|
||||||
|
{
|
||||||
|
if (codevalue == OP_TYPEPOSUPTO)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
if (++count >= (int)GET2(code, 1))
|
||||||
|
{ ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
|
||||||
|
else
|
||||||
|
{ ADD_NEW(state_offset, count); }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* ========================================================================== */
|
||||||
|
/* These are virtual opcodes that are used when something like
|
||||||
|
OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its
|
||||||
|
argument. It keeps the code above fast for the other cases. The argument
|
||||||
|
is in the d variable. */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
case OP_PROP_EXTRA + OP_TYPEPLUS:
|
||||||
|
case OP_PROP_EXTRA + OP_TYPEMINPLUS:
|
||||||
|
case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
|
||||||
|
count = current_state->count; /* Already matched */
|
||||||
|
if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
BOOL OK;
|
||||||
|
int chartype;
|
||||||
|
const uint32_t *cp;
|
||||||
|
const ucd_record * prop = GET_UCD(c);
|
||||||
|
switch(code[2])
|
||||||
|
{
|
||||||
|
case PT_LAMP:
|
||||||
|
chartype = prop->chartype;
|
||||||
|
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_GC:
|
||||||
|
OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_PC:
|
||||||
|
OK = prop->chartype == code[3];
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_SC:
|
||||||
|
OK = prop->script == code[3];
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_SCX:
|
||||||
|
OK = (prop->script == code[3] ||
|
||||||
|
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[3]) != 0);
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* These are specials for combination cases. */
|
||||||
|
|
||||||
|
case PT_ALNUM:
|
||||||
|
chartype = prop->chartype;
|
||||||
|
OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||||
|
PRIV(ucp_gentype)[chartype] == ucp_N;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||||
|
which means that Perl space and POSIX space are now identical. PCRE
|
||||||
|
was changed at release 8.34. */
|
||||||
|
|
||||||
|
case PT_SPACE: /* Perl space */
|
||||||
|
case PT_PXSPACE: /* POSIX space */
|
||||||
|
switch(c)
|
||||||
|
{
|
||||||
|
HSPACE_CASES:
|
||||||
|
VSPACE_CASES:
|
||||||
|
OK = TRUE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_WORD:
|
||||||
|
chartype = prop->chartype;
|
||||||
|
OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||||
|
PRIV(ucp_gentype)[chartype] == ucp_N ||
|
||||||
|
chartype == ucp_Mn || chartype == ucp_Pc;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_CLIST:
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
|
if (c > MAX_UTF_CODE_POINT)
|
||||||
|
{
|
||||||
|
OK = FALSE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
cp = PRIV(ucd_caseless_sets) + code[3];
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
if (c < *cp) { OK = FALSE; break; }
|
||||||
|
if (c == *cp++) { OK = TRUE; break; }
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_UCNC:
|
||||||
|
OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
|
||||||
|
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
|
||||||
|
c >= 0xe000;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_BIDICL:
|
||||||
|
OK = UCD_BIDICLASS(c) == code[3];
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_BOOL:
|
||||||
|
OK = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||||
|
UCD_BPROPS_PROP(prop), code[3]) != 0;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Should never occur, but keep compilers from grumbling. */
|
||||||
|
|
||||||
|
default:
|
||||||
|
OK = codevalue != OP_PROP;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (OK == (d == OP_PROP))
|
||||||
|
{
|
||||||
|
if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
count++;
|
||||||
|
ADD_NEW(state_offset, count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
|
||||||
|
case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
|
||||||
|
case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
|
||||||
|
count = current_state->count; /* Already matched */
|
||||||
|
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
int ncount = 0;
|
||||||
|
if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
(void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
|
||||||
|
&ncount);
|
||||||
|
count++;
|
||||||
|
ADD_NEW_DATA(-state_offset, count, ncount);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_ANYNL_EXTRA + OP_TYPEPLUS:
|
||||||
|
case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
|
||||||
|
case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
|
||||||
|
count = current_state->count; /* Already matched */
|
||||||
|
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
int ncount = 0;
|
||||||
|
switch (c)
|
||||||
|
{
|
||||||
|
case CHAR_VT:
|
||||||
|
case CHAR_FF:
|
||||||
|
case CHAR_NEL:
|
||||||
|
#ifndef EBCDIC
|
||||||
|
case 0x2028:
|
||||||
|
case 0x2029:
|
||||||
|
#endif /* Not EBCDIC */
|
||||||
|
if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
|
||||||
|
goto ANYNL01;
|
||||||
|
|
||||||
|
case CHAR_CR:
|
||||||
|
if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
|
||||||
|
/* Fall through */
|
||||||
|
|
||||||
|
ANYNL01:
|
||||||
|
case CHAR_LF:
|
||||||
|
if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
count++;
|
||||||
|
ADD_NEW_DATA(-state_offset, count, ncount);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_VSPACE_EXTRA + OP_TYPEPLUS:
|
||||||
|
case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
|
||||||
|
case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
|
||||||
|
count = current_state->count; /* Already matched */
|
||||||
|
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
BOOL OK;
|
||||||
|
switch (c)
|
||||||
|
{
|
||||||
|
VSPACE_CASES:
|
||||||
|
OK = TRUE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
OK = FALSE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (OK == (d == OP_VSPACE))
|
||||||
|
{
|
||||||
|
if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
count++;
|
||||||
|
ADD_NEW_DATA(-state_offset, count, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_HSPACE_EXTRA + OP_TYPEPLUS:
|
||||||
|
case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
|
||||||
|
case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
|
||||||
|
count = current_state->count; /* Already matched */
|
||||||
|
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
BOOL OK;
|
||||||
|
switch (c)
|
||||||
|
{
|
||||||
|
HSPACE_CASES:
|
||||||
|
OK = TRUE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
OK = FALSE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (OK == (d == OP_HSPACE))
|
||||||
|
{
|
||||||
|
if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
count++;
|
||||||
|
ADD_NEW_DATA(-state_offset, count, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
case OP_PROP_EXTRA + OP_TYPEQUERY:
|
||||||
|
case OP_PROP_EXTRA + OP_TYPEMINQUERY:
|
||||||
|
case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
|
||||||
|
count = 4;
|
||||||
|
goto QS1;
|
||||||
|
|
||||||
|
case OP_PROP_EXTRA + OP_TYPESTAR:
|
||||||
|
case OP_PROP_EXTRA + OP_TYPEMINSTAR:
|
||||||
|
case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
|
||||||
|
count = 0;
|
||||||
|
|
||||||
|
QS1:
|
||||||
|
|
||||||
|
ADD_ACTIVE(state_offset + 4, 0);
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
BOOL OK;
|
||||||
|
int chartype;
|
||||||
|
const uint32_t *cp;
|
||||||
|
const ucd_record * prop = GET_UCD(c);
|
||||||
|
switch(code[2])
|
||||||
|
{
|
||||||
|
case PT_LAMP:
|
||||||
|
chartype = prop->chartype;
|
||||||
|
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_GC:
|
||||||
|
OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_PC:
|
||||||
|
OK = prop->chartype == code[3];
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_SC:
|
||||||
|
OK = prop->script == code[3];
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_SCX:
|
||||||
|
OK = (prop->script == code[3] ||
|
||||||
|
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[3]) != 0);
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* These are specials for combination cases. */
|
||||||
|
|
||||||
|
case PT_ALNUM:
|
||||||
|
chartype = prop->chartype;
|
||||||
|
OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||||
|
PRIV(ucp_gentype)[chartype] == ucp_N;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||||
|
which means that Perl space and POSIX space are now identical. PCRE
|
||||||
|
was changed at release 8.34. */
|
||||||
|
|
||||||
|
case PT_SPACE: /* Perl space */
|
||||||
|
case PT_PXSPACE: /* POSIX space */
|
||||||
|
switch(c)
|
||||||
|
{
|
||||||
|
HSPACE_CASES:
|
||||||
|
VSPACE_CASES:
|
||||||
|
OK = TRUE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_WORD:
|
||||||
|
chartype = prop->chartype;
|
||||||
|
OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||||
|
PRIV(ucp_gentype)[chartype] == ucp_N ||
|
||||||
|
chartype == ucp_Mn || chartype == ucp_Pc;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_CLIST:
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
|
if (c > MAX_UTF_CODE_POINT)
|
||||||
|
{
|
||||||
|
OK = FALSE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
cp = PRIV(ucd_caseless_sets) + code[3];
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
if (c < *cp) { OK = FALSE; break; }
|
||||||
|
if (c == *cp++) { OK = TRUE; break; }
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_UCNC:
|
||||||
|
OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
|
||||||
|
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
|
||||||
|
c >= 0xe000;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_BIDICL:
|
||||||
|
OK = UCD_BIDICLASS(c) == code[3];
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_BOOL:
|
||||||
|
OK = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||||
|
UCD_BPROPS_PROP(prop), code[3]) != 0;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Should never occur, but keep compilers from grumbling. */
|
||||||
|
|
||||||
|
default:
|
||||||
|
OK = codevalue != OP_PROP;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (OK == (d == OP_PROP))
|
||||||
|
{
|
||||||
|
if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
|
||||||
|
codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
ADD_NEW(state_offset + count, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
|
||||||
|
case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
|
||||||
|
case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
|
||||||
|
count = 2;
|
||||||
|
goto QS2;
|
||||||
|
|
||||||
|
case OP_EXTUNI_EXTRA + OP_TYPESTAR:
|
||||||
|
case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
|
||||||
|
case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
|
||||||
|
count = 0;
|
||||||
|
|
||||||
|
QS2:
|
||||||
|
|
||||||
|
ADD_ACTIVE(state_offset + 2, 0);
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
int ncount = 0;
|
||||||
|
if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
|
||||||
|
codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
(void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
|
||||||
|
&ncount);
|
||||||
|
ADD_NEW_DATA(-(state_offset + count), 0, ncount);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_ANYNL_EXTRA + OP_TYPEQUERY:
|
||||||
|
case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
|
||||||
|
case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
|
||||||
|
count = 2;
|
||||||
|
goto QS3;
|
||||||
|
|
||||||
|
case OP_ANYNL_EXTRA + OP_TYPESTAR:
|
||||||
|
case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
|
||||||
|
case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
|
||||||
|
count = 0;
|
||||||
|
|
||||||
|
QS3:
|
||||||
|
ADD_ACTIVE(state_offset + 2, 0);
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
int ncount = 0;
|
||||||
|
switch (c)
|
||||||
|
{
|
||||||
|
case CHAR_VT:
|
||||||
|
case CHAR_FF:
|
||||||
|
case CHAR_NEL:
|
||||||
|
#ifndef EBCDIC
|
||||||
|
case 0x2028:
|
||||||
|
case 0x2029:
|
||||||
|
#endif /* Not EBCDIC */
|
||||||
|
if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
|
||||||
|
goto ANYNL02;
|
||||||
|
|
||||||
|
case CHAR_CR:
|
||||||
|
if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
|
||||||
|
/* Fall through */
|
||||||
|
|
||||||
|
ANYNL02:
|
||||||
|
case CHAR_LF:
|
||||||
|
if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
|
||||||
|
codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
ADD_NEW_DATA(-(state_offset + (int)count), 0, ncount);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_VSPACE_EXTRA + OP_TYPEQUERY:
|
||||||
|
case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
|
||||||
|
case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
|
||||||
|
count = 2;
|
||||||
|
goto QS4;
|
||||||
|
|
||||||
|
case OP_VSPACE_EXTRA + OP_TYPESTAR:
|
||||||
|
case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
|
||||||
|
case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
|
||||||
|
count = 0;
|
||||||
|
|
||||||
|
QS4:
|
||||||
|
ADD_ACTIVE(state_offset + 2, 0);
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
BOOL OK;
|
||||||
|
switch (c)
|
||||||
|
{
|
||||||
|
VSPACE_CASES:
|
||||||
|
OK = TRUE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
OK = FALSE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (OK == (d == OP_VSPACE))
|
||||||
|
{
|
||||||
|
if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
|
||||||
|
codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_HSPACE_EXTRA + OP_TYPEQUERY:
|
||||||
|
case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
|
||||||
|
case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
|
||||||
|
count = 2;
|
||||||
|
goto QS5;
|
||||||
|
|
||||||
|
case OP_HSPACE_EXTRA + OP_TYPESTAR:
|
||||||
|
case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
|
||||||
|
case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
|
||||||
|
count = 0;
|
||||||
|
|
||||||
|
QS5:
|
||||||
|
ADD_ACTIVE(state_offset + 2, 0);
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
BOOL OK;
|
||||||
|
switch (c)
|
||||||
|
{
|
||||||
|
HSPACE_CASES:
|
||||||
|
OK = TRUE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
OK = FALSE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (OK == (d == OP_HSPACE))
|
||||||
|
{
|
||||||
|
if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
|
||||||
|
codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
case OP_PROP_EXTRA + OP_TYPEEXACT:
|
||||||
|
case OP_PROP_EXTRA + OP_TYPEUPTO:
|
||||||
|
case OP_PROP_EXTRA + OP_TYPEMINUPTO:
|
||||||
|
case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
|
||||||
|
if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
|
||||||
|
{ ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
|
||||||
|
count = current_state->count; /* Number already matched */
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
BOOL OK;
|
||||||
|
int chartype;
|
||||||
|
const uint32_t *cp;
|
||||||
|
const ucd_record * prop = GET_UCD(c);
|
||||||
|
switch(code[1 + IMM2_SIZE + 1])
|
||||||
|
{
|
||||||
|
case PT_LAMP:
|
||||||
|
chartype = prop->chartype;
|
||||||
|
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_GC:
|
||||||
|
OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_PC:
|
||||||
|
OK = prop->chartype == code[1 + IMM2_SIZE + 2];
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_SC:
|
||||||
|
OK = prop->script == code[1 + IMM2_SIZE + 2];
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_SCX:
|
||||||
|
OK = (prop->script == code[1 + IMM2_SIZE + 2] ||
|
||||||
|
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop),
|
||||||
|
code[1 + IMM2_SIZE + 2]) != 0);
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* These are specials for combination cases. */
|
||||||
|
|
||||||
|
case PT_ALNUM:
|
||||||
|
chartype = prop->chartype;
|
||||||
|
OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||||
|
PRIV(ucp_gentype)[chartype] == ucp_N;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||||
|
which means that Perl space and POSIX space are now identical. PCRE
|
||||||
|
was changed at release 8.34. */
|
||||||
|
|
||||||
|
case PT_SPACE: /* Perl space */
|
||||||
|
case PT_PXSPACE: /* POSIX space */
|
||||||
|
switch(c)
|
||||||
|
{
|
||||||
|
HSPACE_CASES:
|
||||||
|
VSPACE_CASES:
|
||||||
|
OK = TRUE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_WORD:
|
||||||
|
chartype = prop->chartype;
|
||||||
|
OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
|
||||||
|
PRIV(ucp_gentype)[chartype] == ucp_N ||
|
||||||
|
chartype == ucp_Mn || chartype == ucp_Pc;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_CLIST:
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
|
if (c > MAX_UTF_CODE_POINT)
|
||||||
|
{
|
||||||
|
OK = FALSE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
if (c < *cp) { OK = FALSE; break; }
|
||||||
|
if (c == *cp++) { OK = TRUE; break; }
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_UCNC:
|
||||||
|
OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
|
||||||
|
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
|
||||||
|
c >= 0xe000;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_BIDICL:
|
||||||
|
OK = UCD_BIDICLASS(c) == code[1 + IMM2_SIZE + 2];
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_BOOL:
|
||||||
|
OK = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||||
|
UCD_BPROPS_PROP(prop), code[1 + IMM2_SIZE + 2]) != 0;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Should never occur, but keep compilers from grumbling. */
|
||||||
|
|
||||||
|
default:
|
||||||
|
OK = codevalue != OP_PROP;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (OK == (d == OP_PROP))
|
||||||
|
{
|
||||||
|
if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
if (++count >= (int)GET2(code, 1))
|
||||||
|
{ ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
|
||||||
|
else
|
||||||
|
{ ADD_NEW(state_offset, count); }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
|
||||||
|
case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
|
||||||
|
case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
|
||||||
|
case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
|
||||||
|
if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
|
||||||
|
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
|
||||||
|
count = current_state->count; /* Number already matched */
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
PCRE2_SPTR nptr;
|
||||||
|
int ncount = 0;
|
||||||
|
if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
|
||||||
|
&ncount);
|
||||||
|
if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||||
|
reset_could_continue = TRUE;
|
||||||
|
if (++count >= (int)GET2(code, 1))
|
||||||
|
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
|
||||||
|
else
|
||||||
|
{ ADD_NEW_DATA(-state_offset, count, ncount); }
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_ANYNL_EXTRA + OP_TYPEEXACT:
|
||||||
|
case OP_ANYNL_EXTRA + OP_TYPEUPTO:
|
||||||
|
case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
|
||||||
|
case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
|
||||||
|
if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
|
||||||
|
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
|
||||||
|
count = current_state->count; /* Number already matched */
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
int ncount = 0;
|
||||||
|
switch (c)
|
||||||
|
{
|
||||||
|
case CHAR_VT:
|
||||||
|
case CHAR_FF:
|
||||||
|
case CHAR_NEL:
|
||||||
|
#ifndef EBCDIC
|
||||||
|
case 0x2028:
|
||||||
|
case 0x2029:
|
||||||
|
#endif /* Not EBCDIC */
|
||||||
|
if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
|
||||||
|
goto ANYNL03;
|
||||||
|
|
||||||
|
case CHAR_CR:
|
||||||
|
if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
|
||||||
|
/* Fall through */
|
||||||
|
|
||||||
|
ANYNL03:
|
||||||
|
case CHAR_LF:
|
||||||
|
if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
if (++count >= (int)GET2(code, 1))
|
||||||
|
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
|
||||||
|
else
|
||||||
|
{ ADD_NEW_DATA(-state_offset, count, ncount); }
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_VSPACE_EXTRA + OP_TYPEEXACT:
|
||||||
|
case OP_VSPACE_EXTRA + OP_TYPEUPTO:
|
||||||
|
case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
|
||||||
|
case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
|
||||||
|
if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
|
||||||
|
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
|
||||||
|
count = current_state->count; /* Number already matched */
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
BOOL OK;
|
||||||
|
switch (c)
|
||||||
|
{
|
||||||
|
VSPACE_CASES:
|
||||||
|
OK = TRUE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
OK = FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (OK == (d == OP_VSPACE))
|
||||||
|
{
|
||||||
|
if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
if (++count >= (int)GET2(code, 1))
|
||||||
|
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
|
||||||
|
else
|
||||||
|
{ ADD_NEW_DATA(-state_offset, count, 0); }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_HSPACE_EXTRA + OP_TYPEEXACT:
|
||||||
|
case OP_HSPACE_EXTRA + OP_TYPEUPTO:
|
||||||
|
case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
|
||||||
|
case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
|
||||||
|
if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
|
||||||
|
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
|
||||||
|
count = current_state->count; /* Number already matched */
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
BOOL OK;
|
||||||
|
switch (c)
|
||||||
|
{
|
||||||
|
HSPACE_CASES:
|
||||||
|
OK = TRUE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
OK = FALSE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (OK == (d == OP_HSPACE))
|
||||||
|
{
|
||||||
|
if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
if (++count >= (int)GET2(code, 1))
|
||||||
|
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
|
||||||
|
else
|
||||||
|
{ ADD_NEW_DATA(-state_offset, count, 0); }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* ========================================================================== */
|
||||||
|
/* These opcodes are followed by a character that is usually compared
|
||||||
|
to the current subject character; it is loaded into d. We still get
|
||||||
|
here even if there is no subject character, because in some cases zero
|
||||||
|
repetitions are permitted. */
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_CHAR:
|
||||||
|
if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); }
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_CHARI:
|
||||||
|
if (clen == 0) break;
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (utf_or_ucp)
|
||||||
|
{
|
||||||
|
if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
|
||||||
|
{
|
||||||
|
unsigned int othercase;
|
||||||
|
if (c < 128)
|
||||||
|
othercase = fcc[c];
|
||||||
|
else
|
||||||
|
othercase = UCD_OTHERCASE(c);
|
||||||
|
if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
/* Not UTF or UCP mode */
|
||||||
|
{
|
||||||
|
if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
|
||||||
|
{ ADD_NEW(state_offset + 2, 0); }
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
/* This is a tricky one because it can match more than one character.
|
||||||
|
Find out how many characters to skip, and then set up a negative state
|
||||||
|
to wait for them to pass before continuing. */
|
||||||
|
|
||||||
|
case OP_EXTUNI:
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
int ncount = 0;
|
||||||
|
PCRE2_SPTR nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject,
|
||||||
|
end_subject, utf, &ncount);
|
||||||
|
if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||||
|
reset_could_continue = TRUE;
|
||||||
|
ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
/* This is tricky like EXTUNI because it too can match more than one
|
||||||
|
character (when CR is followed by LF). In this case, set up a negative
|
||||||
|
state to wait for one character to pass before continuing. */
|
||||||
|
|
||||||
|
case OP_ANYNL:
|
||||||
|
if (clen > 0) switch(c)
|
||||||
|
{
|
||||||
|
case CHAR_VT:
|
||||||
|
case CHAR_FF:
|
||||||
|
case CHAR_NEL:
|
||||||
|
#ifndef EBCDIC
|
||||||
|
case 0x2028:
|
||||||
|
case 0x2029:
|
||||||
|
#endif /* Not EBCDIC */
|
||||||
|
if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
|
||||||
|
/* Fall through */
|
||||||
|
|
||||||
|
case CHAR_LF:
|
||||||
|
ADD_NEW(state_offset + 1, 0);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case CHAR_CR:
|
||||||
|
if (ptr + 1 >= end_subject)
|
||||||
|
{
|
||||||
|
ADD_NEW(state_offset + 1, 0);
|
||||||
|
if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||||
|
reset_could_continue = TRUE;
|
||||||
|
}
|
||||||
|
else if (UCHAR21TEST(ptr + 1) == CHAR_LF)
|
||||||
|
{
|
||||||
|
ADD_NEW_DATA(-(state_offset + 1), 0, 1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ADD_NEW(state_offset + 1, 0);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_NOT_VSPACE:
|
||||||
|
if (clen > 0) switch(c)
|
||||||
|
{
|
||||||
|
VSPACE_CASES:
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
ADD_NEW(state_offset + 1, 0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_VSPACE:
|
||||||
|
if (clen > 0) switch(c)
|
||||||
|
{
|
||||||
|
VSPACE_CASES:
|
||||||
|
ADD_NEW(state_offset + 1, 0);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_NOT_HSPACE:
|
||||||
|
if (clen > 0) switch(c)
|
||||||
|
{
|
||||||
|
HSPACE_CASES:
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
ADD_NEW(state_offset + 1, 0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_HSPACE:
|
||||||
|
if (clen > 0) switch(c)
|
||||||
|
{
|
||||||
|
HSPACE_CASES:
|
||||||
|
ADD_NEW(state_offset + 1, 0);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
/* Match a negated single character casefully. */
|
||||||
|
|
||||||
|
case OP_NOT:
|
||||||
|
if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
/* Match a negated single character caselessly. */
|
||||||
|
|
||||||
|
case OP_NOTI:
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
uint32_t otherd;
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (utf_or_ucp && d >= 128)
|
||||||
|
otherd = UCD_OTHERCASE(d);
|
||||||
|
else
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
otherd = TABLE_GET(d, fcc, d);
|
||||||
|
if (c != d && c != otherd)
|
||||||
|
{ ADD_NEW(state_offset + dlen + 1, 0); }
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_PLUSI:
|
||||||
|
case OP_MINPLUSI:
|
||||||
|
case OP_POSPLUSI:
|
||||||
|
case OP_NOTPLUSI:
|
||||||
|
case OP_NOTMINPLUSI:
|
||||||
|
case OP_NOTPOSPLUSI:
|
||||||
|
caseless = TRUE;
|
||||||
|
codevalue -= OP_STARI - OP_STAR;
|
||||||
|
|
||||||
|
/* Fall through */
|
||||||
|
case OP_PLUS:
|
||||||
|
case OP_MINPLUS:
|
||||||
|
case OP_POSPLUS:
|
||||||
|
case OP_NOTPLUS:
|
||||||
|
case OP_NOTMINPLUS:
|
||||||
|
case OP_NOTPOSPLUS:
|
||||||
|
count = current_state->count; /* Already matched */
|
||||||
|
if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
uint32_t otherd = NOTACHAR;
|
||||||
|
if (caseless)
|
||||||
|
{
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (utf_or_ucp && d >= 128)
|
||||||
|
otherd = UCD_OTHERCASE(d);
|
||||||
|
else
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
otherd = TABLE_GET(d, fcc, d);
|
||||||
|
}
|
||||||
|
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
||||||
|
{
|
||||||
|
if (count > 0 &&
|
||||||
|
(codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
count++;
|
||||||
|
ADD_NEW(state_offset, count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_QUERYI:
|
||||||
|
case OP_MINQUERYI:
|
||||||
|
case OP_POSQUERYI:
|
||||||
|
case OP_NOTQUERYI:
|
||||||
|
case OP_NOTMINQUERYI:
|
||||||
|
case OP_NOTPOSQUERYI:
|
||||||
|
caseless = TRUE;
|
||||||
|
codevalue -= OP_STARI - OP_STAR;
|
||||||
|
/* Fall through */
|
||||||
|
case OP_QUERY:
|
||||||
|
case OP_MINQUERY:
|
||||||
|
case OP_POSQUERY:
|
||||||
|
case OP_NOTQUERY:
|
||||||
|
case OP_NOTMINQUERY:
|
||||||
|
case OP_NOTPOSQUERY:
|
||||||
|
ADD_ACTIVE(state_offset + dlen + 1, 0);
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
uint32_t otherd = NOTACHAR;
|
||||||
|
if (caseless)
|
||||||
|
{
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (utf_or_ucp && d >= 128)
|
||||||
|
otherd = UCD_OTHERCASE(d);
|
||||||
|
else
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
otherd = TABLE_GET(d, fcc, d);
|
||||||
|
}
|
||||||
|
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
||||||
|
{
|
||||||
|
if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
ADD_NEW(state_offset + dlen + 1, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_STARI:
|
||||||
|
case OP_MINSTARI:
|
||||||
|
case OP_POSSTARI:
|
||||||
|
case OP_NOTSTARI:
|
||||||
|
case OP_NOTMINSTARI:
|
||||||
|
case OP_NOTPOSSTARI:
|
||||||
|
caseless = TRUE;
|
||||||
|
codevalue -= OP_STARI - OP_STAR;
|
||||||
|
/* Fall through */
|
||||||
|
case OP_STAR:
|
||||||
|
case OP_MINSTAR:
|
||||||
|
case OP_POSSTAR:
|
||||||
|
case OP_NOTSTAR:
|
||||||
|
case OP_NOTMINSTAR:
|
||||||
|
case OP_NOTPOSSTAR:
|
||||||
|
ADD_ACTIVE(state_offset + dlen + 1, 0);
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
uint32_t otherd = NOTACHAR;
|
||||||
|
if (caseless)
|
||||||
|
{
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (utf_or_ucp && d >= 128)
|
||||||
|
otherd = UCD_OTHERCASE(d);
|
||||||
|
else
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
otherd = TABLE_GET(d, fcc, d);
|
||||||
|
}
|
||||||
|
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
||||||
|
{
|
||||||
|
if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
ADD_NEW(state_offset, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_EXACTI:
|
||||||
|
case OP_NOTEXACTI:
|
||||||
|
caseless = TRUE;
|
||||||
|
codevalue -= OP_STARI - OP_STAR;
|
||||||
|
/* Fall through */
|
||||||
|
case OP_EXACT:
|
||||||
|
case OP_NOTEXACT:
|
||||||
|
count = current_state->count; /* Number already matched */
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
uint32_t otherd = NOTACHAR;
|
||||||
|
if (caseless)
|
||||||
|
{
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (utf_or_ucp && d >= 128)
|
||||||
|
otherd = UCD_OTHERCASE(d);
|
||||||
|
else
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
otherd = TABLE_GET(d, fcc, d);
|
||||||
|
}
|
||||||
|
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
||||||
|
{
|
||||||
|
if (++count >= (int)GET2(code, 1))
|
||||||
|
{ ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
|
||||||
|
else
|
||||||
|
{ ADD_NEW(state_offset, count); }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_UPTOI:
|
||||||
|
case OP_MINUPTOI:
|
||||||
|
case OP_POSUPTOI:
|
||||||
|
case OP_NOTUPTOI:
|
||||||
|
case OP_NOTMINUPTOI:
|
||||||
|
case OP_NOTPOSUPTOI:
|
||||||
|
caseless = TRUE;
|
||||||
|
codevalue -= OP_STARI - OP_STAR;
|
||||||
|
/* Fall through */
|
||||||
|
case OP_UPTO:
|
||||||
|
case OP_MINUPTO:
|
||||||
|
case OP_POSUPTO:
|
||||||
|
case OP_NOTUPTO:
|
||||||
|
case OP_NOTMINUPTO:
|
||||||
|
case OP_NOTPOSUPTO:
|
||||||
|
ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
|
||||||
|
count = current_state->count; /* Number already matched */
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
uint32_t otherd = NOTACHAR;
|
||||||
|
if (caseless)
|
||||||
|
{
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (utf_or_ucp && d >= 128)
|
||||||
|
otherd = UCD_OTHERCASE(d);
|
||||||
|
else
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
otherd = TABLE_GET(d, fcc, d);
|
||||||
|
}
|
||||||
|
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
|
||||||
|
{
|
||||||
|
if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
if (++count >= (int)GET2(code, 1))
|
||||||
|
{ ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
|
||||||
|
else
|
||||||
|
{ ADD_NEW(state_offset, count); }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
|
||||||
|
/* ========================================================================== */
|
||||||
|
/* These are the class-handling opcodes */
|
||||||
|
|
||||||
|
case OP_CLASS:
|
||||||
|
case OP_NCLASS:
|
||||||
|
#ifdef SUPPORT_WIDE_CHARS
|
||||||
|
case OP_XCLASS:
|
||||||
|
case OP_ECLASS:
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
BOOL isinclass = FALSE;
|
||||||
|
int next_state_offset;
|
||||||
|
PCRE2_SPTR ecode;
|
||||||
|
|
||||||
|
#ifdef SUPPORT_WIDE_CHARS
|
||||||
|
/* An extended class may have a table or a list of single characters,
|
||||||
|
ranges, or both, and it may be positive or negative. There's a
|
||||||
|
function that sorts all this out. */
|
||||||
|
|
||||||
|
if (codevalue == OP_XCLASS)
|
||||||
|
{
|
||||||
|
ecode = code + GET(code, 1);
|
||||||
|
if (clen > 0)
|
||||||
|
isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE,
|
||||||
|
(const uint8_t*)mb->start_code, utf);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* A nested set-based class has internal opcodes for performing
|
||||||
|
set operations. */
|
||||||
|
|
||||||
|
else if (codevalue == OP_ECLASS)
|
||||||
|
{
|
||||||
|
ecode = code + GET(code, 1);
|
||||||
|
if (clen > 0)
|
||||||
|
isinclass = PRIV(eclass)(c, code + 1 + LINK_SIZE, ecode,
|
||||||
|
(const uint8_t*)mb->start_code, utf);
|
||||||
|
}
|
||||||
|
|
||||||
|
else
|
||||||
|
#endif /* SUPPORT_WIDE_CHARS */
|
||||||
|
|
||||||
|
/* For a simple class, there is always just a 32-byte table, and we
|
||||||
|
can set isinclass from it. */
|
||||||
|
|
||||||
|
{
|
||||||
|
ecode = code + 1 + (32 / sizeof(PCRE2_UCHAR));
|
||||||
|
if (clen > 0)
|
||||||
|
{
|
||||||
|
isinclass = (c > 255)? (codevalue == OP_NCLASS) :
|
||||||
|
((((const uint8_t *)(code + 1))[c/8] & (1u << (c&7))) != 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* At this point, isinclass is set for all kinds of class, and ecode
|
||||||
|
points to the byte after the end of the class. If there is a
|
||||||
|
quantifier, this is where it will be. */
|
||||||
|
|
||||||
|
next_state_offset = (int)(ecode - start_code);
|
||||||
|
|
||||||
|
switch (*ecode)
|
||||||
|
{
|
||||||
|
case OP_CRSTAR:
|
||||||
|
case OP_CRMINSTAR:
|
||||||
|
case OP_CRPOSSTAR:
|
||||||
|
ADD_ACTIVE(next_state_offset + 1, 0);
|
||||||
|
if (isinclass)
|
||||||
|
{
|
||||||
|
if (*ecode == OP_CRPOSSTAR)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
ADD_NEW(state_offset, 0);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_CRPLUS:
|
||||||
|
case OP_CRMINPLUS:
|
||||||
|
case OP_CRPOSPLUS:
|
||||||
|
count = current_state->count; /* Already matched */
|
||||||
|
if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
|
||||||
|
if (isinclass)
|
||||||
|
{
|
||||||
|
if (count > 0 && *ecode == OP_CRPOSPLUS)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
count++;
|
||||||
|
ADD_NEW(state_offset, count);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_CRQUERY:
|
||||||
|
case OP_CRMINQUERY:
|
||||||
|
case OP_CRPOSQUERY:
|
||||||
|
ADD_ACTIVE(next_state_offset + 1, 0);
|
||||||
|
if (isinclass)
|
||||||
|
{
|
||||||
|
if (*ecode == OP_CRPOSQUERY)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
ADD_NEW(next_state_offset + 1, 0);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_CRRANGE:
|
||||||
|
case OP_CRMINRANGE:
|
||||||
|
case OP_CRPOSRANGE:
|
||||||
|
count = current_state->count; /* Already matched */
|
||||||
|
if (count >= (int)GET2(ecode, 1))
|
||||||
|
{ ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
|
||||||
|
if (isinclass)
|
||||||
|
{
|
||||||
|
int max = (int)GET2(ecode, 1 + IMM2_SIZE);
|
||||||
|
|
||||||
|
if (*ecode == OP_CRPOSRANGE && count >= (int)GET2(ecode, 1))
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (++count >= max && max != 0) /* Max 0 => no limit */
|
||||||
|
{ ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
|
||||||
|
else
|
||||||
|
{ ADD_NEW(state_offset, count); }
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
if (isinclass) { ADD_NEW(next_state_offset, 0); }
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* ========================================================================== */
|
||||||
|
/* These are the opcodes for fancy brackets of various kinds. We have
|
||||||
|
to use recursion in order to handle them. The "always failing" assertion
|
||||||
|
(?!) is optimised to OP_FAIL when compiling, so we have to support that,
|
||||||
|
though the other "backtracking verbs" are not supported. */
|
||||||
|
|
||||||
|
case OP_FAIL:
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_ASSERT:
|
||||||
|
case OP_ASSERT_NOT:
|
||||||
|
case OP_ASSERTBACK:
|
||||||
|
case OP_ASSERTBACK_NOT:
|
||||||
|
{
|
||||||
|
int rc;
|
||||||
|
int *local_workspace;
|
||||||
|
PCRE2_SIZE *local_offsets;
|
||||||
|
PCRE2_SPTR endasscode = code + GET(code, 1);
|
||||||
|
RWS_anchor *rws = (RWS_anchor *)RWS;
|
||||||
|
|
||||||
|
if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
|
||||||
|
{
|
||||||
|
rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
|
||||||
|
if (rc != 0) return rc;
|
||||||
|
RWS = (int *)rws;
|
||||||
|
}
|
||||||
|
|
||||||
|
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
|
||||||
|
local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
|
||||||
|
rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||||
|
|
||||||
|
while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
|
||||||
|
|
||||||
|
rc = internal_dfa_match(
|
||||||
|
mb, /* static match data */
|
||||||
|
code, /* this subexpression's code */
|
||||||
|
ptr, /* where we currently are */
|
||||||
|
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
|
||||||
|
local_offsets, /* offset vector */
|
||||||
|
RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
|
||||||
|
local_workspace, /* workspace vector */
|
||||||
|
RWS_RSIZE, /* size of same */
|
||||||
|
rlevel, /* function recursion level */
|
||||||
|
RWS); /* recursion workspace */
|
||||||
|
|
||||||
|
rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||||
|
|
||||||
|
if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc;
|
||||||
|
if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
|
||||||
|
{ ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_COND:
|
||||||
|
case OP_SCOND:
|
||||||
|
{
|
||||||
|
int codelink = (int)GET(code, 1);
|
||||||
|
PCRE2_UCHAR condcode;
|
||||||
|
|
||||||
|
/* Because of the way auto-callout works during compile, a callout item
|
||||||
|
is inserted between OP_COND and an assertion condition. This does not
|
||||||
|
happen for the other conditions. */
|
||||||
|
|
||||||
|
if (code[LINK_SIZE + 1] == OP_CALLOUT
|
||||||
|
|| code[LINK_SIZE + 1] == OP_CALLOUT_STR)
|
||||||
|
{
|
||||||
|
PCRE2_SIZE callout_length;
|
||||||
|
rrc = do_callout_dfa(code, offsets, current_subject, ptr, mb,
|
||||||
|
1 + LINK_SIZE, &callout_length);
|
||||||
|
if (rrc < 0) return rrc; /* Abandon */
|
||||||
|
if (rrc > 0) break; /* Fail this thread */
|
||||||
|
code += callout_length; /* Skip callout data */
|
||||||
|
}
|
||||||
|
|
||||||
|
condcode = code[LINK_SIZE+1];
|
||||||
|
|
||||||
|
/* Back reference conditions and duplicate named recursion conditions
|
||||||
|
are not supported */
|
||||||
|
|
||||||
|
if (condcode == OP_CREF || condcode == OP_DNCREF ||
|
||||||
|
condcode == OP_DNRREF)
|
||||||
|
return PCRE2_ERROR_DFA_UCOND;
|
||||||
|
|
||||||
|
/* The DEFINE condition is always false, and the assertion (?!) is
|
||||||
|
converted to OP_FAIL. */
|
||||||
|
|
||||||
|
if (condcode == OP_FALSE || condcode == OP_FAIL)
|
||||||
|
{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
|
||||||
|
|
||||||
|
/* There is also an always-true condition */
|
||||||
|
|
||||||
|
else if (condcode == OP_TRUE)
|
||||||
|
{ ADD_ACTIVE(state_offset + LINK_SIZE + 2, 0); }
|
||||||
|
|
||||||
|
/* The only supported version of OP_RREF is for the value RREF_ANY,
|
||||||
|
which means "test if in any recursion". We can't test for specifically
|
||||||
|
recursed groups. */
|
||||||
|
|
||||||
|
else if (condcode == OP_RREF)
|
||||||
|
{
|
||||||
|
unsigned int value = GET2(code, LINK_SIZE + 2);
|
||||||
|
if (value != RREF_ANY) return PCRE2_ERROR_DFA_UCOND;
|
||||||
|
if (mb->recursive != NULL)
|
||||||
|
{ ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
|
||||||
|
else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Otherwise, the condition is an assertion */
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int rc;
|
||||||
|
int *local_workspace;
|
||||||
|
PCRE2_SIZE *local_offsets;
|
||||||
|
PCRE2_SPTR asscode = code + LINK_SIZE + 1;
|
||||||
|
PCRE2_SPTR endasscode = asscode + GET(asscode, 1);
|
||||||
|
RWS_anchor *rws = (RWS_anchor *)RWS;
|
||||||
|
|
||||||
|
if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
|
||||||
|
{
|
||||||
|
rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
|
||||||
|
if (rc != 0) return rc;
|
||||||
|
RWS = (int *)rws;
|
||||||
|
}
|
||||||
|
|
||||||
|
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
|
||||||
|
local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
|
||||||
|
rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||||
|
|
||||||
|
while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
|
||||||
|
|
||||||
|
rc = internal_dfa_match(
|
||||||
|
mb, /* fixed match data */
|
||||||
|
asscode, /* this subexpression's code */
|
||||||
|
ptr, /* where we currently are */
|
||||||
|
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
|
||||||
|
local_offsets, /* offset vector */
|
||||||
|
RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
|
||||||
|
local_workspace, /* workspace vector */
|
||||||
|
RWS_RSIZE, /* size of same */
|
||||||
|
rlevel, /* function recursion level */
|
||||||
|
RWS); /* recursion workspace */
|
||||||
|
|
||||||
|
rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||||
|
|
||||||
|
if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc;
|
||||||
|
if ((rc >= 0) ==
|
||||||
|
(condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
|
||||||
|
{ ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
|
||||||
|
else
|
||||||
|
{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_RECURSE:
|
||||||
|
{
|
||||||
|
int rc;
|
||||||
|
int *local_workspace;
|
||||||
|
PCRE2_SIZE *local_offsets;
|
||||||
|
RWS_anchor *rws = (RWS_anchor *)RWS;
|
||||||
|
PCRE2_SPTR callpat = start_code + GET(code, 1);
|
||||||
|
uint32_t recno = (callpat == mb->start_code)? 0 :
|
||||||
|
GET2(callpat, 1 + LINK_SIZE);
|
||||||
|
|
||||||
|
if (rws->free < RWS_RSIZE + RWS_OVEC_RSIZE)
|
||||||
|
{
|
||||||
|
rc = more_workspace(&rws, RWS_OVEC_RSIZE, mb);
|
||||||
|
if (rc != 0) return rc;
|
||||||
|
RWS = (int *)rws;
|
||||||
|
}
|
||||||
|
|
||||||
|
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
|
||||||
|
local_workspace = ((int *)local_offsets) + RWS_OVEC_RSIZE;
|
||||||
|
rws->free -= RWS_RSIZE + RWS_OVEC_RSIZE;
|
||||||
|
|
||||||
|
/* Check for repeating a recursion without advancing the subject
|
||||||
|
pointer or last used character. This should catch convoluted mutual
|
||||||
|
recursions. (Some simple cases are caught at compile time.) */
|
||||||
|
|
||||||
|
for (dfa_recursion_info *ri = mb->recursive;
|
||||||
|
ri != NULL;
|
||||||
|
ri = ri->prevrec)
|
||||||
|
{
|
||||||
|
if (recno == ri->group_num && ptr == ri->subject_position &&
|
||||||
|
mb->last_used_ptr == ri->last_used_ptr)
|
||||||
|
return PCRE2_ERROR_RECURSELOOP;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Remember this recursion and where we started it so as to
|
||||||
|
catch infinite loops. */
|
||||||
|
|
||||||
|
new_recursive.group_num = recno;
|
||||||
|
new_recursive.subject_position = ptr;
|
||||||
|
new_recursive.last_used_ptr = mb->last_used_ptr;
|
||||||
|
new_recursive.prevrec = mb->recursive;
|
||||||
|
mb->recursive = &new_recursive;
|
||||||
|
|
||||||
|
rc = internal_dfa_match(
|
||||||
|
mb, /* fixed match data */
|
||||||
|
callpat, /* this subexpression's code */
|
||||||
|
ptr, /* where we currently are */
|
||||||
|
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
|
||||||
|
local_offsets, /* offset vector */
|
||||||
|
RWS_OVEC_RSIZE/OVEC_UNIT, /* size of same */
|
||||||
|
local_workspace, /* workspace vector */
|
||||||
|
RWS_RSIZE, /* size of same */
|
||||||
|
rlevel, /* function recursion level */
|
||||||
|
RWS); /* recursion workspace */
|
||||||
|
|
||||||
|
rws->free += RWS_RSIZE + RWS_OVEC_RSIZE;
|
||||||
|
mb->recursive = new_recursive.prevrec; /* Done this recursion */
|
||||||
|
|
||||||
|
/* Ran out of internal offsets */
|
||||||
|
|
||||||
|
if (rc == 0) return PCRE2_ERROR_DFA_RECURSE;
|
||||||
|
|
||||||
|
/* For each successful matched substring, set up the next state with a
|
||||||
|
count of characters to skip before trying it. Note that the count is in
|
||||||
|
characters, not bytes. */
|
||||||
|
|
||||||
|
if (rc > 0)
|
||||||
|
{
|
||||||
|
for (rc = rc*2 - 2; rc >= 0; rc -= 2)
|
||||||
|
{
|
||||||
|
PCRE2_SIZE charcount = local_offsets[rc+1] - local_offsets[rc];
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
if (utf)
|
||||||
|
{
|
||||||
|
PCRE2_SPTR p = start_subject + local_offsets[rc];
|
||||||
|
PCRE2_SPTR pp = start_subject + local_offsets[rc+1];
|
||||||
|
while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
if (charcount > 0)
|
||||||
|
{
|
||||||
|
ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0,
|
||||||
|
(int)(charcount - 1));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (rc != PCRE2_ERROR_NOMATCH) return rc;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_BRAPOS:
|
||||||
|
case OP_SBRAPOS:
|
||||||
|
case OP_CBRAPOS:
|
||||||
|
case OP_SCBRAPOS:
|
||||||
|
case OP_BRAPOSZERO:
|
||||||
|
{
|
||||||
|
int rc;
|
||||||
|
int *local_workspace;
|
||||||
|
PCRE2_SIZE *local_offsets;
|
||||||
|
PCRE2_SIZE charcount, matched_count;
|
||||||
|
PCRE2_SPTR local_ptr = ptr;
|
||||||
|
RWS_anchor *rws = (RWS_anchor *)RWS;
|
||||||
|
BOOL allow_zero;
|
||||||
|
|
||||||
|
if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
|
||||||
|
{
|
||||||
|
rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
|
||||||
|
if (rc != 0) return rc;
|
||||||
|
RWS = (int *)rws;
|
||||||
|
}
|
||||||
|
|
||||||
|
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
|
||||||
|
local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
|
||||||
|
rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||||
|
|
||||||
|
if (codevalue == OP_BRAPOSZERO)
|
||||||
|
{
|
||||||
|
allow_zero = TRUE;
|
||||||
|
++code; /* The following opcode will be one of the above BRAs */
|
||||||
|
}
|
||||||
|
else allow_zero = FALSE;
|
||||||
|
|
||||||
|
/* Loop to match the subpattern as many times as possible as if it were
|
||||||
|
a complete pattern. */
|
||||||
|
|
||||||
|
for (matched_count = 0;; matched_count++)
|
||||||
|
{
|
||||||
|
rc = internal_dfa_match(
|
||||||
|
mb, /* fixed match data */
|
||||||
|
code, /* this subexpression's code */
|
||||||
|
local_ptr, /* where we currently are */
|
||||||
|
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
|
||||||
|
local_offsets, /* offset vector */
|
||||||
|
RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
|
||||||
|
local_workspace, /* workspace vector */
|
||||||
|
RWS_RSIZE, /* size of same */
|
||||||
|
rlevel, /* function recursion level */
|
||||||
|
RWS); /* recursion workspace */
|
||||||
|
|
||||||
|
/* Failed to match */
|
||||||
|
|
||||||
|
if (rc < 0)
|
||||||
|
{
|
||||||
|
if (rc != PCRE2_ERROR_NOMATCH) return rc;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Matched: break the loop if zero characters matched. */
|
||||||
|
|
||||||
|
charcount = local_offsets[1] - local_offsets[0];
|
||||||
|
if (charcount == 0) break;
|
||||||
|
local_ptr += charcount; /* Advance temporary position ptr */
|
||||||
|
}
|
||||||
|
|
||||||
|
rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||||
|
|
||||||
|
/* At this point we have matched the subpattern matched_count
|
||||||
|
times, and local_ptr is pointing to the character after the end of the
|
||||||
|
last match. */
|
||||||
|
|
||||||
|
if (matched_count > 0 || allow_zero)
|
||||||
|
{
|
||||||
|
PCRE2_SPTR end_subpattern = code;
|
||||||
|
int next_state_offset;
|
||||||
|
|
||||||
|
do { end_subpattern += GET(end_subpattern, 1); }
|
||||||
|
while (*end_subpattern == OP_ALT);
|
||||||
|
next_state_offset =
|
||||||
|
(int)(end_subpattern - start_code + LINK_SIZE + 1);
|
||||||
|
|
||||||
|
/* Optimization: if there are no more active states, and there
|
||||||
|
are no new states yet set up, then skip over the subject string
|
||||||
|
right here, to save looping. Otherwise, set up the new state to swing
|
||||||
|
into action when the end of the matched substring is reached. */
|
||||||
|
|
||||||
|
if (i + 1 >= active_count && new_count == 0)
|
||||||
|
{
|
||||||
|
ptr = local_ptr;
|
||||||
|
clen = 0;
|
||||||
|
ADD_NEW(next_state_offset, 0);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PCRE2_SPTR p = ptr;
|
||||||
|
PCRE2_SPTR pp = local_ptr;
|
||||||
|
charcount = (PCRE2_SIZE)(pp - p);
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
if (utf) while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--;
|
||||||
|
#endif
|
||||||
|
ADD_NEW_DATA(-next_state_offset, 0, (int)(charcount - 1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------*/
|
||||||
|
case OP_ONCE:
|
||||||
|
{
|
||||||
|
int rc;
|
||||||
|
int *local_workspace;
|
||||||
|
PCRE2_SIZE *local_offsets;
|
||||||
|
RWS_anchor *rws = (RWS_anchor *)RWS;
|
||||||
|
|
||||||
|
if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
|
||||||
|
{
|
||||||
|
rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
|
||||||
|
if (rc != 0) return rc;
|
||||||
|
RWS = (int *)rws;
|
||||||
|
}
|
||||||
|
|
||||||
|
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
|
||||||
|
local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
|
||||||
|
rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||||
|
|
||||||
|
rc = internal_dfa_match(
|
||||||
|
mb, /* fixed match data */
|
||||||
|
code, /* this subexpression's code */
|
||||||
|
ptr, /* where we currently are */
|
||||||
|
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
|
||||||
|
local_offsets, /* offset vector */
|
||||||
|
RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
|
||||||
|
local_workspace, /* workspace vector */
|
||||||
|
RWS_RSIZE, /* size of same */
|
||||||
|
rlevel, /* function recursion level */
|
||||||
|
RWS); /* recursion workspace */
|
||||||
|
|
||||||
|
rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||||
|
|
||||||
|
if (rc >= 0)
|
||||||
|
{
|
||||||
|
PCRE2_SPTR end_subpattern = code;
|
||||||
|
PCRE2_SIZE charcount = local_offsets[1] - local_offsets[0];
|
||||||
|
int next_state_offset, repeat_state_offset;
|
||||||
|
|
||||||
|
do { end_subpattern += GET(end_subpattern, 1); }
|
||||||
|
while (*end_subpattern == OP_ALT);
|
||||||
|
next_state_offset =
|
||||||
|
(int)(end_subpattern - start_code + LINK_SIZE + 1);
|
||||||
|
|
||||||
|
/* If the end of this subpattern is KETRMAX or KETRMIN, we must
|
||||||
|
arrange for the repeat state also to be added to the relevant list.
|
||||||
|
Calculate the offset, or set -1 for no repeat. */
|
||||||
|
|
||||||
|
repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
|
||||||
|
*end_subpattern == OP_KETRMIN)?
|
||||||
|
(int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1;
|
||||||
|
|
||||||
|
/* If we have matched an empty string, add the next state at the
|
||||||
|
current character pointer. This is important so that the duplicate
|
||||||
|
checking kicks in, which is what breaks infinite loops that match an
|
||||||
|
empty string. */
|
||||||
|
|
||||||
|
if (charcount == 0)
|
||||||
|
{
|
||||||
|
ADD_ACTIVE(next_state_offset, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Optimization: if there are no more active states, and there
|
||||||
|
are no new states yet set up, then skip over the subject string
|
||||||
|
right here, to save looping. Otherwise, set up the new state to swing
|
||||||
|
into action when the end of the matched substring is reached. */
|
||||||
|
|
||||||
|
else if (i + 1 >= active_count && new_count == 0)
|
||||||
|
{
|
||||||
|
ptr += charcount;
|
||||||
|
clen = 0;
|
||||||
|
ADD_NEW(next_state_offset, 0);
|
||||||
|
|
||||||
|
/* If we are adding a repeat state at the new character position,
|
||||||
|
we must fudge things so that it is the only current state.
|
||||||
|
Otherwise, it might be a duplicate of one we processed before, and
|
||||||
|
that would cause it to be skipped. */
|
||||||
|
|
||||||
|
if (repeat_state_offset >= 0)
|
||||||
|
{
|
||||||
|
next_active_state = active_states;
|
||||||
|
active_count = 0;
|
||||||
|
i = -1;
|
||||||
|
ADD_ACTIVE(repeat_state_offset, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
if (utf)
|
||||||
|
{
|
||||||
|
PCRE2_SPTR p = start_subject + local_offsets[0];
|
||||||
|
PCRE2_SPTR pp = start_subject + local_offsets[1];
|
||||||
|
while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
ADD_NEW_DATA(-next_state_offset, 0, (int)(charcount - 1));
|
||||||
|
if (repeat_state_offset >= 0)
|
||||||
|
{ ADD_NEW_DATA(-repeat_state_offset, 0, (int)(charcount - 1)); }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (rc != PCRE2_ERROR_NOMATCH) return rc;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
|
||||||
|
/* ========================================================================== */
|
||||||
|
/* Handle callouts */
|
||||||
|
|
||||||
|
case OP_CALLOUT:
|
||||||
|
case OP_CALLOUT_STR:
|
||||||
|
{
|
||||||
|
PCRE2_SIZE callout_length;
|
||||||
|
rrc = do_callout_dfa(code, offsets, current_subject, ptr, mb, 0,
|
||||||
|
&callout_length);
|
||||||
|
if (rrc < 0) return rrc; /* Abandon */
|
||||||
|
if (rrc == 0)
|
||||||
|
{ ADD_ACTIVE(state_offset + (int)callout_length, 0); }
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
|
||||||
|
/* ========================================================================== */
|
||||||
|
default: /* Unsupported opcode */
|
||||||
|
return PCRE2_ERROR_DFA_UITEM;
|
||||||
|
}
|
||||||
|
|
||||||
|
NEXT_ACTIVE_STATE: continue;
|
||||||
|
|
||||||
|
} /* End of loop scanning active states */
|
||||||
|
|
||||||
|
/* We have finished the processing at the current subject character. If no
|
||||||
|
new states have been set for the next character, we have found all the
|
||||||
|
matches that we are going to find. If partial matching has been requested,
|
||||||
|
check for appropriate conditions.
|
||||||
|
|
||||||
|
The "could_continue" variable is true if a state could have continued but
|
||||||
|
for the fact that the end of the subject was reached. */
|
||||||
|
|
||||||
|
if (new_count <= 0)
|
||||||
|
{
|
||||||
|
if (could_continue && /* Some could go on, and */
|
||||||
|
( /* either... */
|
||||||
|
(mb->moptions & PCRE2_PARTIAL_HARD) != 0 /* Hard partial */
|
||||||
|
|| /* or... */
|
||||||
|
((mb->moptions & PCRE2_PARTIAL_SOFT) != 0 && /* Soft partial and */
|
||||||
|
match_count < 0) /* no matches */
|
||||||
|
) && /* And... */
|
||||||
|
(
|
||||||
|
partial_newline || /* Either partial NL */
|
||||||
|
( /* or ... */
|
||||||
|
ptr >= end_subject && /* End of subject and */
|
||||||
|
( /* either */
|
||||||
|
ptr > mb->start_used_ptr || /* Inspected non-empty string */
|
||||||
|
mb->allowemptypartial /* or pattern has lookbehind */
|
||||||
|
) /* or could match empty */
|
||||||
|
)
|
||||||
|
))
|
||||||
|
match_count = PCRE2_ERROR_PARTIAL;
|
||||||
|
break; /* Exit from loop along the subject string */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* One or more states are active for the next character. */
|
||||||
|
|
||||||
|
ptr += clen; /* Advance to next subject character */
|
||||||
|
} /* Loop to move along the subject string */
|
||||||
|
|
||||||
|
/* Control gets here from "break" a few lines above. If we have a match and
|
||||||
|
PCRE2_ENDANCHORED is set, the match fails. */
|
||||||
|
|
||||||
|
if (match_count >= 0 &&
|
||||||
|
((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0 &&
|
||||||
|
ptr < end_subject)
|
||||||
|
match_count = PCRE2_ERROR_NOMATCH;
|
||||||
|
|
||||||
|
return match_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Match a pattern using the DFA algorithm *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function matches a compiled pattern to a subject string, using the
|
||||||
|
alternate matching algorithm that finds all matches at once.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
code points to the compiled pattern
|
||||||
|
subject subject string
|
||||||
|
length length of subject string
|
||||||
|
start_offset where to start matching in the subject
|
||||||
|
options option bits
|
||||||
|
match_data points to a match data structure
|
||||||
|
mcontext points to a match context
|
||||||
|
workspace pointer to workspace
|
||||||
|
wscount size of workspace
|
||||||
|
|
||||||
|
Returns: > 0 => number of match offset pairs placed in offsets
|
||||||
|
= 0 => offsets overflowed; longest matches are present
|
||||||
|
-1 => failed to match
|
||||||
|
< -1 => some kind of unexpected problem
|
||||||
|
*/
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
|
||||||
|
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
|
||||||
|
pcre2_match_context *mcontext, int *workspace, PCRE2_SIZE wscount)
|
||||||
|
{
|
||||||
|
int rc;
|
||||||
|
int was_zero_terminated = 0;
|
||||||
|
|
||||||
|
const pcre2_real_code *re = (const pcre2_real_code *)code;
|
||||||
|
|
||||||
|
PCRE2_SPTR start_match;
|
||||||
|
PCRE2_SPTR end_subject;
|
||||||
|
PCRE2_SPTR bumpalong_limit;
|
||||||
|
PCRE2_SPTR req_cu_ptr;
|
||||||
|
|
||||||
|
BOOL utf, anchored, startline, firstline;
|
||||||
|
BOOL has_first_cu = FALSE;
|
||||||
|
BOOL has_req_cu = FALSE;
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
PCRE2_SPTR memchr_found_first_cu = NULL;
|
||||||
|
PCRE2_SPTR memchr_found_first_cu2 = NULL;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
PCRE2_UCHAR first_cu = 0;
|
||||||
|
PCRE2_UCHAR first_cu2 = 0;
|
||||||
|
PCRE2_UCHAR req_cu = 0;
|
||||||
|
PCRE2_UCHAR req_cu2 = 0;
|
||||||
|
|
||||||
|
const uint8_t *start_bits = NULL;
|
||||||
|
|
||||||
|
/* We need to have mb pointing to a match block, because the IS_NEWLINE macro
|
||||||
|
is used below, and it expects NLBLOCK to be defined as a pointer. */
|
||||||
|
|
||||||
|
pcre2_callout_block cb;
|
||||||
|
dfa_match_block actual_match_block;
|
||||||
|
dfa_match_block *mb = &actual_match_block;
|
||||||
|
|
||||||
|
/* Set up a starting block of memory for use during recursive calls to
|
||||||
|
internal_dfa_match(). By putting this on the stack, it minimizes resource use
|
||||||
|
in the case when it is not needed. If this is too small, more memory is
|
||||||
|
obtained from the heap. At the start of each block is an anchor structure.*/
|
||||||
|
|
||||||
|
int base_recursion_workspace[RWS_BASE_SIZE];
|
||||||
|
RWS_anchor *rws = (RWS_anchor *)base_recursion_workspace;
|
||||||
|
rws->next = NULL;
|
||||||
|
rws->size = RWS_BASE_SIZE;
|
||||||
|
rws->free = RWS_BASE_SIZE - RWS_ANCHOR_SIZE;
|
||||||
|
|
||||||
|
/* Recognize NULL, length 0 as an empty string. */
|
||||||
|
|
||||||
|
if (subject == NULL && length == 0) subject = (PCRE2_SPTR)"";
|
||||||
|
|
||||||
|
/* Plausibility checks */
|
||||||
|
|
||||||
|
if ((options & ~PUBLIC_DFA_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
|
||||||
|
if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL)
|
||||||
|
return PCRE2_ERROR_NULL;
|
||||||
|
|
||||||
|
if (length == PCRE2_ZERO_TERMINATED)
|
||||||
|
{
|
||||||
|
length = PRIV(strlen)(subject);
|
||||||
|
was_zero_terminated = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE;
|
||||||
|
if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
|
||||||
|
|
||||||
|
/* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same
|
||||||
|
time. */
|
||||||
|
|
||||||
|
if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0 &&
|
||||||
|
((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
|
||||||
|
return PCRE2_ERROR_BADOPTION;
|
||||||
|
|
||||||
|
/* Invalid UTF support is not available for DFA matching. */
|
||||||
|
|
||||||
|
if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
|
||||||
|
return PCRE2_ERROR_DFA_UINVALID_UTF;
|
||||||
|
|
||||||
|
/* Check that the first field in the block is the magic number. If it is not,
|
||||||
|
return with PCRE2_ERROR_BADMAGIC. */
|
||||||
|
|
||||||
|
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
|
||||||
|
|
||||||
|
/* Check the code unit width. */
|
||||||
|
|
||||||
|
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
|
||||||
|
return PCRE2_ERROR_BADMODE;
|
||||||
|
|
||||||
|
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
|
||||||
|
options variable for this function. Users of PCRE2 who are not calling the
|
||||||
|
function directly would like to have a way of setting these flags, in the same
|
||||||
|
way that they can set pcre2_compile() flags like PCRE2_NO_AUTO_POSSESS with
|
||||||
|
constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
|
||||||
|
(*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field which can now be
|
||||||
|
transferred to the options for this function. The bits are guaranteed to be
|
||||||
|
adjacent, but do not have the same values. This bit of Boolean trickery assumes
|
||||||
|
that the match-time bits are not more significant than the flag bits. If by
|
||||||
|
accident this is not the case, a compile-time division by zero error will
|
||||||
|
occur. */
|
||||||
|
|
||||||
|
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
|
||||||
|
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
|
||||||
|
options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
|
||||||
|
#undef FF
|
||||||
|
#undef OO
|
||||||
|
|
||||||
|
/* If restarting after a partial match, do some sanity checks on the contents
|
||||||
|
of the workspace. */
|
||||||
|
|
||||||
|
if ((options & PCRE2_DFA_RESTART) != 0)
|
||||||
|
{
|
||||||
|
if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
|
||||||
|
workspace[1] > (int)((wscount - 2)/INTS_PER_STATEBLOCK))
|
||||||
|
return PCRE2_ERROR_DFA_BADRESTART;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Set some local values */
|
||||||
|
|
||||||
|
utf = (re->overall_options & PCRE2_UTF) != 0;
|
||||||
|
start_match = subject + start_offset;
|
||||||
|
end_subject = subject + length;
|
||||||
|
req_cu_ptr = start_match - 1;
|
||||||
|
anchored = (options & (PCRE2_ANCHORED|PCRE2_DFA_RESTART)) != 0 ||
|
||||||
|
(re->overall_options & PCRE2_ANCHORED) != 0;
|
||||||
|
|
||||||
|
/* The "must be at the start of a line" flags are used in a loop when finding
|
||||||
|
where to start. */
|
||||||
|
|
||||||
|
startline = (re->flags & PCRE2_STARTLINE) != 0;
|
||||||
|
firstline = !anchored && (re->overall_options & PCRE2_FIRSTLINE) != 0;
|
||||||
|
bumpalong_limit = end_subject;
|
||||||
|
|
||||||
|
/* Initialize and set up the fixed fields in the callout block, with a pointer
|
||||||
|
in the match block. */
|
||||||
|
|
||||||
|
mb->cb = &cb;
|
||||||
|
cb.version = 2;
|
||||||
|
cb.subject = subject;
|
||||||
|
cb.subject_length = (PCRE2_SIZE)(end_subject - subject);
|
||||||
|
cb.callout_flags = 0;
|
||||||
|
cb.capture_top = 1; /* No capture support */
|
||||||
|
cb.capture_last = 0;
|
||||||
|
cb.mark = NULL; /* No (*MARK) support */
|
||||||
|
|
||||||
|
/* Get data from the match context, if present, and fill in the remaining
|
||||||
|
fields in the match block. It is an error to set an offset limit without
|
||||||
|
setting the flag at compile time. */
|
||||||
|
|
||||||
|
if (mcontext == NULL)
|
||||||
|
{
|
||||||
|
mb->callout = NULL;
|
||||||
|
mb->memctl = re->memctl;
|
||||||
|
mb->match_limit = PRIV(default_match_context).match_limit;
|
||||||
|
mb->match_limit_depth = PRIV(default_match_context).depth_limit;
|
||||||
|
mb->heap_limit = PRIV(default_match_context).heap_limit;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (mcontext->offset_limit != PCRE2_UNSET)
|
||||||
|
{
|
||||||
|
if ((re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
|
||||||
|
return PCRE2_ERROR_BADOFFSETLIMIT;
|
||||||
|
bumpalong_limit = subject + mcontext->offset_limit;
|
||||||
|
}
|
||||||
|
mb->callout = mcontext->callout;
|
||||||
|
mb->callout_data = mcontext->callout_data;
|
||||||
|
mb->memctl = mcontext->memctl;
|
||||||
|
mb->match_limit = mcontext->match_limit;
|
||||||
|
mb->match_limit_depth = mcontext->depth_limit;
|
||||||
|
mb->heap_limit = mcontext->heap_limit;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mb->match_limit > re->limit_match)
|
||||||
|
mb->match_limit = re->limit_match;
|
||||||
|
|
||||||
|
if (mb->match_limit_depth > re->limit_depth)
|
||||||
|
mb->match_limit_depth = re->limit_depth;
|
||||||
|
|
||||||
|
if (mb->heap_limit > re->limit_heap)
|
||||||
|
mb->heap_limit = re->limit_heap;
|
||||||
|
|
||||||
|
mb->start_code = (PCRE2_SPTR)((const uint8_t *)re + re->code_start);
|
||||||
|
mb->tables = re->tables;
|
||||||
|
mb->start_subject = subject;
|
||||||
|
mb->end_subject = end_subject;
|
||||||
|
mb->start_offset = start_offset;
|
||||||
|
mb->allowemptypartial = (re->max_lookbehind > 0) ||
|
||||||
|
(re->flags & PCRE2_MATCH_EMPTY) != 0;
|
||||||
|
mb->moptions = options;
|
||||||
|
mb->poptions = re->overall_options;
|
||||||
|
mb->match_call_count = 0;
|
||||||
|
mb->heap_used = 0;
|
||||||
|
|
||||||
|
/* Process the \R and newline settings. */
|
||||||
|
|
||||||
|
mb->bsr_convention = re->bsr_convention;
|
||||||
|
mb->nltype = NLTYPE_FIXED;
|
||||||
|
switch(re->newline_convention)
|
||||||
|
{
|
||||||
|
case PCRE2_NEWLINE_CR:
|
||||||
|
mb->nllen = 1;
|
||||||
|
mb->nl[0] = CHAR_CR;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE2_NEWLINE_LF:
|
||||||
|
mb->nllen = 1;
|
||||||
|
mb->nl[0] = CHAR_NL;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE2_NEWLINE_NUL:
|
||||||
|
mb->nllen = 1;
|
||||||
|
mb->nl[0] = CHAR_NUL;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE2_NEWLINE_CRLF:
|
||||||
|
mb->nllen = 2;
|
||||||
|
mb->nl[0] = CHAR_CR;
|
||||||
|
mb->nl[1] = CHAR_NL;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE2_NEWLINE_ANY:
|
||||||
|
mb->nltype = NLTYPE_ANY;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE2_NEWLINE_ANYCRLF:
|
||||||
|
mb->nltype = NLTYPE_ANYCRLF;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
PCRE2_DEBUG_UNREACHABLE();
|
||||||
|
return PCRE2_ERROR_INTERNAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
|
||||||
|
we must also check that a starting offset does not point into the middle of a
|
||||||
|
multiunit character. We check only the portion of the subject that is going to
|
||||||
|
be inspected during matching - from the offset minus the maximum lookbehind
|
||||||
|
to the given length. This saves time when a small part of a large subject is
|
||||||
|
being matched by the use of a starting offset. Note that the maximum lookbehind
|
||||||
|
is a number of characters, not code units. */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
||||||
|
{
|
||||||
|
PCRE2_SPTR check_subject = start_match; /* start_match includes offset */
|
||||||
|
|
||||||
|
if (start_offset > 0)
|
||||||
|
{
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
unsigned int i;
|
||||||
|
if (start_match < end_subject && NOT_FIRSTCU(*start_match))
|
||||||
|
return PCRE2_ERROR_BADUTFOFFSET;
|
||||||
|
for (i = re->max_lookbehind; i > 0 && check_subject > subject; i--)
|
||||||
|
{
|
||||||
|
check_subject--;
|
||||||
|
while (check_subject > subject &&
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
(*check_subject & 0xc0) == 0x80)
|
||||||
|
#else /* 16-bit */
|
||||||
|
(*check_subject & 0xfc00) == 0xdc00)
|
||||||
|
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||||
|
check_subject--;
|
||||||
|
}
|
||||||
|
#else /* In the 32-bit library, one code unit equals one character. */
|
||||||
|
check_subject -= re->max_lookbehind;
|
||||||
|
if (check_subject < subject) check_subject = subject;
|
||||||
|
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Validate the relevant portion of the subject. After an error, adjust the
|
||||||
|
offset to be an absolute offset in the whole string. */
|
||||||
|
|
||||||
|
match_data->rc = PRIV(valid_utf)(check_subject,
|
||||||
|
length - (PCRE2_SIZE)(check_subject - subject), &(match_data->startchar));
|
||||||
|
if (match_data->rc != 0)
|
||||||
|
{
|
||||||
|
match_data->startchar += (PCRE2_SIZE)(check_subject - subject);
|
||||||
|
return match_data->rc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
/* Set up the first code unit to match, if available. If there's no first code
|
||||||
|
unit there may be a bitmap of possible first characters. */
|
||||||
|
|
||||||
|
if ((re->flags & PCRE2_FIRSTSET) != 0)
|
||||||
|
{
|
||||||
|
has_first_cu = TRUE;
|
||||||
|
first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
|
||||||
|
if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
|
||||||
|
{
|
||||||
|
first_cu2 = TABLE_GET(first_cu, mb->tables + fcc_offset, first_cu);
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
if (first_cu > 127 && !utf && (re->overall_options & PCRE2_UCP) != 0)
|
||||||
|
first_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(first_cu);
|
||||||
|
#else
|
||||||
|
if (first_cu > 127 && (utf || (re->overall_options & PCRE2_UCP) != 0))
|
||||||
|
first_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(first_cu);
|
||||||
|
#endif
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
|
||||||
|
start_bits = re->start_bitmap;
|
||||||
|
|
||||||
|
/* There may be a "last known required code unit" set. */
|
||||||
|
|
||||||
|
if ((re->flags & PCRE2_LASTSET) != 0)
|
||||||
|
{
|
||||||
|
has_req_cu = TRUE;
|
||||||
|
req_cu = req_cu2 = (PCRE2_UCHAR)(re->last_codeunit);
|
||||||
|
if ((re->flags & PCRE2_LASTCASELESS) != 0)
|
||||||
|
{
|
||||||
|
req_cu2 = TABLE_GET(req_cu, mb->tables + fcc_offset, req_cu);
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
if (req_cu > 127 && !utf && (re->overall_options & PCRE2_UCP) != 0)
|
||||||
|
req_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(req_cu);
|
||||||
|
#else
|
||||||
|
if (req_cu > 127 && (utf || (re->overall_options & PCRE2_UCP) != 0))
|
||||||
|
req_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(req_cu);
|
||||||
|
#endif
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT,
|
||||||
|
free the memory that was obtained. */
|
||||||
|
|
||||||
|
if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
|
||||||
|
{
|
||||||
|
match_data->memctl.free((void *)match_data->subject,
|
||||||
|
match_data->memctl.memory_data);
|
||||||
|
match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Fill in fields that are always returned in the match data. */
|
||||||
|
|
||||||
|
match_data->code = re;
|
||||||
|
match_data->subject = NULL; /* Default for no match */
|
||||||
|
match_data->mark = NULL;
|
||||||
|
match_data->matchedby = PCRE2_MATCHEDBY_DFA_INTERPRETER;
|
||||||
|
|
||||||
|
/* Call the main matching function, looping for a non-anchored regex after a
|
||||||
|
failed match. If not restarting, perform certain optimizations at the start of
|
||||||
|
a match. */
|
||||||
|
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
/* ----------------- Start of match optimizations ---------------- */
|
||||||
|
|
||||||
|
/* There are some optimizations that avoid running the match if a known
|
||||||
|
starting point is not found, or if a known later code unit is not present.
|
||||||
|
However, there is an option (settable at compile time) that disables
|
||||||
|
these, for testing and for ensuring that all callouts do actually occur.
|
||||||
|
The optimizations must also be avoided when restarting a DFA match. */
|
||||||
|
|
||||||
|
if ((re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0 &&
|
||||||
|
(options & PCRE2_DFA_RESTART) == 0)
|
||||||
|
{
|
||||||
|
/* If firstline is TRUE, the start of the match is constrained to the first
|
||||||
|
line of a multiline string. That is, the match must be before or at the
|
||||||
|
first newline following the start of matching. Temporarily adjust
|
||||||
|
end_subject so that we stop the optimization scans for a first code unit
|
||||||
|
immediately after the first character of a newline (the first code unit can
|
||||||
|
legitimately be a newline). If the match fails at the newline, later code
|
||||||
|
breaks this loop. */
|
||||||
|
|
||||||
|
if (firstline)
|
||||||
|
{
|
||||||
|
PCRE2_SPTR t = start_match;
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (utf)
|
||||||
|
{
|
||||||
|
while (t < end_subject && !IS_NEWLINE(t))
|
||||||
|
{
|
||||||
|
t++;
|
||||||
|
ACROSSCHAR(t < end_subject, t, t++);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
while (t < end_subject && !IS_NEWLINE(t)) t++;
|
||||||
|
end_subject = t;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Anchored: check the first code unit if one is recorded. This may seem
|
||||||
|
pointless but it can help in detecting a no match case without scanning for
|
||||||
|
the required code unit. */
|
||||||
|
|
||||||
|
if (anchored)
|
||||||
|
{
|
||||||
|
if (has_first_cu || start_bits != NULL)
|
||||||
|
{
|
||||||
|
BOOL ok = start_match < end_subject;
|
||||||
|
if (ok)
|
||||||
|
{
|
||||||
|
PCRE2_UCHAR c = UCHAR21TEST(start_match);
|
||||||
|
ok = has_first_cu && (c == first_cu || c == first_cu2);
|
||||||
|
if (!ok && start_bits != NULL)
|
||||||
|
{
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
|
if (c > 255) c = 255;
|
||||||
|
#endif
|
||||||
|
ok = (start_bits[c/8] & (1u << (c&7))) != 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!ok) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Not anchored. Advance to a unique first code unit if there is one. */
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (has_first_cu)
|
||||||
|
{
|
||||||
|
if (first_cu != first_cu2) /* Caseless */
|
||||||
|
{
|
||||||
|
/* In 16-bit and 32-bit modes we have to do our own search, so can
|
||||||
|
look for both cases at once. */
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
|
PCRE2_UCHAR smc;
|
||||||
|
while (start_match < end_subject &&
|
||||||
|
(smc = UCHAR21TEST(start_match)) != first_cu &&
|
||||||
|
smc != first_cu2)
|
||||||
|
start_match++;
|
||||||
|
#else
|
||||||
|
/* In 8-bit mode, the use of memchr() gives a big speed up, even
|
||||||
|
though we have to call it twice in order to find the earliest
|
||||||
|
occurrence of the code unit in either of its cases. Caching is used
|
||||||
|
to remember the positions of previously found code units. This can
|
||||||
|
make a huge difference when the strings are very long and only one
|
||||||
|
case is actually present. */
|
||||||
|
|
||||||
|
PCRE2_SPTR pp1 = NULL;
|
||||||
|
PCRE2_SPTR pp2 = NULL;
|
||||||
|
PCRE2_SIZE searchlength = end_subject - start_match;
|
||||||
|
|
||||||
|
/* If we haven't got a previously found position for first_cu, or if
|
||||||
|
the current starting position is later, we need to do a search. If
|
||||||
|
the code unit is not found, set it to the end. */
|
||||||
|
|
||||||
|
if (memchr_found_first_cu == NULL ||
|
||||||
|
start_match > memchr_found_first_cu)
|
||||||
|
{
|
||||||
|
pp1 = memchr(start_match, first_cu, searchlength);
|
||||||
|
memchr_found_first_cu = (pp1 == NULL)? end_subject : pp1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If the start is before a previously found position, use the
|
||||||
|
previous position, or NULL if a previous search failed. */
|
||||||
|
|
||||||
|
else pp1 = (memchr_found_first_cu == end_subject)? NULL :
|
||||||
|
memchr_found_first_cu;
|
||||||
|
|
||||||
|
/* Do the same thing for the other case. */
|
||||||
|
|
||||||
|
if (memchr_found_first_cu2 == NULL ||
|
||||||
|
start_match > memchr_found_first_cu2)
|
||||||
|
{
|
||||||
|
pp2 = memchr(start_match, first_cu2, searchlength);
|
||||||
|
memchr_found_first_cu2 = (pp2 == NULL)? end_subject : pp2;
|
||||||
|
}
|
||||||
|
|
||||||
|
else pp2 = (memchr_found_first_cu2 == end_subject)? NULL :
|
||||||
|
memchr_found_first_cu2;
|
||||||
|
|
||||||
|
/* Set the start to the end of the subject if neither case was found.
|
||||||
|
Otherwise, use the earlier found point. */
|
||||||
|
|
||||||
|
if (pp1 == NULL)
|
||||||
|
start_match = (pp2 == NULL)? end_subject : pp2;
|
||||||
|
else
|
||||||
|
start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2;
|
||||||
|
|
||||||
|
#endif /* 8-bit handling */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The caseful case is much simpler. */
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
|
while (start_match < end_subject && UCHAR21TEST(start_match) !=
|
||||||
|
first_cu)
|
||||||
|
start_match++;
|
||||||
|
#else /* 8-bit code units */
|
||||||
|
start_match = memchr(start_match, first_cu, end_subject - start_match);
|
||||||
|
if (start_match == NULL) start_match = end_subject;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If we can't find the required code unit, having reached the true end
|
||||||
|
of the subject, break the bumpalong loop, to force a match failure,
|
||||||
|
except when doing partial matching, when we let the next cycle run at
|
||||||
|
the end of the subject. To see why, consider the pattern /(?<=abc)def/,
|
||||||
|
which partially matches "abc", even though the string does not contain
|
||||||
|
the starting character "d". If we have not reached the true end of the
|
||||||
|
subject (PCRE2_FIRSTLINE caused end_subject to be temporarily modified)
|
||||||
|
we also let the cycle run, because the matching string is legitimately
|
||||||
|
allowed to start with the first code unit of a newline. */
|
||||||
|
|
||||||
|
if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0 &&
|
||||||
|
start_match >= mb->end_subject)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If there's no first code unit, advance to just after a linebreak for a
|
||||||
|
multiline match if required. */
|
||||||
|
|
||||||
|
else if (startline)
|
||||||
|
{
|
||||||
|
if (start_match > mb->start_subject + start_offset)
|
||||||
|
{
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (utf)
|
||||||
|
{
|
||||||
|
while (start_match < end_subject && !WAS_NEWLINE(start_match))
|
||||||
|
{
|
||||||
|
start_match++;
|
||||||
|
ACROSSCHAR(start_match < end_subject, start_match, start_match++);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
while (start_match < end_subject && !WAS_NEWLINE(start_match))
|
||||||
|
start_match++;
|
||||||
|
|
||||||
|
/* If we have just passed a CR and the newline option is ANY or
|
||||||
|
ANYCRLF, and we are now at a LF, advance the match position by one
|
||||||
|
more code unit. */
|
||||||
|
|
||||||
|
if (start_match[-1] == CHAR_CR &&
|
||||||
|
(mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
|
||||||
|
start_match < end_subject &&
|
||||||
|
UCHAR21TEST(start_match) == CHAR_NL)
|
||||||
|
start_match++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If there's no first code unit or a requirement for a multiline line
|
||||||
|
start, advance to a non-unique first code unit if any have been
|
||||||
|
identified. The bitmap contains only 256 bits. When code units are 16 or
|
||||||
|
32 bits wide, all code units greater than 254 set the 255 bit. */
|
||||||
|
|
||||||
|
else if (start_bits != NULL)
|
||||||
|
{
|
||||||
|
while (start_match < end_subject)
|
||||||
|
{
|
||||||
|
uint32_t c = UCHAR21TEST(start_match);
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
|
if (c > 255) c = 255;
|
||||||
|
#endif
|
||||||
|
if ((start_bits[c/8] & (1u << (c&7))) != 0) break;
|
||||||
|
start_match++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* See comment above in first_cu checking about the next line. */
|
||||||
|
|
||||||
|
if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0 &&
|
||||||
|
start_match >= mb->end_subject)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} /* End of first code unit handling */
|
||||||
|
|
||||||
|
/* Restore fudged end_subject */
|
||||||
|
|
||||||
|
end_subject = mb->end_subject;
|
||||||
|
|
||||||
|
/* The following two optimizations are disabled for partial matching. */
|
||||||
|
|
||||||
|
if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0)
|
||||||
|
{
|
||||||
|
PCRE2_SPTR p;
|
||||||
|
|
||||||
|
/* The minimum matching length is a lower bound; no actual string of that
|
||||||
|
length may actually match the pattern. Although the value is, strictly,
|
||||||
|
in characters, we treat it as code units to avoid spending too much time
|
||||||
|
in this optimization. */
|
||||||
|
|
||||||
|
if (end_subject - start_match < re->minlength) goto NOMATCH_EXIT;
|
||||||
|
|
||||||
|
/* If req_cu is set, we know that that code unit must appear in the
|
||||||
|
subject for the match to succeed. If the first code unit is set, req_cu
|
||||||
|
must be later in the subject; otherwise the test starts at the match
|
||||||
|
point. This optimization can save a huge amount of backtracking in
|
||||||
|
patterns with nested unlimited repeats that aren't going to match.
|
||||||
|
Writing separate code for cased/caseless versions makes it go faster, as
|
||||||
|
does using an autoincrement and backing off on a match. As in the case of
|
||||||
|
the first code unit, using memchr() in the 8-bit library gives a big
|
||||||
|
speed up. Unlike the first_cu check above, we do not need to call
|
||||||
|
memchr() twice in the caseless case because we only need to check for the
|
||||||
|
presence of the character in either case, not find the first occurrence.
|
||||||
|
|
||||||
|
The search can be skipped if the code unit was found later than the
|
||||||
|
current starting point in a previous iteration of the bumpalong loop.
|
||||||
|
|
||||||
|
HOWEVER: when the subject string is very, very long, searching to its end
|
||||||
|
can take a long time, and give bad performance on quite ordinary
|
||||||
|
patterns. This showed up when somebody was matching something like
|
||||||
|
/^\d+C/ on a 32-megabyte string... so we don't do this when the string is
|
||||||
|
sufficiently long, but it's worth searching a lot more for unanchored
|
||||||
|
patterns. */
|
||||||
|
|
||||||
|
p = start_match + (has_first_cu? 1:0);
|
||||||
|
if (has_req_cu && p > req_cu_ptr)
|
||||||
|
{
|
||||||
|
PCRE2_SIZE check_length = end_subject - start_match;
|
||||||
|
|
||||||
|
if (check_length < REQ_CU_MAX ||
|
||||||
|
(!anchored && check_length < REQ_CU_MAX * 1000))
|
||||||
|
{
|
||||||
|
if (req_cu != req_cu2) /* Caseless */
|
||||||
|
{
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
|
while (p < end_subject)
|
||||||
|
{
|
||||||
|
uint32_t pp = UCHAR21INCTEST(p);
|
||||||
|
if (pp == req_cu || pp == req_cu2) { p--; break; }
|
||||||
|
}
|
||||||
|
#else /* 8-bit code units */
|
||||||
|
PCRE2_SPTR pp = p;
|
||||||
|
p = memchr(pp, req_cu, end_subject - pp);
|
||||||
|
if (p == NULL)
|
||||||
|
{
|
||||||
|
p = memchr(pp, req_cu2, end_subject - pp);
|
||||||
|
if (p == NULL) p = end_subject;
|
||||||
|
}
|
||||||
|
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The caseful case */
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
|
while (p < end_subject)
|
||||||
|
{
|
||||||
|
if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
|
||||||
|
}
|
||||||
|
|
||||||
|
#else /* 8-bit code units */
|
||||||
|
p = memchr(p, req_cu, end_subject - p);
|
||||||
|
if (p == NULL) p = end_subject;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If we can't find the required code unit, break the matching loop,
|
||||||
|
forcing a match failure. */
|
||||||
|
|
||||||
|
if (p >= end_subject) break;
|
||||||
|
|
||||||
|
/* If we have found the required code unit, save the point where we
|
||||||
|
found it, so that we don't search again next time round the loop if
|
||||||
|
the start hasn't passed this code unit yet. */
|
||||||
|
|
||||||
|
req_cu_ptr = p;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ------------ End of start of match optimizations ------------ */
|
||||||
|
|
||||||
|
/* Give no match if we have passed the bumpalong limit. */
|
||||||
|
|
||||||
|
if (start_match > bumpalong_limit) break;
|
||||||
|
|
||||||
|
/* OK, now we can do the business */
|
||||||
|
|
||||||
|
mb->start_used_ptr = start_match;
|
||||||
|
mb->last_used_ptr = start_match;
|
||||||
|
mb->recursive = NULL;
|
||||||
|
|
||||||
|
rc = internal_dfa_match(
|
||||||
|
mb, /* fixed match data */
|
||||||
|
mb->start_code, /* this subexpression's code */
|
||||||
|
start_match, /* where we currently are */
|
||||||
|
start_offset, /* start offset in subject */
|
||||||
|
match_data->ovector, /* offset vector */
|
||||||
|
(uint32_t)match_data->oveccount * 2, /* actual size of same */
|
||||||
|
workspace, /* workspace vector */
|
||||||
|
(int)wscount, /* size of same */
|
||||||
|
0, /* function recurse level */
|
||||||
|
base_recursion_workspace); /* initial workspace for recursion */
|
||||||
|
|
||||||
|
/* Anything other than "no match" means we are done, always; otherwise, carry
|
||||||
|
on only if not anchored. */
|
||||||
|
|
||||||
|
if (rc != PCRE2_ERROR_NOMATCH || anchored)
|
||||||
|
{
|
||||||
|
if (rc == PCRE2_ERROR_PARTIAL && match_data->oveccount > 0)
|
||||||
|
{
|
||||||
|
match_data->ovector[0] = (PCRE2_SIZE)(start_match - subject);
|
||||||
|
match_data->ovector[1] = (PCRE2_SIZE)(end_subject - subject);
|
||||||
|
}
|
||||||
|
match_data->subject_length = length;
|
||||||
|
match_data->leftchar = (PCRE2_SIZE)(mb->start_used_ptr - subject);
|
||||||
|
match_data->rightchar = (PCRE2_SIZE)(mb->last_used_ptr - subject);
|
||||||
|
match_data->startchar = (PCRE2_SIZE)(start_match - subject);
|
||||||
|
match_data->rc = rc;
|
||||||
|
|
||||||
|
if (rc >= 0 &&(options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
|
||||||
|
{
|
||||||
|
length = CU2BYTES(length + was_zero_terminated);
|
||||||
|
match_data->subject = match_data->memctl.malloc(length,
|
||||||
|
match_data->memctl.memory_data);
|
||||||
|
if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||||
|
memcpy((void *)match_data->subject, subject, length);
|
||||||
|
match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (rc >= 0 || rc == PCRE2_ERROR_PARTIAL) match_data->subject = subject;
|
||||||
|
}
|
||||||
|
goto EXIT;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Advance to the next subject character unless we are at the end of a line
|
||||||
|
and firstline is set. */
|
||||||
|
|
||||||
|
if (firstline && IS_NEWLINE(start_match)) break;
|
||||||
|
start_match++;
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (utf)
|
||||||
|
{
|
||||||
|
ACROSSCHAR(start_match < end_subject, start_match, start_match++);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
if (start_match > end_subject) break;
|
||||||
|
|
||||||
|
/* If we have just passed a CR and we are now at a LF, and the pattern does
|
||||||
|
not contain any explicit matches for \r or \n, and the newline option is CRLF
|
||||||
|
or ANY or ANYCRLF, advance the match position by one more character. */
|
||||||
|
|
||||||
|
if (UCHAR21TEST(start_match - 1) == CHAR_CR &&
|
||||||
|
start_match < end_subject &&
|
||||||
|
UCHAR21TEST(start_match) == CHAR_NL &&
|
||||||
|
(re->flags & PCRE2_HASCRORLF) == 0 &&
|
||||||
|
(mb->nltype == NLTYPE_ANY ||
|
||||||
|
mb->nltype == NLTYPE_ANYCRLF ||
|
||||||
|
mb->nllen == 2))
|
||||||
|
start_match++;
|
||||||
|
|
||||||
|
} /* "Bumpalong" loop */
|
||||||
|
|
||||||
|
NOMATCH_EXIT:
|
||||||
|
rc = PCRE2_ERROR_NOMATCH;
|
||||||
|
|
||||||
|
EXIT:
|
||||||
|
while (rws->next != NULL)
|
||||||
|
{
|
||||||
|
RWS_anchor *next = rws->next;
|
||||||
|
rws->next = next->next;
|
||||||
|
mb->memctl.free(next, mb->memctl.memory_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* These #undefs are here to enable unity builds with CMake. */
|
||||||
|
|
||||||
|
#undef NLBLOCK /* Block containing newline information */
|
||||||
|
#undef PSSTART /* Field containing processed string start */
|
||||||
|
#undef PSEND /* Field containing processed string end */
|
||||||
|
|
||||||
|
/* End of pcre2_dfa_match.c */
|
||||||
297
3rd/pcre2/src/pcre2_dftables.c
Normal file
297
3rd/pcre2/src/pcre2_dftables.c
Normal file
@@ -0,0 +1,297 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This is a freestanding support program to generate a file containing
|
||||||
|
character tables for PCRE2. The tables are built using the pcre2_maketables()
|
||||||
|
function, which is part of the PCRE2 API. By default, the system's "C" locale
|
||||||
|
is used rather than what the building user happens to have set, but the -L
|
||||||
|
option can be used to select the current locale from the LC_ALL environment
|
||||||
|
variable. By default, the tables are written in source form, but if -b is
|
||||||
|
given, they are written in binary. */
|
||||||
|
|
||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
#include "config.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <locale.h>
|
||||||
|
|
||||||
|
#define PCRE2_DFTABLES /* for pcre2_internal.h, pcre2_maketables.c */
|
||||||
|
|
||||||
|
#define PCRE2_CODE_UNIT_WIDTH 0 /* Must be set, but not relevant here */
|
||||||
|
#include "pcre2_internal.h"
|
||||||
|
|
||||||
|
#include "pcre2_maketables.c"
|
||||||
|
|
||||||
|
|
||||||
|
/* Names of the character classes that have their own 32-byte bit map in the
generated tables. main() prints one name per map, in sequence, as a comment
while it writes the class bit maps, so the order here must follow the order
in which the maps are laid out. */

static const char *classlist[] =
  {
  "space",
  "xdigit",
  "digit",
  "upper",
  "lower",
  "word",
  "graph",
  "print",
  "punct",
  "cntrl"
  };
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Usage *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* Print a short summary of the command line syntax and the two supported
options to stderr. The return value of the output call is deliberately
ignored: there is nothing useful to do if writing to stderr fails. */

static void
usage(void)
{
static const char help_text[] =
  "Usage: pcre2_dftables [options] <output file>\n"
  " -b Write output in binary (default is source code)\n"
  " -L Use locale from LC_ALL (default is \"C\" locale)\n";

(void)fputs(help_text, stderr);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Entry point *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
FILE *f;
|
||||||
|
int i;
|
||||||
|
int nclass = 0;
|
||||||
|
BOOL binary = FALSE;
|
||||||
|
char *env = (char *)"C";
|
||||||
|
const uint8_t *tables;
|
||||||
|
const uint8_t *base_of_tables;
|
||||||
|
|
||||||
|
/* Process options */
|
||||||
|
|
||||||
|
for (i = 1; i < argc; i++)
|
||||||
|
{
|
||||||
|
char *arg = argv[i];
|
||||||
|
if (*arg != '-') break;
|
||||||
|
|
||||||
|
if (strcmp(arg, "-help") == 0 || strcmp(arg, "--help") == 0)
|
||||||
|
{
|
||||||
|
usage();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
else if (strcmp(arg, "-L") == 0)
|
||||||
|
{
|
||||||
|
if (setlocale(LC_ALL, "") == NULL)
|
||||||
|
{
|
||||||
|
(void)fprintf(stderr, "pcre2_dftables: setlocale() failed\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
env = getenv("LC_ALL");
|
||||||
|
}
|
||||||
|
|
||||||
|
else if (strcmp(arg, "-b") == 0)
|
||||||
|
binary = TRUE;
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
(void)fprintf(stderr, "pcre2_dftables: unrecognized option %s\n", arg);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i != argc - 1)
|
||||||
|
{
|
||||||
|
(void)fprintf(stderr, "pcre2_dftables: one filename argument is required\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Make the tables */
|
||||||
|
|
||||||
|
tables = maketables();
|
||||||
|
base_of_tables = tables;
|
||||||
|
|
||||||
|
f = fopen(argv[i], "wb");
|
||||||
|
if (f == NULL)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "pcre2_dftables: failed to open %s for writing\n", argv[1]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If -b was specified, we write the tables in binary. */
|
||||||
|
|
||||||
|
if (binary)
|
||||||
|
{
|
||||||
|
int yield = 0;
|
||||||
|
size_t len = fwrite(tables, 1, TABLES_LENGTH, f);
|
||||||
|
if (len != TABLES_LENGTH)
|
||||||
|
{
|
||||||
|
(void)fprintf(stderr, "pcre2_dftables: fwrite() returned wrong length %d "
|
||||||
|
"instead of %d\n", (int)len, TABLES_LENGTH);
|
||||||
|
yield = 1;
|
||||||
|
}
|
||||||
|
fclose(f);
|
||||||
|
free((void *)base_of_tables);
|
||||||
|
return yield;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Write the tables as source code for inclusion in the PCRE2 library. There
|
||||||
|
are several fprintf() calls here, because gcc in pedantic mode complains about
|
||||||
|
the very long string otherwise. */
|
||||||
|
|
||||||
|
(void)fprintf(f,
|
||||||
|
"/*************************************************\n"
|
||||||
|
"* Perl-Compatible Regular Expressions *\n"
|
||||||
|
"*************************************************/\n\n"
|
||||||
|
"/* This file was automatically written by the pcre2_dftables auxiliary\n"
|
||||||
|
"program. It contains character tables that are used when no external\n"
|
||||||
|
"tables are passed to PCRE2 by the application that calls it. The tables\n"
|
||||||
|
"are used only for characters whose code values are less than 256, and\n"
|
||||||
|
"only relevant if not in UCP mode. */\n\n");
|
||||||
|
|
||||||
|
(void)fprintf(f,
|
||||||
|
"/* This set of tables was written in the %s locale. */\n\n", env);
|
||||||
|
|
||||||
|
(void)fprintf(f,
|
||||||
|
"/* The pcre2_ftables program (which is distributed with PCRE2) can be used\n"
|
||||||
|
"to build alternative versions of this file. This is necessary if you are\n"
|
||||||
|
"running in an EBCDIC environment, or if you want to default to a different\n"
|
||||||
|
"encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates\n"
|
||||||
|
"these tables in the \"C\" locale by default. This happens automatically if\n"
|
||||||
|
"PCRE2 is configured with --enable-rebuild-chartables. However, you can run\n"
|
||||||
|
"pcre2_dftables manually with the -L option to build tables using the LC_ALL\n"
|
||||||
|
"locale. */\n\n");
|
||||||
|
|
||||||
|
/* Force config.h in z/OS */
|
||||||
|
|
||||||
|
#if defined NATIVE_ZOS
|
||||||
|
(void)fprintf(f,
|
||||||
|
"/* For z/OS, config.h is forced */\n"
|
||||||
|
"#ifndef HAVE_CONFIG_H\n"
|
||||||
|
"#define HAVE_CONFIG_H 1\n"
|
||||||
|
"#endif\n\n");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
(void)fprintf(f,
|
||||||
|
"#ifdef HAVE_CONFIG_H\n"
|
||||||
|
"#include \"config.h\"\n"
|
||||||
|
"#endif\n\n"
|
||||||
|
"#include \"pcre2_internal.h\"\n\n");
|
||||||
|
|
||||||
|
(void)fprintf(f,
|
||||||
|
"const uint8_t PRIV(default_tables)[] = {\n\n"
|
||||||
|
"/* This table is a lower casing table. */\n\n");
|
||||||
|
|
||||||
|
(void)fprintf(f, " ");
|
||||||
|
for (i = 0; i < 256; i++)
|
||||||
|
{
|
||||||
|
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
||||||
|
fprintf(f, "%3d", *tables++);
|
||||||
|
if (i != 255) fprintf(f, ",");
|
||||||
|
}
|
||||||
|
(void)fprintf(f, ",\n\n");
|
||||||
|
|
||||||
|
(void)fprintf(f, "/* This table is a case flipping table. */\n\n");
|
||||||
|
|
||||||
|
(void)fprintf(f, " ");
|
||||||
|
for (i = 0; i < 256; i++)
|
||||||
|
{
|
||||||
|
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
||||||
|
fprintf(f, "%3d", *tables++);
|
||||||
|
if (i != 255) fprintf(f, ",");
|
||||||
|
}
|
||||||
|
(void)fprintf(f, ",\n\n");
|
||||||
|
|
||||||
|
(void)fprintf(f,
|
||||||
|
"/* This table contains bit maps for various character classes. Each map is 32\n"
|
||||||
|
"bytes long and the bits run from the least significant end of each byte. The\n"
|
||||||
|
"classes that have their own maps are: space, xdigit, digit, upper, lower, word,\n"
|
||||||
|
"graph, print, punct, and cntrl. Other classes are built from combinations. */\n\n");
|
||||||
|
|
||||||
|
(void)fprintf(f, " ");
|
||||||
|
for (i = 0; i < cbit_length; i++)
|
||||||
|
{
|
||||||
|
if ((i & 7) == 0 && i != 0)
|
||||||
|
{
|
||||||
|
if ((i & 31) == 0) (void)fprintf(f, "\n");
|
||||||
|
if ((i & 24) == 8) (void)fprintf(f, " /* %s */", classlist[nclass++]);
|
||||||
|
(void)fprintf(f, "\n ");
|
||||||
|
}
|
||||||
|
(void)fprintf(f, "0x%02x", *tables++);
|
||||||
|
if (i != cbit_length - 1) (void)fprintf(f, ",");
|
||||||
|
}
|
||||||
|
(void)fprintf(f, ",\n\n");
|
||||||
|
|
||||||
|
(void)fprintf(f,
|
||||||
|
"/* This table identifies various classes of character by individual bits:\n"
|
||||||
|
" 0x%02x white space character\n"
|
||||||
|
" 0x%02x letter\n"
|
||||||
|
" 0x%02x lower case letter\n"
|
||||||
|
" 0x%02x decimal digit\n"
|
||||||
|
" 0x%02x word (alphanumeric or '_')\n*/\n\n",
|
||||||
|
ctype_space, ctype_letter, ctype_lcletter, ctype_digit, ctype_word);
|
||||||
|
|
||||||
|
(void)fprintf(f, " ");
|
||||||
|
for (i = 0; i < 256; i++)
|
||||||
|
{
|
||||||
|
if ((i & 7) == 0 && i != 0)
|
||||||
|
{
|
||||||
|
(void)fprintf(f, " /* ");
|
||||||
|
if (isprint(i-8)) (void)fprintf(f, " %c -", i-8);
|
||||||
|
else (void)fprintf(f, "%3d-", i-8);
|
||||||
|
if (isprint(i-1)) (void)fprintf(f, " %c ", i-1);
|
||||||
|
else (void)fprintf(f, "%3d", i-1);
|
||||||
|
(void)fprintf(f, " */\n ");
|
||||||
|
}
|
||||||
|
(void)fprintf(f, "0x%02x", *tables++);
|
||||||
|
if (i != 255) (void)fprintf(f, ",");
|
||||||
|
}
|
||||||
|
|
||||||
|
(void)fprintf(f, "};/* ");
|
||||||
|
if (isprint(i-8)) (void)fprintf(f, " %c -", i-8);
|
||||||
|
else (void)fprintf(f, "%3d-", i-8);
|
||||||
|
if (isprint(i-1)) (void)fprintf(f, " %c ", i-1);
|
||||||
|
else (void)fprintf(f, "%3d", i-1);
|
||||||
|
(void)fprintf(f, " */\n\n/* End of pcre2_chartables.c */\n");
|
||||||
|
|
||||||
|
fclose(f);
|
||||||
|
free((void *)base_of_tables);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre2_dftables.c */
|
||||||
367
3rd/pcre2/src/pcre2_error.c
Normal file
367
3rd/pcre2/src/pcre2_error.c
Normal file
@@ -0,0 +1,367 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
#include "config.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "pcre2_internal.h"
|
||||||
|
|
||||||
|
#define STRING(a) # a
|
||||||
|
#define XSTRING(s) STRING(s)
|
||||||
|
|
||||||
|
/* The texts of compile-time error messages. Compile-time error numbers start
|
||||||
|
at COMPILE_ERROR_BASE (100).
|
||||||
|
|
||||||
|
This used to be a table of strings, but in order to reduce the number of
|
||||||
|
relocations needed when a shared library is loaded dynamically, it is now one
|
||||||
|
long string. We cannot use a table of offsets, because the lengths of inserts
|
||||||
|
such as XSTRING(MAX_NAME_SIZE) are not known. Instead,
|
||||||
|
pcre2_get_error_message() counts through to the one it wants - this isn't a
|
||||||
|
performance issue because these strings are used only when there is an error.
|
||||||
|
|
||||||
|
Each substring ends with \0 to insert a null character. This includes the final
|
||||||
|
substring, so that the whole string ends with \0\0, which can be detected when
|
||||||
|
counting through. */
|
||||||
|
|
||||||
|
static const unsigned char compile_error_texts[] =
|
||||||
|
"no error\0"
|
||||||
|
"\\ at end of pattern\0"
|
||||||
|
"\\c at end of pattern\0"
|
||||||
|
"unrecognized character follows \\\0"
|
||||||
|
"numbers out of order in {} quantifier\0"
|
||||||
|
/* 5 */
|
||||||
|
"number too big in {} quantifier\0"
|
||||||
|
"missing terminating ] for character class\0"
|
||||||
|
"escape sequence is invalid in character class\0"
|
||||||
|
"range out of order in character class\0"
|
||||||
|
"quantifier does not follow a repeatable item\0"
|
||||||
|
/* 10 */
|
||||||
|
"internal error: unexpected repeat\0"
|
||||||
|
"unrecognized character after (? or (?-\0"
|
||||||
|
"POSIX named classes are supported only within a class\0"
|
||||||
|
"POSIX collating elements are not supported\0"
|
||||||
|
"missing closing parenthesis\0"
|
||||||
|
/* 15 */
|
||||||
|
"reference to non-existent subpattern\0"
|
||||||
|
"pattern passed as NULL with non-zero length\0"
|
||||||
|
"unrecognised compile-time option bit(s)\0"
|
||||||
|
"missing ) after (?# comment\0"
|
||||||
|
"parentheses are too deeply nested\0"
|
||||||
|
/* 20 */
|
||||||
|
"regular expression is too large\0"
|
||||||
|
"failed to allocate heap memory\0"
|
||||||
|
"unmatched closing parenthesis\0"
|
||||||
|
"internal error: code overflow\0"
|
||||||
|
"missing closing parenthesis for condition\0"
|
||||||
|
/* 25 */
|
||||||
|
"length of lookbehind assertion is not limited\0"
|
||||||
|
"a relative value of zero is not allowed\0"
|
||||||
|
"conditional subpattern contains more than two branches\0"
|
||||||
|
"atomic assertion expected after (?( or (?(?C)\0"
|
||||||
|
"digit expected after (?+ or (?-\0"
|
||||||
|
/* 30 */
|
||||||
|
"unknown POSIX class name\0"
|
||||||
|
"internal error in pcre2_study(): should not occur\0"
|
||||||
|
"this version of PCRE2 does not have Unicode support\0"
|
||||||
|
"parentheses are too deeply nested (stack check)\0"
|
||||||
|
"character code point value in \\x{} or \\o{} is too large\0"
|
||||||
|
/* 35 */
|
||||||
|
"lookbehind is too complicated\0"
|
||||||
|
"\\C is not allowed in a lookbehind assertion in UTF-" XSTRING(PCRE2_CODE_UNIT_WIDTH) " mode\0"
|
||||||
|
"PCRE2 does not support \\F, \\L, \\l, \\N{name}, \\U, or \\u\0"
|
||||||
|
"number after (?C is greater than 255\0"
|
||||||
|
"closing parenthesis for (?C expected\0"
|
||||||
|
/* 40 */
|
||||||
|
"invalid escape sequence in (*VERB) name\0"
|
||||||
|
"unrecognized character after (?P\0"
|
||||||
|
"syntax error in subpattern name (missing terminator?)\0"
|
||||||
|
"two named subpatterns have the same name (PCRE2_DUPNAMES not set)\0"
|
||||||
|
"subpattern name must start with a non-digit\0"
|
||||||
|
/* 45 */
|
||||||
|
"this version of PCRE2 does not have support for \\P, \\p, or \\X\0"
|
||||||
|
"malformed \\P or \\p sequence\0"
|
||||||
|
"unknown property after \\P or \\p\0"
|
||||||
|
"subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " code units)\0"
|
||||||
|
"too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"
|
||||||
|
/* 50 */
|
||||||
|
"invalid range in character class\0"
|
||||||
|
"octal value is greater than \\377 in 8-bit non-UTF-8 mode\0"
|
||||||
|
"internal error: overran compiling workspace\0"
|
||||||
|
"internal error: previously-checked referenced subpattern not found\0"
|
||||||
|
"DEFINE subpattern contains more than one branch\0"
|
||||||
|
/* 55 */
|
||||||
|
"missing opening brace after \\o\0"
|
||||||
|
"internal error: unknown newline setting\0"
|
||||||
|
"\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
|
||||||
|
"(?R (recursive pattern call) must be followed by a closing parenthesis\0"
|
||||||
|
/* "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0" */
|
||||||
|
"obsolete error (should not occur)\0" /* Was the above */
|
||||||
|
/* 60 */
|
||||||
|
"(*VERB) not recognized or malformed\0"
|
||||||
|
"subpattern number is too big\0"
|
||||||
|
"subpattern name expected\0"
|
||||||
|
"internal error: parsed pattern overflow\0"
|
||||||
|
"non-octal character in \\o{} (closing brace missing?)\0"
|
||||||
|
/* 65 */
|
||||||
|
"different names for subpatterns of the same number are not allowed\0"
|
||||||
|
"(*MARK) must have an argument\0"
|
||||||
|
"non-hex character in \\x{} (closing brace missing?)\0"
|
||||||
|
#ifndef EBCDIC
|
||||||
|
"\\c must be followed by a printable ASCII character\0"
|
||||||
|
#else
|
||||||
|
"\\c must be followed by a letter or one of [\\]^_?\0"
|
||||||
|
#endif
|
||||||
|
"\\k is not followed by a braced, angle-bracketed, or quoted name\0"
|
||||||
|
/* 70 */
|
||||||
|
"internal error: unknown meta code in check_lookbehinds()\0"
|
||||||
|
"\\N is not supported in a class\0"
|
||||||
|
"callout string is too long\0"
|
||||||
|
"disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
|
||||||
|
"using UTF is disabled by the application\0"
|
||||||
|
/* 75 */
|
||||||
|
"using UCP is disabled by the application\0"
|
||||||
|
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
|
||||||
|
"character code point value in \\u.... sequence is too large\0"
|
||||||
|
"digits missing after \\x or in \\x{} or \\o{} or \\N{U+}\0"
|
||||||
|
"syntax error or number too big in (?(VERSION condition\0"
|
||||||
|
/* 80 */
|
||||||
|
"internal error: unknown opcode in auto_possessify()\0"
|
||||||
|
"missing terminating delimiter for callout with string argument\0"
|
||||||
|
"unrecognized string delimiter follows (?C\0"
|
||||||
|
"using \\C is disabled by the application\0"
|
||||||
|
"(?| and/or (?J: or (?x: parentheses are too deeply nested\0"
|
||||||
|
/* 85 */
|
||||||
|
"using \\C is disabled in this PCRE2 library\0"
|
||||||
|
"regular expression is too complicated\0"
|
||||||
|
"lookbehind assertion is too long\0"
|
||||||
|
"pattern string is longer than the limit set by the application\0"
|
||||||
|
"internal error: unknown code in parsed pattern\0"
|
||||||
|
/* 90 */
|
||||||
|
"internal error: bad code value in parsed_skip()\0"
|
||||||
|
"PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode\0"
|
||||||
|
"invalid option bits with PCRE2_LITERAL\0"
|
||||||
|
"\\N{U+dddd} is supported only in Unicode (UTF) mode\0"
|
||||||
|
"invalid hyphen in option setting\0"
|
||||||
|
/* 95 */
|
||||||
|
"(*alpha_assertion) not recognized\0"
|
||||||
|
"script runs require Unicode support, which this version of PCRE2 does not have\0"
|
||||||
|
"too many capturing groups (maximum 65535)\0"
|
||||||
|
"octal digit missing after \\0 (PCRE2_EXTRA_NO_BS0 is set)\0"
|
||||||
|
"\\K is not allowed in lookarounds (but see PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)\0"
|
||||||
|
/* 100 */
|
||||||
|
"branch too long in variable-length lookbehind assertion\0"
|
||||||
|
"compiled pattern would be longer than the limit set by the application\0"
|
||||||
|
"octal value given by \\ddd is greater than \\377 (forbidden by PCRE2_EXTRA_PYTHON_OCTAL)\0"
|
||||||
|
"using callouts is disabled by the application\0"
|
||||||
|
"PCRE2_EXTRA_TURKISH_CASING require Unicode (UTF or UCP) mode\0"
|
||||||
|
/* 105 */
|
||||||
|
"PCRE2_EXTRA_TURKISH_CASING requires UTF in 8-bit mode\0"
|
||||||
|
"PCRE2_EXTRA_TURKISH_CASING and PCRE2_EXTRA_CASELESS_RESTRICT are not compatible\0"
|
||||||
|
"extended character class nesting is too deep\0"
|
||||||
|
"invalid operator in extended character class\0"
|
||||||
|
"unexpected operator in extended character class (no preceding operand)\0"
|
||||||
|
/* 110 */
|
||||||
|
"expected operand after operator in extended character class\0"
|
||||||
|
"square brackets needed to clarify operator precedence in extended character class\0"
|
||||||
|
"missing terminating ] for extended character class (note '[' must be escaped under PCRE2_ALT_EXTENDED_CLASS)\0"
|
||||||
|
"unexpected expression in extended character class (no preceding operator)\0"
|
||||||
|
"empty expression in extended character class\0"
|
||||||
|
/* 115 */
|
||||||
|
"terminating ] with no following closing parenthesis in (?[...]\0"
|
||||||
|
"unexpected character in (?[...]) extended character class\0"
|
||||||
|
;
|
||||||
|
|
||||||
|
/* Match-time and UTF error messages, held as one block of consecutive
zero-terminated strings. A message is located by counting strings from the
start of the block; the terminator that the compiler appends after the final
literal serves as an empty string that marks the end of the table. Each
numbered comment gives the index of the string that follows it. */

static const unsigned char match_error_texts[] =
  /* 0 */
  "no error\0"
  "no match\0"
  "partial match\0"
  "UTF-8 error: 1 byte missing at end\0"
  "UTF-8 error: 2 bytes missing at end\0"
  /* 5 */
  "UTF-8 error: 3 bytes missing at end\0"
  "UTF-8 error: 4 bytes missing at end\0"
  "UTF-8 error: 5 bytes missing at end\0"
  "UTF-8 error: byte 2 top bits not 0x80\0"
  "UTF-8 error: byte 3 top bits not 0x80\0"
  /* 10 */
  "UTF-8 error: byte 4 top bits not 0x80\0"
  "UTF-8 error: byte 5 top bits not 0x80\0"
  "UTF-8 error: byte 6 top bits not 0x80\0"
  "UTF-8 error: 5-byte character is not allowed (RFC 3629)\0"
  "UTF-8 error: 6-byte character is not allowed (RFC 3629)\0"
  /* 15 */
  "UTF-8 error: code points greater than 0x10ffff are not defined\0"
  "UTF-8 error: code points 0xd800-0xdfff are not defined\0"
  "UTF-8 error: overlong 2-byte sequence\0"
  "UTF-8 error: overlong 3-byte sequence\0"
  "UTF-8 error: overlong 4-byte sequence\0"
  /* 20 */
  "UTF-8 error: overlong 5-byte sequence\0"
  "UTF-8 error: overlong 6-byte sequence\0"
  "UTF-8 error: isolated byte with 0x80 bit set\0"
  "UTF-8 error: illegal byte (0xfe or 0xff)\0"
  "UTF-16 error: missing low surrogate at end\0"
  /* 25 */
  "UTF-16 error: invalid low surrogate\0"
  "UTF-16 error: isolated low surrogate\0"
  "UTF-32 error: code points 0xd800-0xdfff are not defined\0"
  "UTF-32 error: code points greater than 0x10ffff are not defined\0"
  "bad data value\0"
  /* 30 */
  "patterns do not all use the same character tables\0"
  "magic number missing\0"
  "pattern compiled in wrong mode: 8/16/32-bit error\0"
  "bad offset value\0"
  "bad option value\0"
  /* 35 */
  "invalid replacement string\0"
  "bad offset into UTF string\0"
  "callout error code\0"  /* Never returned by PCRE2 itself */
  "invalid data in workspace for DFA restart\0"
  "too much recursion for DFA matching\0"
  /* 40 */
  "backreference condition or recursion test is not supported for DFA matching\0"
  "function is not supported for DFA matching\0"
  "pattern contains an item that is not supported for DFA matching\0"
  "workspace size exceeded in DFA matching\0"
  "internal error - pattern overwritten?\0"
  /* 45 */
  "bad JIT option\0"
  "JIT stack limit reached\0"
  "match limit exceeded\0"
  "no more memory\0"
  "unknown substring\0"
  /* 50 */
  "non-unique substring name\0"
  "NULL argument passed with non-zero length\0"
  "nested recursion at the same subject position\0"
  "matching depth limit exceeded\0"
  "requested value is not available\0"
  /* 55 */
  "requested value is not set\0"
  "offset limit set without PCRE2_USE_OFFSET_LIMIT\0"
  "bad escape sequence in replacement string\0"
  "expected closing curly bracket in replacement string\0"
  "bad substitution in replacement string\0"
  /* 60 */
  "match with end before start or start moved backwards is not supported\0"
  "too many replacements (more than INT_MAX)\0"
  "bad serialized data\0"
  "heap limit exceeded\0"
  "invalid syntax\0"
  /* 65 */
  "internal error - duplicate substitution match\0"
  "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching\0"
  "INTERNAL ERROR: invalid substring offset\0"
  "feature is not supported by the JIT compiler\0"
  "error performing replacement case transformation\0"
  /* 70 */
  "replacement too large (longer than PCRE2_SIZE)\0"
  ;
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Return error message *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function copies an error message into a buffer whose units are of an
|
||||||
|
appropriate width. Error numbers are positive for compile-time errors, and
|
||||||
|
negative for match-time errors (except for UTF errors), but the numbers are all
|
||||||
|
distinct.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
enumber error number
|
||||||
|
buffer where to put the message (zero terminated)
|
||||||
|
size size of the buffer in code units
|
||||||
|
|
||||||
|
Returns: length of message if all is well
|
||||||
|
negative on error
|
||||||
|
*/
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, PCRE2_SIZE size)
|
||||||
|
{
|
||||||
|
const unsigned char *message;
|
||||||
|
PCRE2_SIZE i;
|
||||||
|
int n;
|
||||||
|
|
||||||
|
if (size == 0) return PCRE2_ERROR_NOMEMORY;
|
||||||
|
|
||||||
|
if (enumber >= COMPILE_ERROR_BASE) /* Compile error */
|
||||||
|
{
|
||||||
|
message = compile_error_texts;
|
||||||
|
n = enumber - COMPILE_ERROR_BASE;
|
||||||
|
}
|
||||||
|
else if (enumber < 0) /* Match or UTF error */
|
||||||
|
{
|
||||||
|
message = match_error_texts;
|
||||||
|
n = -enumber;
|
||||||
|
}
|
||||||
|
else /* Invalid error number */
|
||||||
|
{
|
||||||
|
message = (const unsigned char *)"\0"; /* Empty message list */
|
||||||
|
n = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (; n > 0; n--)
|
||||||
|
{
|
||||||
|
while (*message++ != CHAR_NUL) {};
|
||||||
|
if (*message == CHAR_NUL) return PCRE2_ERROR_BADDATA;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; *message != 0; i++)
|
||||||
|
{
|
||||||
|
if (i >= size - 1)
|
||||||
|
{
|
||||||
|
buffer[i] = 0; /* Terminate partial message */
|
||||||
|
return PCRE2_ERROR_NOMEMORY;
|
||||||
|
}
|
||||||
|
buffer[i] = *message++;
|
||||||
|
}
|
||||||
|
|
||||||
|
buffer[i] = 0;
|
||||||
|
return (int)i;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre2_error.c */
|
||||||
162
3rd/pcre2/src/pcre2_extuni.c
Normal file
162
3rd/pcre2/src/pcre2_extuni.c
Normal file
@@ -0,0 +1,162 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* This module contains an internal function that is used to match a Unicode
|
||||||
|
extended grapheme sequence. It is used by both pcre2_match() and
|
||||||
|
pcre2_dfa_match(). However, it is called only when Unicode support is being
|
||||||
|
compiled. Nevertheless, we provide a dummy function when there is no Unicode
|
||||||
|
support, because some compilers do not like functionless source files. */
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
#include "config.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#include "pcre2_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
/* Dummy function */
|
||||||
|
|
||||||
|
#ifndef SUPPORT_UNICODE
|
||||||
|
PCRE2_SPTR
|
||||||
|
PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,
|
||||||
|
PCRE2_SPTR end_subject, BOOL utf, int *xcount)
|
||||||
|
{
|
||||||
|
(void)c;
|
||||||
|
(void)eptr;
|
||||||
|
(void)start_subject;
|
||||||
|
(void)end_subject;
|
||||||
|
(void)utf;
|
||||||
|
(void)xcount;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Match an extended grapheme sequence *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* NOTE: The logic contained in this function is replicated in three special-
|
||||||
|
purpose functions in the pcre2_jit_compile.c module. If the logic below is
|
||||||
|
changed, they must be kept in step so that the interpreter and the JIT have the
|
||||||
|
same behaviour.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
c the first character
|
||||||
|
eptr pointer to next character
|
||||||
|
start_subject pointer to start of subject
|
||||||
|
end_subject pointer to end of subject
|
||||||
|
utf TRUE if in UTF mode
|
||||||
|
xcount pointer to count of additional characters,
|
||||||
|
or NULL if count not needed
|
||||||
|
|
||||||
|
Returns: pointer after the end of the sequence
|
||||||
|
*/
|
||||||
|
|
||||||
|
PCRE2_SPTR
|
||||||
|
PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,
|
||||||
|
PCRE2_SPTR end_subject, BOOL utf, int *xcount)
|
||||||
|
{
|
||||||
|
BOOL was_ep_ZWJ = FALSE;
|
||||||
|
int lgb = UCD_GRAPHBREAK(c);
|
||||||
|
|
||||||
|
while (eptr < end_subject)
|
||||||
|
{
|
||||||
|
int rgb;
|
||||||
|
int len = 1;
|
||||||
|
if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
|
||||||
|
rgb = UCD_GRAPHBREAK(c);
|
||||||
|
if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
|
||||||
|
|
||||||
|
/* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
|
||||||
|
preceded by Extended Pictographic. */
|
||||||
|
|
||||||
|
if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ)
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Not breaking between Regional Indicators is allowed only if there
|
||||||
|
are an even number of preceding RIs. */
|
||||||
|
|
||||||
|
if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
|
||||||
|
{
|
||||||
|
int ricount = 0;
|
||||||
|
PCRE2_SPTR bptr = eptr - 1;
|
||||||
|
if (utf) BACKCHAR(bptr);
|
||||||
|
|
||||||
|
/* bptr is pointing to the left-hand character */
|
||||||
|
|
||||||
|
while (bptr > start_subject)
|
||||||
|
{
|
||||||
|
bptr--;
|
||||||
|
if (utf)
|
||||||
|
{
|
||||||
|
BACKCHAR(bptr);
|
||||||
|
GETCHAR(c, bptr);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
c = *bptr;
|
||||||
|
if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break;
|
||||||
|
ricount++;
|
||||||
|
}
|
||||||
|
if ((ricount & 1) != 0) break; /* Grapheme break required */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in
|
||||||
|
between; see next statement). */
|
||||||
|
|
||||||
|
was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ);
|
||||||
|
|
||||||
|
/* If Extend follows Extended_Pictographic, do not update lgb; this allows
|
||||||
|
any number of them before a following ZWJ. */
|
||||||
|
|
||||||
|
if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic) lgb = rgb;
|
||||||
|
|
||||||
|
eptr += len;
|
||||||
|
if (xcount != NULL) *xcount += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return eptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
/* End of pcre2_extuni.c */
|
||||||
220
3rd/pcre2/src/pcre2_find_bracket.c
Normal file
220
3rd/pcre2/src/pcre2_find_bracket.c
Normal file
@@ -0,0 +1,220 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module contains a single function that scans through a compiled pattern
|
||||||
|
until it finds a capturing bracket with the given number, or, if the number is
|
||||||
|
negative, an instance of OP_REVERSE or OP_VREVERSE for a lookbehind. The
|
||||||
|
function is called from pcre2_compile.c and also from pcre2_study.c when
|
||||||
|
finding the minimum matching length. */
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
#include "config.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "pcre2_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Scan compiled regex for specific bracket *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/*
|
||||||
|
Arguments:
|
||||||
|
code points to start of expression
|
||||||
|
utf TRUE in UTF mode
|
||||||
|
number the required bracket number or negative to find a lookbehind
|
||||||
|
|
||||||
|
Returns: pointer to the opcode for the bracket, or NULL if not found
|
||||||
|
*/
|
||||||
|
|
||||||
|
PCRE2_SPTR
|
||||||
|
PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
|
||||||
|
{
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
PCRE2_UCHAR c = *code;
|
||||||
|
|
||||||
|
if (c == OP_END) return NULL;
|
||||||
|
|
||||||
|
/* XCLASS is used for classes that cannot be represented just by a bit map.
|
||||||
|
This includes negated single high-valued characters. ECLASS is used for
|
||||||
|
classes that use set operations internally. CALLOUT_STR is used for
|
||||||
|
callouts with string arguments. In each case the length in the table is
|
||||||
|
zero; the actual length is stored in the compiled code. */
|
||||||
|
|
||||||
|
if (c == OP_XCLASS || c == OP_ECLASS) code += GET(code, 1);
|
||||||
|
else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
|
||||||
|
|
||||||
|
/* Handle lookbehind */
|
||||||
|
|
||||||
|
else if (c == OP_REVERSE || c == OP_VREVERSE)
|
||||||
|
{
|
||||||
|
if (number < 0) return code;
|
||||||
|
code += PRIV(OP_lengths)[c];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Handle capturing bracket */
|
||||||
|
|
||||||
|
else if (c == OP_CBRA || c == OP_SCBRA ||
|
||||||
|
c == OP_CBRAPOS || c == OP_SCBRAPOS)
|
||||||
|
{
|
||||||
|
int n = (int)GET2(code, 1+LINK_SIZE);
|
||||||
|
if (n == number) return code;
|
||||||
|
code += PRIV(OP_lengths)[c];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Otherwise, we can get the item's length from the table, except that for
|
||||||
|
repeated character types, we have to test for \p and \P, which have an extra
|
||||||
|
two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
|
||||||
|
must add in its length. */
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
switch(c)
|
||||||
|
{
|
||||||
|
case OP_TYPESTAR:
|
||||||
|
case OP_TYPEMINSTAR:
|
||||||
|
case OP_TYPEPLUS:
|
||||||
|
case OP_TYPEMINPLUS:
|
||||||
|
case OP_TYPEQUERY:
|
||||||
|
case OP_TYPEMINQUERY:
|
||||||
|
case OP_TYPEPOSSTAR:
|
||||||
|
case OP_TYPEPOSPLUS:
|
||||||
|
case OP_TYPEPOSQUERY:
|
||||||
|
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_TYPEUPTO:
|
||||||
|
case OP_TYPEMINUPTO:
|
||||||
|
case OP_TYPEEXACT:
|
||||||
|
case OP_TYPEPOSUPTO:
|
||||||
|
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
|
||||||
|
code += 2;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_MARK:
|
||||||
|
case OP_COMMIT_ARG:
|
||||||
|
case OP_PRUNE_ARG:
|
||||||
|
case OP_SKIP_ARG:
|
||||||
|
case OP_THEN_ARG:
|
||||||
|
code += code[1];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Add in the fixed length from the table */
|
||||||
|
|
||||||
|
code += PRIV(OP_lengths)[c];
|
||||||
|
|
||||||
|
/* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
|
||||||
|
followed by a multi-byte character. The length in the table is a minimum, so
|
||||||
|
we have to arrange to skip the extra bytes. */
|
||||||
|
|
||||||
|
#ifdef MAYBE_UTF_MULTI
|
||||||
|
if (utf) switch(c)
|
||||||
|
{
|
||||||
|
case OP_CHAR:
|
||||||
|
case OP_CHARI:
|
||||||
|
case OP_NOT:
|
||||||
|
case OP_NOTI:
|
||||||
|
case OP_EXACT:
|
||||||
|
case OP_EXACTI:
|
||||||
|
case OP_NOTEXACT:
|
||||||
|
case OP_NOTEXACTI:
|
||||||
|
case OP_UPTO:
|
||||||
|
case OP_UPTOI:
|
||||||
|
case OP_NOTUPTO:
|
||||||
|
case OP_NOTUPTOI:
|
||||||
|
case OP_MINUPTO:
|
||||||
|
case OP_MINUPTOI:
|
||||||
|
case OP_NOTMINUPTO:
|
||||||
|
case OP_NOTMINUPTOI:
|
||||||
|
case OP_POSUPTO:
|
||||||
|
case OP_POSUPTOI:
|
||||||
|
case OP_NOTPOSUPTO:
|
||||||
|
case OP_NOTPOSUPTOI:
|
||||||
|
case OP_STAR:
|
||||||
|
case OP_STARI:
|
||||||
|
case OP_NOTSTAR:
|
||||||
|
case OP_NOTSTARI:
|
||||||
|
case OP_MINSTAR:
|
||||||
|
case OP_MINSTARI:
|
||||||
|
case OP_NOTMINSTAR:
|
||||||
|
case OP_NOTMINSTARI:
|
||||||
|
case OP_POSSTAR:
|
||||||
|
case OP_POSSTARI:
|
||||||
|
case OP_NOTPOSSTAR:
|
||||||
|
case OP_NOTPOSSTARI:
|
||||||
|
case OP_PLUS:
|
||||||
|
case OP_PLUSI:
|
||||||
|
case OP_NOTPLUS:
|
||||||
|
case OP_NOTPLUSI:
|
||||||
|
case OP_MINPLUS:
|
||||||
|
case OP_MINPLUSI:
|
||||||
|
case OP_NOTMINPLUS:
|
||||||
|
case OP_NOTMINPLUSI:
|
||||||
|
case OP_POSPLUS:
|
||||||
|
case OP_POSPLUSI:
|
||||||
|
case OP_NOTPOSPLUS:
|
||||||
|
case OP_NOTPOSPLUSI:
|
||||||
|
case OP_QUERY:
|
||||||
|
case OP_QUERYI:
|
||||||
|
case OP_NOTQUERY:
|
||||||
|
case OP_NOTQUERYI:
|
||||||
|
case OP_MINQUERY:
|
||||||
|
case OP_MINQUERYI:
|
||||||
|
case OP_NOTMINQUERY:
|
||||||
|
case OP_NOTMINQUERYI:
|
||||||
|
case OP_POSQUERY:
|
||||||
|
case OP_POSQUERYI:
|
||||||
|
case OP_NOTPOSQUERY:
|
||||||
|
case OP_NOTPOSQUERYI:
|
||||||
|
if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
(void)(utf); /* Keep compiler happy by referencing function argument */
|
||||||
|
#endif /* MAYBE_UTF_MULTI */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre2_find_bracket.c */
|
||||||
804
3rd/pcre2/src/pcre2_fuzzsupport.c
Normal file
804
3rd/pcre2/src/pcre2_fuzzsupport.c
Normal file
@@ -0,0 +1,804 @@
|
|||||||
|
/***************************************************************************
|
||||||
|
Fuzzer driver for PCRE2. Given an arbitrary string of bytes and a length, it
|
||||||
|
tries to compile and match it, deriving options from the string itself. If
|
||||||
|
STANDALONE is defined, a main program that calls the driver with the contents
|
||||||
|
of specified files is compiled, and commentary on what is happening is output.
|
||||||
|
If an argument starts with '=' the rest of it is taken as a literal string
|
||||||
|
rather than a file name. This allows easy testing of short strings.
|
||||||
|
|
||||||
|
Written by Philip Hazel, October 2016
|
||||||
|
Updated February 2024 (Addison Crump added 16-bit/32-bit and JIT support)
|
||||||
|
Further updates March/April/May 2024 by PH
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
#include <errno.h>
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
/* stack size adjustment */
|
||||||
|
#include <sys/time.h>
|
||||||
|
#include <sys/resource.h>
|
||||||
|
|
||||||
|
#define STACK_SIZE_MB 256
|
||||||
|
#define JIT_SIZE_LIMIT (200 * 1024)
|
||||||
|
|
||||||
|
#ifndef PCRE2_CODE_UNIT_WIDTH
|
||||||
|
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
#include "pcre2.h"
|
||||||
|
#include "pcre2_internal.h"
|
||||||
|
|
||||||
|
#define MAX_MATCH_SIZE 1000
|
||||||
|
|
||||||
|
#define DFA_WORKSPACE_COUNT 100
|
||||||
|
|
||||||
|
/* When adding new compile or match options, remember to update the functions
|
||||||
|
below that output them. */
|
||||||
|
|
||||||
|
#define ALLOWED_COMPILE_OPTIONS \
|
||||||
|
(PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \
|
||||||
|
PCRE2_ALT_EXTENDED_CLASS|PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT| \
|
||||||
|
PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \
|
||||||
|
PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_ENDANCHORED|PCRE2_EXTENDED| \
|
||||||
|
PCRE2_EXTENDED_MORE|PCRE2_FIRSTLINE| \
|
||||||
|
PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \
|
||||||
|
PCRE2_NO_AUTO_CAPTURE| \
|
||||||
|
PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \
|
||||||
|
PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_USE_OFFSET_LIMIT| \
|
||||||
|
PCRE2_UTF)
|
||||||
|
|
||||||
|
#define ALLOWED_MATCH_OPTIONS \
|
||||||
|
(PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
|
||||||
|
PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \
|
||||||
|
PCRE2_PARTIAL_SOFT)
|
||||||
|
|
||||||
|
#define BASE_MATCH_OPTIONS \
|
||||||
|
(PCRE2_NO_JIT|PCRE2_DISABLE_RECURSELOOP_CHECK)
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(SUPPORT_DIFF_FUZZ) || defined(STANDALONE)
|
||||||
|
static void print_compile_options(FILE *stream, uint32_t compile_options)
|
||||||
|
{
|
||||||
|
fprintf(stream, "Compile options %s%.8x =",
|
||||||
|
(compile_options == PCRE2_NEVER_BACKSLASH_C)? "(base) " : "",
|
||||||
|
compile_options);
|
||||||
|
|
||||||
|
fprintf(stream, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
|
||||||
|
((compile_options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
|
||||||
|
((compile_options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
|
||||||
|
((compile_options & PCRE2_ALT_EXTENDED_CLASS) != 0)? "alt_extended_class" : "",
|
||||||
|
((compile_options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
|
||||||
|
((compile_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
|
||||||
|
((compile_options & PCRE2_ANCHORED) != 0)? " anchored" : "",
|
||||||
|
((compile_options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
|
||||||
|
((compile_options & PCRE2_CASELESS) != 0)? " caseless" : "",
|
||||||
|
((compile_options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
|
||||||
|
((compile_options & PCRE2_DOTALL) != 0)? " dotall" : "",
|
||||||
|
((compile_options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
|
||||||
|
((compile_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
|
||||||
|
((compile_options & PCRE2_EXTENDED) != 0)? " extended" : "",
|
||||||
|
((compile_options & PCRE2_EXTENDED_MORE) != 0)? " extended_more" : "",
|
||||||
|
((compile_options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
|
||||||
|
((compile_options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
|
||||||
|
((compile_options & PCRE2_MULTILINE) != 0)? " multiline" : "",
|
||||||
|
((compile_options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
|
||||||
|
((compile_options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "",
|
||||||
|
((compile_options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "",
|
||||||
|
((compile_options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
|
||||||
|
((compile_options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
|
||||||
|
((compile_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "",
|
||||||
|
((compile_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
|
||||||
|
((compile_options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
|
||||||
|
((compile_options & PCRE2_UCP) != 0)? " ucp" : "",
|
||||||
|
((compile_options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
|
||||||
|
((compile_options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
|
||||||
|
((compile_options & PCRE2_UTF) != 0)? " utf" : "");
|
||||||
|
}
|
||||||
|
|
||||||
|
static void print_match_options(FILE *stream, uint32_t match_options)
|
||||||
|
{
|
||||||
|
fprintf(stream, "Match options %s%.8x =",
|
||||||
|
(match_options == BASE_MATCH_OPTIONS)? "(base) " : "", match_options);
|
||||||
|
|
||||||
|
fprintf(stream, "%s%s%s%s%s%s%s%s%s%s%s\n",
|
||||||
|
((match_options & PCRE2_ANCHORED) != 0)? " anchored" : "",
|
||||||
|
((match_options & PCRE2_DISABLE_RECURSELOOP_CHECK) != 0)? " disable_recurseloop_check" : "",
|
||||||
|
((match_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
|
||||||
|
((match_options & PCRE2_NO_JIT) != 0)? " no_jit" : "",
|
||||||
|
((match_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
|
||||||
|
((match_options & PCRE2_NOTBOL) != 0)? " notbol" : "",
|
||||||
|
((match_options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
|
||||||
|
((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
|
||||||
|
((match_options & PCRE2_NOTEOL) != 0)? " noteol" : "",
|
||||||
|
((match_options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
|
||||||
|
((match_options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* This function can print an error message at all code unit widths. */
|
||||||
|
|
||||||
|
static void print_error(FILE *f, int errorcode, const char *text, ...)
|
||||||
|
{
|
||||||
|
PCRE2_UCHAR buffer[256];
|
||||||
|
PCRE2_UCHAR *p = buffer;
|
||||||
|
va_list ap;
|
||||||
|
va_start(ap, text);
|
||||||
|
vfprintf(f, text, ap);
|
||||||
|
va_end(ap);
|
||||||
|
pcre2_get_error_message(errorcode, buffer, 256);
|
||||||
|
while (*p != 0) fprintf(f, "%c", *p++);
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
#endif /* defined(SUPPORT_DIFF_FUZZ || defined(STANDALONE) */
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef SUPPORT_JIT
|
||||||
|
#ifdef SUPPORT_DIFF_FUZZ
|
||||||
|
/* Write each of the first "count" captured substrings of a match to a stream
as a line of hexadecimal code unit values. If a substring cannot be extracted,
the library's error message for it is written instead.

Arguments:
  stream       where to write
  count        number of substrings to dump (numbers 0 to count-1)
  match_data   the match data block to read the substrings from
*/

static void dump_matches(FILE *stream, int count, pcre2_match_data *match_data)
{
for (int index = 0; index < count; index++)
  {
  PCRE2_UCHAR *bufferptr = NULL;
  PCRE2_SIZE bufflen = 0;
  int errorcode = pcre2_substring_get_bynumber(match_data, index, &bufferptr,
    &bufflen);

  if (errorcode < 0)
    {
    print_error(stream, errorcode, "Match %d failed: ", index);
    continue;
    }

  fprintf(stream, "Match %d (hex encoded): ", index);
  for (PCRE2_SIZE i = 0; i < bufflen; i++)
    {
    fprintf(stream, "%02x", (unsigned int)bufferptr[i]);
    }
  fprintf(stream, "\n");

  /* The substring was placed in newly obtained memory, which must be handed
  back once it has been printed. */

  pcre2_substring_free(bufferptr);
  }
}
|
||||||
|
|
||||||
|
/* This function describes the current test case being evaluated, then aborts */
|
||||||
|
|
||||||
|
static void describe_failure(
|
||||||
|
const char *task,
|
||||||
|
const PCRE2_UCHAR *data,
|
||||||
|
PCRE2_SIZE size,
|
||||||
|
uint32_t compile_options,
|
||||||
|
uint32_t match_options,
|
||||||
|
int errorcode,
|
||||||
|
int errorcode_jit,
|
||||||
|
int matches,
|
||||||
|
int matches_jit,
|
||||||
|
pcre2_match_data *match_data,
|
||||||
|
pcre2_match_data *match_data_jit
|
||||||
|
) {
|
||||||
|
|
||||||
|
fprintf(stderr, "Encountered failure while performing %s; context:\n", task);
|
||||||
|
|
||||||
|
fprintf(stderr, "Pattern/sample string (hex encoded): ");
|
||||||
|
for (size_t i = 0; i < size; i++)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "%02x", data[i]);
|
||||||
|
}
|
||||||
|
fprintf(stderr, "\n");
|
||||||
|
|
||||||
|
print_compile_options(stderr, compile_options);
|
||||||
|
print_match_options(stderr, match_options);
|
||||||
|
|
||||||
|
if (errorcode < 0)
|
||||||
|
{
|
||||||
|
print_error(stderr, errorcode, "Non-JIT'd operation emitted an error: ");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (matches >= 0)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Non-JIT'd operation did not emit an error.\n");
|
||||||
|
if (match_data != NULL)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "%d matches discovered by non-JIT'd regex:\n", matches);
|
||||||
|
dump_matches(stderr, matches, match_data);
|
||||||
|
fprintf(stderr, "\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (errorcode_jit < 0)
|
||||||
|
{
|
||||||
|
print_error(stderr, errorcode_jit, "JIT'd operation emitted error %d:",
|
||||||
|
errorcode_jit);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (matches_jit >= 0)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "JIT'd operation did not emit an error.\n");
|
||||||
|
if (match_data_jit != NULL)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "%d matches discovered by JIT'd regex:\n", matches_jit);
|
||||||
|
dump_matches(stderr, matches_jit, match_data_jit);
|
||||||
|
fprintf(stderr, "\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
#endif /* SUPPORT_DIFF_FUZZ */
|
||||||
|
#endif /* SUPPORT_JIT */
|
||||||
|
|
||||||
|
/* This is the callout function. Its only purpose is to halt matching if there
|
||||||
|
are more than 100 callouts, as one way of stopping too much time being spent on
|
||||||
|
fruitless matches. The callout data is a pointer to the counter. */
|
||||||
|
|
||||||
|
/* Callout handler. callout_data points to a uint32_t counter, which is
incremented on every call. Returns 0 while the counter is at most 100, and
PCRE2_ERROR_CALLOUT once it exceeds 100. The callout block itself is not
inspected. */

static int callout_function(pcre2_callout_block *cb, void *callout_data)
{
uint32_t *counter = (uint32_t *)callout_data;

(void)cb;  /* Avoid unused parameter warning */

*counter += 1;
if (*counter > 100) return PCRE2_ERROR_CALLOUT;
return 0;
}
|
||||||
|
|
||||||
|
/* Putting in this apparently unnecessary prototype prevents gcc from giving a
|
||||||
|
"no previous prototype" warning when compiling at high warning level. */
|
||||||
|
|
||||||
|
int LLVMFuzzerInitialize(int *, char ***);
|
||||||
|
|
||||||
|
int LLVMFuzzerTestOneInput(unsigned char *, size_t);
|
||||||
|
|
||||||
|
int LLVMFuzzerInitialize(int *argc, char ***argv)
|
||||||
|
{
|
||||||
|
int rc;
|
||||||
|
struct rlimit rlim;
|
||||||
|
getrlimit(RLIMIT_STACK, &rlim);
|
||||||
|
rlim.rlim_cur = STACK_SIZE_MB * 1024 * 1024;
|
||||||
|
if (rlim.rlim_cur > rlim.rlim_max)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Hard stack size limit is too small\n");
|
||||||
|
_exit(1);
|
||||||
|
}
|
||||||
|
rc = setrlimit(RLIMIT_STACK, &rlim);
|
||||||
|
if (rc != 0)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Failed to expand stack size\n");
|
||||||
|
_exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
(void)argc; /* Avoid "unused parameter" warnings */
|
||||||
|
(void)argv;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here's the driving function. */
|
||||||
|
|
||||||
|
int LLVMFuzzerTestOneInput(unsigned char *data, size_t size)
|
||||||
|
{
|
||||||
|
PCRE2_UCHAR *wdata;
|
||||||
|
PCRE2_UCHAR *newwdata = NULL;
|
||||||
|
uint32_t compile_options;
|
||||||
|
uint32_t match_options;
|
||||||
|
uint64_t random_options;
|
||||||
|
pcre2_match_data *match_data = NULL;
|
||||||
|
#ifdef SUPPORT_JIT
|
||||||
|
pcre2_match_data *match_data_jit = NULL;
|
||||||
|
#endif
|
||||||
|
pcre2_compile_context *compile_context = NULL;
|
||||||
|
pcre2_match_context *match_context = NULL;
|
||||||
|
size_t match_size;
|
||||||
|
int dfa_workspace[DFA_WORKSPACE_COUNT];
|
||||||
|
|
||||||
|
if (size < sizeof(random_options)) return -1;
|
||||||
|
|
||||||
|
random_options = *(uint64_t *)(data);
|
||||||
|
data += sizeof(random_options);
|
||||||
|
wdata = (PCRE2_UCHAR *)data;
|
||||||
|
size -= sizeof(random_options);
|
||||||
|
size /= PCRE2_CODE_UNIT_WIDTH / 8;
|
||||||
|
|
||||||
|
/* PCRE2 compiles quantified groups by replicating them. In certain cases of
|
||||||
|
very large quantifiers this can lead to unacceptably long JIT compile times. To
|
||||||
|
get around this, we scan the data string for large quantifiers that follow a
|
||||||
|
closing parenthesis, and reduce the value of the quantifier to 10, assuming
|
||||||
|
that this will make minimal difference to the detection of bugs.
|
||||||
|
|
||||||
|
Do the same for quantifiers that follow a closing square bracket, because
|
||||||
|
classes that contain a number of non-ascii characters can take a lot of time
|
||||||
|
when matching.
|
||||||
|
|
||||||
|
We have to make a copy of the input because oss-fuzz complains if we overwrite
|
||||||
|
the original. Start the scan at the second character so there can be a
|
||||||
|
lookbehind for a backslash, and end it before the end so that the next
|
||||||
|
character can be checked for an opening brace. */
|
||||||
|
|
||||||
|
if (size > 3)
|
||||||
|
{
|
||||||
|
newwdata = malloc(size * sizeof(PCRE2_UCHAR));
|
||||||
|
memcpy(newwdata, wdata, size * sizeof(PCRE2_UCHAR));
|
||||||
|
wdata = newwdata;
|
||||||
|
|
||||||
|
for (size_t i = 1; i < size - 2; i++)
|
||||||
|
{
|
||||||
|
size_t j;
|
||||||
|
|
||||||
|
if ((wdata[i] != ')' && wdata[i] != ']') || wdata[i-1] == '\\' ||
|
||||||
|
wdata[i+1] != '{')
|
||||||
|
continue;
|
||||||
|
i++; /* Points to '{' */
|
||||||
|
|
||||||
|
/* Loop for two values in a quantifier. Offset i points to brace or comma
|
||||||
|
at the start of the loop. */
|
||||||
|
|
||||||
|
for (int ii = 0; ii < 2; ii++)
|
||||||
|
{
|
||||||
|
int q = 0;
|
||||||
|
|
||||||
|
if (i >= size - 1) goto END_QSCAN; /* Can happen for , */
|
||||||
|
|
||||||
|
/* Ignore leading spaces. */
|
||||||
|
|
||||||
|
while (wdata[i+1] == ' ' || wdata[i+1] == '\t')
|
||||||
|
{
|
||||||
|
i++;
|
||||||
|
if (i >= size - 1) goto END_QSCAN;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Ignore non-significant leading zeros. */
|
||||||
|
|
||||||
|
while (wdata[i+1] == '0' && i+2 < size && wdata[i+2] >= '0' &&
|
||||||
|
wdata[i+2] <= '9')
|
||||||
|
{
|
||||||
|
i++;
|
||||||
|
if (i >= size - 1) goto END_QSCAN;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Scan for a number ending in brace, or comma in the first iteration,
|
||||||
|
optionally preceded by space. */
|
||||||
|
|
||||||
|
for (j = i + 1; j < size && j < i + 7; j++)
|
||||||
|
{
|
||||||
|
if (wdata[j] == ' ' || wdata[j] == '\t')
|
||||||
|
{
|
||||||
|
j++;
|
||||||
|
while (j < size && (wdata[j] == ' ' || wdata[j] == '\t')) j++;
|
||||||
|
if (j >= size) goto OUTERLOOP;
|
||||||
|
if (wdata[j] != '}' && wdata[j] != ',') goto OUTERLOOP;
|
||||||
|
}
|
||||||
|
if (wdata[j] == '}' || (ii == 0 && wdata[j] == ',')) break;
|
||||||
|
|
||||||
|
if (wdata[j] < '0' || wdata[j] > '9')
|
||||||
|
{
|
||||||
|
j--; /* Ensure this character is checked next. The */
|
||||||
|
goto OUTERLOOP; /* string might be (e.g.) "){9){234}" */
|
||||||
|
}
|
||||||
|
q = q * 10 + (wdata[j] - '0');
|
||||||
|
}
|
||||||
|
|
||||||
|
if (j >= size) goto END_QSCAN; /* End of data */
|
||||||
|
|
||||||
|
/* Hit ',' or '}' or read 6 digits. Six digits is a number > 65536 which
|
||||||
|
is the maximum quantifier. Leave such numbers alone. */
|
||||||
|
|
||||||
|
if (j >= i + 7 || q > 65535) goto OUTERLOOP;
|
||||||
|
|
||||||
|
/* Limit the quantifier size to 10 */
|
||||||
|
|
||||||
|
if (q > 10)
|
||||||
|
{
|
||||||
|
#ifdef STANDALONE
|
||||||
|
printf("Reduced quantifier value %d to 10.\n", q);
|
||||||
|
#endif
|
||||||
|
for (size_t k = i + 1; k < j; k++) wdata[k] = '0';
|
||||||
|
wdata[j - 2] = '1';
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Advance to end of number and break if reached closing brace (continue
|
||||||
|
after comma, which is only valid in the first time round this loop). */
|
||||||
|
|
||||||
|
i = j;
|
||||||
|
if (wdata[i] == '}') break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Continue along the data string */
|
||||||
|
|
||||||
|
OUTERLOOP:
|
||||||
|
i = j;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
END_QSCAN:
|
||||||
|
|
||||||
|
/* Limiting the length of the subject for matching stops fruitless searches
|
||||||
|
in large trees taking too much time. */
|
||||||
|
|
||||||
|
match_size = (size > MAX_MATCH_SIZE)? MAX_MATCH_SIZE : size;
|
||||||
|
|
||||||
|
/* Create a compile context, and set a limit on the size of the compiled
|
||||||
|
pattern. This stops the fuzzer using vast amounts of memory. */
|
||||||
|
|
||||||
|
compile_context = pcre2_compile_context_create(NULL);
|
||||||
|
if (compile_context == NULL)
|
||||||
|
{
|
||||||
|
#ifdef STANDALONE
|
||||||
|
fprintf(stderr, "** Failed to create compile context block\n");
|
||||||
|
#endif
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
pcre2_set_max_pattern_compiled_length(compile_context, 10*1024*1024);
|
||||||
|
|
||||||
|
/* Ensure that all undefined option bits are zero (waste of time trying them)
|
||||||
|
and also that PCRE2_NO_UTF_CHECK is unset, as there is no guarantee that the
|
||||||
|
input is valid UTF. Also unset PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as there is
|
||||||
|
no reason to disallow UTF and UCP. Force PCRE2_NEVER_BACKSLASH_C to be set
|
||||||
|
because \C in random patterns is highly likely to cause a crash. */
|
||||||
|
|
||||||
|
compile_options = ((random_options >> 32) & ALLOWED_COMPILE_OPTIONS) |
|
||||||
|
PCRE2_NEVER_BACKSLASH_C;
|
||||||
|
match_options = (((uint32_t)random_options) & ALLOWED_MATCH_OPTIONS) |
|
||||||
|
BASE_MATCH_OPTIONS;
|
||||||
|
|
||||||
|
/* Discard partial matching if PCRE2_ENDANCHORED is set, because they are not
|
||||||
|
allowed together and just give an immediate error return. */
|
||||||
|
|
||||||
|
if (((compile_options|match_options) & PCRE2_ENDANCHORED) != 0)
|
||||||
|
match_options &= ~(PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT);
|
||||||
|
|
||||||
|
/* Do the compile with and without the options, and after a successful compile,
|
||||||
|
likewise do the match with and without the options. */
|
||||||
|
|
||||||
|
for (int i = 0; i < 2; i++)
|
||||||
|
{
|
||||||
|
uint32_t callout_count;
|
||||||
|
int errorcode;
|
||||||
|
#ifdef SUPPORT_JIT
|
||||||
|
int errorcode_jit;
|
||||||
|
#ifdef SUPPORT_DIFF_FUZZ
|
||||||
|
int matches = 0;
|
||||||
|
int matches_jit = 0;
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
PCRE2_SIZE erroroffset;
|
||||||
|
pcre2_code *code;
|
||||||
|
|
||||||
|
#ifdef STANDALONE
|
||||||
|
printf("\n");
|
||||||
|
print_compile_options(stdout, compile_options);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
code = pcre2_compile((PCRE2_SPTR)wdata, (PCRE2_SIZE)size, compile_options,
|
||||||
|
&errorcode, &erroroffset, compile_context);
|
||||||
|
|
||||||
|
/* Compilation succeeded */
|
||||||
|
|
||||||
|
if (code != NULL)
|
||||||
|
{
|
||||||
|
int j;
|
||||||
|
uint32_t save_match_options = match_options;
|
||||||
|
|
||||||
|
/* Call JIT compile only if the compiled pattern is not too big. */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_JIT
|
||||||
|
int jit_ret = -1;
|
||||||
|
if (((struct pcre2_real_code *)code)->blocksize <= JIT_SIZE_LIMIT)
|
||||||
|
{
|
||||||
|
#ifdef STANDALONE
|
||||||
|
printf("Compile succeeded; calling JIT compile\n");
|
||||||
|
#endif
|
||||||
|
jit_ret = pcre2_jit_compile(code, PCRE2_JIT_COMPLETE);
|
||||||
|
#ifdef STANDALONE
|
||||||
|
if (jit_ret < 0) printf("JIT compile error %d\n", jit_ret);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
#ifdef STANDALONE
|
||||||
|
printf("Not calling JIT: compiled pattern is too long "
|
||||||
|
"(%ld bytes; limit=%d)\n",
|
||||||
|
((struct pcre2_real_code *)code)->blocksize, JIT_SIZE_LIMIT);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
#endif /* SUPPORT_JIT */
|
||||||
|
|
||||||
|
/* Create match data and context blocks only when we first need them. Set
|
||||||
|
low match and depth limits to avoid wasting too much searching large
|
||||||
|
pattern trees. Almost all matches are going to fail. */
|
||||||
|
|
||||||
|
if (match_data == NULL)
|
||||||
|
{
|
||||||
|
match_data = pcre2_match_data_create(32, NULL);
|
||||||
|
#ifdef SUPPORT_JIT
|
||||||
|
match_data_jit = pcre2_match_data_create(32, NULL);
|
||||||
|
if (match_data == NULL || match_data_jit == NULL)
|
||||||
|
#else
|
||||||
|
if (match_data == NULL)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
#ifdef STANDALONE
|
||||||
|
fprintf(stderr, "** Failed to create match data block\n");
|
||||||
|
#endif
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (match_context == NULL)
|
||||||
|
{
|
||||||
|
match_context = pcre2_match_context_create(NULL);
|
||||||
|
if (match_context == NULL)
|
||||||
|
{
|
||||||
|
#ifdef STANDALONE
|
||||||
|
fprintf(stderr, "** Failed to create match context block\n");
|
||||||
|
#endif
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
(void)pcre2_set_match_limit(match_context, 100);
|
||||||
|
(void)pcre2_set_depth_limit(match_context, 100);
|
||||||
|
(void)pcre2_set_callout(match_context, callout_function, &callout_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Match twice, with and without options. */
|
||||||
|
|
||||||
|
#ifdef STANDALONE
|
||||||
|
printf("\n");
|
||||||
|
#endif
|
||||||
|
for (j = 0; j < 2; j++)
|
||||||
|
{
|
||||||
|
#ifdef STANDALONE
|
||||||
|
print_match_options(stdout, match_options);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
callout_count = 0;
|
||||||
|
errorcode = pcre2_match(code, (PCRE2_SPTR)wdata, (PCRE2_SIZE)match_size, 0,
|
||||||
|
match_options, match_data, match_context);
|
||||||
|
|
||||||
|
#ifdef STANDALONE
|
||||||
|
if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
|
||||||
|
print_error(stdout, errorcode, "Match failed: error %d: ", errorcode);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* If JIT is enabled, do a JIT match and, if appropriately compiled, compare
|
||||||
|
with the interpreter. */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_JIT
|
||||||
|
if (jit_ret >= 0)
|
||||||
|
{
|
||||||
|
#ifdef STANDALONE
|
||||||
|
printf("Matching with JIT\n");
|
||||||
|
#endif
|
||||||
|
callout_count = 0;
|
||||||
|
errorcode_jit = pcre2_match(code, (PCRE2_SPTR)wdata, (PCRE2_SIZE)match_size, 0,
|
||||||
|
match_options & ~PCRE2_NO_JIT, match_data_jit, match_context);
|
||||||
|
|
||||||
|
#ifdef STANDALONE
|
||||||
|
if (errorcode_jit >= 0)
|
||||||
|
printf("Match returned %d\n", errorcode_jit);
|
||||||
|
else
|
||||||
|
print_error(stdout, errorcode_jit, "JIT match failed: error %d: ",
|
||||||
|
errorcode_jit);
|
||||||
|
#else
|
||||||
|
(void)errorcode_jit; /* Avoid compiler warning */
|
||||||
|
#endif /* STANDALONE */
|
||||||
|
|
||||||
|
/* With differential matching enabled, compare with interpreter. */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_DIFF_FUZZ
|
||||||
|
matches = errorcode;
|
||||||
|
matches_jit = errorcode_jit;
|
||||||
|
|
||||||
|
if (errorcode_jit != errorcode)
|
||||||
|
{
|
||||||
|
if (!(errorcode < 0 && errorcode_jit < 0) &&
|
||||||
|
errorcode != PCRE2_ERROR_MATCHLIMIT && errorcode != PCRE2_ERROR_CALLOUT &&
|
||||||
|
errorcode_jit != PCRE2_ERROR_MATCHLIMIT && errorcode_jit != PCRE2_ERROR_JIT_STACKLIMIT && errorcode_jit != PCRE2_ERROR_CALLOUT)
|
||||||
|
{
|
||||||
|
describe_failure("match errorcode comparison", wdata, size, compile_options, match_options, errorcode, errorcode_jit, matches, matches_jit, match_data, match_data_jit);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (int index = 0; index < errorcode; index++)
|
||||||
|
{
|
||||||
|
PCRE2_UCHAR *bufferptr, *bufferptr_jit;
|
||||||
|
PCRE2_SIZE bufflen, bufflen_jit;
|
||||||
|
|
||||||
|
bufferptr = bufferptr_jit = NULL;
|
||||||
|
bufflen = bufflen_jit = 0;
|
||||||
|
|
||||||
|
errorcode = pcre2_substring_get_bynumber(match_data, (uint32_t) index, &bufferptr, &bufflen);
|
||||||
|
errorcode_jit = pcre2_substring_get_bynumber(match_data_jit, (uint32_t) index, &bufferptr_jit, &bufflen_jit);
|
||||||
|
|
||||||
|
if (errorcode != errorcode_jit)
|
||||||
|
{
|
||||||
|
describe_failure("match entry errorcode comparison", wdata, size,
|
||||||
|
compile_options, match_options, errorcode, errorcode_jit,
|
||||||
|
matches, matches_jit, match_data, match_data_jit);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (errorcode >= 0)
|
||||||
|
{
|
||||||
|
if (bufflen != bufflen_jit)
|
||||||
|
{
|
||||||
|
describe_failure("match entry length comparison", wdata, size,
|
||||||
|
compile_options, match_options, errorcode, errorcode_jit,
|
||||||
|
matches, matches_jit, match_data, match_data_jit);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (memcmp(bufferptr, bufferptr_jit, bufflen) != 0)
|
||||||
|
{
|
||||||
|
describe_failure("match entry content comparison", wdata, size,
|
||||||
|
compile_options, match_options, errorcode, errorcode_jit,
|
||||||
|
matches, matches_jit, match_data, match_data_jit);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pcre2_substring_free(bufferptr);
|
||||||
|
pcre2_substring_free(bufferptr_jit);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif /* SUPPORT_DIFF_FUZZ */
|
||||||
|
}
|
||||||
|
#endif /* SUPPORT_JIT */
|
||||||
|
|
||||||
|
if (match_options == BASE_MATCH_OPTIONS) break; /* Don't do same twice */
|
||||||
|
match_options = BASE_MATCH_OPTIONS; /* For second time */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Match with DFA twice, with and without options, but remove options that
|
||||||
|
are not allowed with DFA. */
|
||||||
|
|
||||||
|
match_options = save_match_options & ~BASE_MATCH_OPTIONS;
|
||||||
|
|
||||||
|
#ifdef STANDALONE
|
||||||
|
printf("\n");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for (j = 0; j < 2; j++)
|
||||||
|
{
|
||||||
|
#ifdef STANDALONE
|
||||||
|
printf("DFA match options %.8x =", match_options);
|
||||||
|
printf("%s%s%s%s%s%s%s%s%s\n",
|
||||||
|
((match_options & PCRE2_ANCHORED) != 0)? " anchored" : "",
|
||||||
|
((match_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
|
||||||
|
((match_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
|
||||||
|
((match_options & PCRE2_NOTBOL) != 0)? " notbol" : "",
|
||||||
|
((match_options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
|
||||||
|
((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
|
||||||
|
((match_options & PCRE2_NOTEOL) != 0)? " noteol" : "",
|
||||||
|
((match_options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
|
||||||
|
((match_options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
callout_count = 0;
|
||||||
|
errorcode = pcre2_dfa_match(code, (PCRE2_SPTR)wdata,
|
||||||
|
(PCRE2_SIZE)match_size, 0, match_options, match_data,
|
||||||
|
match_context, dfa_workspace, DFA_WORKSPACE_COUNT);
|
||||||
|
|
||||||
|
#ifdef STANDALONE
|
||||||
|
if (errorcode >= 0)
|
||||||
|
printf("Match returned %d\n", errorcode);
|
||||||
|
else
|
||||||
|
print_error(stdout, errorcode, "DFA match failed: error %d: ", errorcode);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (match_options == 0) break; /* No point doing same twice */
|
||||||
|
match_options = 0; /* For second time */
|
||||||
|
}
|
||||||
|
|
||||||
|
match_options = save_match_options; /* Reset for the second compile */
|
||||||
|
pcre2_code_free(code);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Compilation failed */
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
#ifdef STANDALONE
|
||||||
|
print_error(stdout, errorcode, "Error %d at offset %lu: ", errorcode,
|
||||||
|
erroroffset);
|
||||||
|
#else
|
||||||
|
if (errorcode == PCRE2_ERROR_INTERNAL) abort();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
if (compile_options == PCRE2_NEVER_BACKSLASH_C) break; /* Avoid same twice */
|
||||||
|
compile_options = PCRE2_NEVER_BACKSLASH_C; /* For second time */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Tidy up before exiting */
|
||||||
|
|
||||||
|
if (match_data != NULL) pcre2_match_data_free(match_data);
|
||||||
|
#ifdef SUPPORT_JIT
|
||||||
|
if (match_data_jit != NULL) pcre2_match_data_free(match_data_jit);
|
||||||
|
#endif
|
||||||
|
free(newwdata);
|
||||||
|
if (match_context != NULL) pcre2_match_context_free(match_context);
|
||||||
|
if (compile_context != NULL) pcre2_compile_context_free(compile_context);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Optional main program. */
|
||||||
|
|
||||||
|
#ifdef STANDALONE
|
||||||
|
/* Stand-alone driver. Each command line argument is either a literal input,
introduced by '=', or the name of a file whose contents are the input. Every
input is copied into an exactly-sized heap buffer and passed to
LLVMFuzzerTestOneInput(). Problems with an individual argument are reported on
stdout and processing continues with the next argument.

Returns: always 0
*/

int main(int argc, char **argv)
{
LLVMFuzzerInitialize(&argc, &argv);

if (argc < 2)
  {
  printf("** No arguments given\n");
  return 0;
  }

for (int i = 1; i < argc; i++)
  {
  size_t filelen;
  size_t readsize;
  long endpos;
  unsigned char *buffer;
  FILE *f;

  /* Handle a literal string. Copy to an exact size buffer so that checks for
  overrunning work. */

  if (argv[i][0] == '=')
    {
    readsize = strlen(argv[i]) - 1;
    printf("------ <Literal> ------\n");
    printf("Length = %lu\n", (unsigned long)readsize);
    printf("%.*s\n", (int)readsize, argv[i]+1);
    buffer = (unsigned char *)malloc(readsize);
    if (buffer == NULL)
      printf("** Failed to allocate %lu bytes of memory\n",
        (unsigned long)readsize);
    else
      {
      memcpy(buffer, argv[i]+1, readsize);
      LLVMFuzzerTestOneInput(buffer, readsize);
      free(buffer);
      }
    continue;
    }

  /* Handle a string given in a file */

  f = fopen(argv[i], "rb");
  if (f == NULL)
    {
    printf("** Failed to open %s: %s\n", argv[i], strerror(errno));
    continue;
    }

  printf("------ %s ------\n", argv[i]);

  /* Find the file length by seeking to the end. ftell() reports failure as
  -1L, which must be caught before it is converted to an unsigned size. */

  endpos = (fseek(f, 0, SEEK_END) == 0)? ftell(f) : -1L;
  if (endpos < 0 || fseek(f, 0, SEEK_SET) != 0)
    {
    printf("** Failed to determine the size of %s: %s\n", argv[i],
      strerror(errno));
    fclose(f);
    continue;
    }
  filelen = (size_t)endpos;

  buffer = (unsigned char *)malloc(filelen);
  if (buffer == NULL)
    {
    printf("** Failed to allocate %lu bytes of memory\n",
      (unsigned long)filelen);
    fclose(f);
    continue;
    }

  readsize = fread(buffer, 1, filelen, f);
  fclose(f);

  if (readsize != filelen)
    printf("** File size is %lu but fread() returned %lu\n",
      (unsigned long)filelen, (unsigned long)readsize);
  else
    {
    printf("Length = %lu\n", (unsigned long)filelen);
    LLVMFuzzerTestOneInput(buffer, filelen);
    }
  free(buffer);
  }

return 0;
}
|
||||||
|
#endif /* STANDALONE */
|
||||||
|
|
||||||
|
/* End */
|
||||||
2235
3rd/pcre2/src/pcre2_internal.h
Normal file
2235
3rd/pcre2/src/pcre2_internal.h
Normal file
@@ -0,0 +1,2235 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE2 is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef PCRE2_INTERNAL_H_IDEMPOTENT_GUARD
|
||||||
|
#define PCRE2_INTERNAL_H_IDEMPOTENT_GUARD
|
||||||
|
|
||||||
|
/* We do not support both EBCDIC and Unicode at the same time. The "configure"
|
||||||
|
script prevents both being selected, but not everybody uses "configure". EBCDIC
|
||||||
|
is only supported for the 8-bit library, but the check for this has to be later
|
||||||
|
in this file, because the first part is not width-dependent, and is included by
|
||||||
|
pcre2test.c with CODE_UNIT_WIDTH == 0. */
|
||||||
|
|
||||||
|
#if defined EBCDIC && defined SUPPORT_UNICODE
|
||||||
|
#error The use of both EBCDIC and SUPPORT_UNICODE is not supported.
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* When compiling one of the libraries, the value of PCRE2_CODE_UNIT_WIDTH must
|
||||||
|
be 8, 16, or 32. AutoTools and CMake ensure that this is always the case, but
|
||||||
|
other building methods may not, so here is a check. It is cut out when
|
||||||
|
building pcre2test, because that sets the value to zero. No other source should
|
||||||
|
be including this file. There is no explicit way of forcing a compile to be
|
||||||
|
abandoned, but trying to include a non-existent file seems cleanest. Otherwise
|
||||||
|
there will be many irrelevant consequential errors. */
|
||||||
|
|
||||||
|
#if (!defined PCRE2_BUILDING_PCRE2TEST && !defined PCRE2_DFTABLES) && \
|
||||||
|
(!defined PCRE2_CODE_UNIT_WIDTH || \
|
||||||
|
(PCRE2_CODE_UNIT_WIDTH != 8 && \
|
||||||
|
PCRE2_CODE_UNIT_WIDTH != 16 && \
|
||||||
|
PCRE2_CODE_UNIT_WIDTH != 32))
|
||||||
|
#error PCRE2_CODE_UNIT_WIDTH must be defined as 8, 16, or 32.
|
||||||
|
#include <AbandonCompile>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/* Standard C headers */
|
||||||
|
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
/* Macros to make boolean values more obvious. The #ifndef is to pacify
|
||||||
|
compiler warnings in environments where these macros are defined elsewhere.
|
||||||
|
Unfortunately, there is no way to do the same for the typedef. */
|
||||||
|
|
||||||
|
typedef int BOOL;
|
||||||
|
#ifndef FALSE
|
||||||
|
#define FALSE 0
|
||||||
|
#define TRUE 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Helper macro for static (compile-time) assertions. Can be used inside
|
||||||
|
functions, or at the top-level of a file. */
|
||||||
|
#define STATIC_ASSERT_JOIN(a,b) a ## b
|
||||||
|
#define STATIC_ASSERT(cond, msg) \
|
||||||
|
typedef int STATIC_ASSERT_JOIN(static_assertion_,msg)[(cond)?1:-1]
|
||||||
|
|
||||||
|
/* Valgrind (memcheck) support */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_VALGRIND
|
||||||
|
#include <valgrind/memcheck.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* The -ftrivial-auto-var-init compiler option supports initializing all local variables
|
||||||
|
to avoid some classes of bug, but this can cause an unacceptable slowdown
|
||||||
|
for large on-stack arrays in hot functions. This macro lets us annotate
|
||||||
|
such arrays. */
|
||||||
|
|
||||||
|
#ifdef HAVE_ATTRIBUTE_UNINITIALIZED
|
||||||
|
#define PCRE2_KEEP_UNINITIALIZED __attribute__((uninitialized))
|
||||||
|
#else
|
||||||
|
#define PCRE2_KEEP_UNINITIALIZED
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Older versions of MSVC lack snprintf(). This define allows for
|
||||||
|
warning/error-free compilation and testing with MSVC compilers back to at least
|
||||||
|
MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
|
||||||
|
|
||||||
|
#if defined(_MSC_VER) && (_MSC_VER < 1900)
|
||||||
|
#define snprintf _snprintf
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* When compiling a DLL for Windows, the exported symbols have to be declared
|
||||||
|
using some MS magic. I found some useful information on this web page:
|
||||||
|
http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the
|
||||||
|
information there, using __declspec(dllexport) without "extern" we have a
|
||||||
|
definition; with "extern" we have a declaration. The settings here override the
|
||||||
|
setting in pcre2.h (which is included below); it defines only PCRE2_EXP_DECL,
|
||||||
|
which is all that is needed for applications (they just import the symbols). We
|
||||||
|
use:
|
||||||
|
|
||||||
|
PCRE2_EXP_DECL for declarations
|
||||||
|
PCRE2_EXP_DEFN for definitions
|
||||||
|
|
||||||
|
The reason for wrapping this in #ifndef PCRE2_EXP_DECL is so that pcre2test,
|
||||||
|
which is an application, but needs to import this file in order to "peek" at
|
||||||
|
internals, can #include pcre2.h first to get an application's-eye view.
|
||||||
|
|
||||||
|
In principle, people compiling for non-Windows, non-Unix-like (i.e. uncommon,
|
||||||
|
special-purpose environments) might want to stick other stuff in front of
|
||||||
|
exported symbols. That's why, in the non-Windows case, we set PCRE2_EXP_DEFN
|
||||||
|
only if it is not already set. */
|
||||||
|
|
||||||
|
#ifndef PCRE2_EXP_DECL
|
||||||
|
# ifdef _WIN32
|
||||||
|
# ifndef PCRE2_STATIC
|
||||||
|
# define PCRE2_EXP_DECL extern __declspec(dllexport)
|
||||||
|
# define PCRE2_EXP_DEFN __declspec(dllexport)
|
||||||
|
# else
|
||||||
|
# define PCRE2_EXP_DECL extern PCRE2_EXPORT
|
||||||
|
# define PCRE2_EXP_DEFN
|
||||||
|
# endif
|
||||||
|
# else
|
||||||
|
# ifdef __cplusplus
|
||||||
|
# define PCRE2_EXP_DECL extern "C" PCRE2_EXPORT
|
||||||
|
# else
|
||||||
|
# define PCRE2_EXP_DECL extern PCRE2_EXPORT
|
||||||
|
# endif
|
||||||
|
# ifndef PCRE2_EXP_DEFN
|
||||||
|
# define PCRE2_EXP_DEFN PCRE2_EXP_DECL
|
||||||
|
# endif
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Include the public PCRE2 header and the definitions of UCP character
|
||||||
|
property values. This must follow the setting of PCRE2_EXP_DECL above. */
|
||||||
|
|
||||||
|
#include "pcre2.h"
|
||||||
|
#include "pcre2_ucp.h"
|
||||||
|
|
||||||
|
/* When PCRE2 is compiled as a C++ library, the subject pointer can be replaced
|
||||||
|
with a custom type. This makes it possible, for example, to allow pcre2_match()
|
||||||
|
to process subject strings that are discontinuous by using a smart pointer
|
||||||
|
class. It must always be possible to inspect all of the subject string in
|
||||||
|
pcre2_match() because of the way it backtracks. */
|
||||||
|
|
||||||
|
/* WARNING: This is as yet untested for PCRE2. */
|
||||||
|
|
||||||
|
#ifdef CUSTOM_SUBJECT_PTR
|
||||||
|
#undef PCRE2_SPTR
|
||||||
|
#define PCRE2_SPTR CUSTOM_SUBJECT_PTR
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* When checking for integer overflow, we need to handle large integers.
|
||||||
|
If a 64-bit integer type is available, we can use that.
|
||||||
|
Otherwise we have to cast to double, which of course requires floating point
|
||||||
|
arithmetic. Handle this by defining a macro for the appropriate type. */
|
||||||
|
|
||||||
|
#if defined INT64_MAX || defined int64_t
|
||||||
|
#define INT64_OR_DOUBLE int64_t
|
||||||
|
#else
|
||||||
|
#define INT64_OR_DOUBLE double
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* External (in the C sense) functions and tables that are private to the
|
||||||
|
libraries are always referenced using the PRIV macro. This makes it possible
|
||||||
|
for pcre2test.c to include some of the source files from the libraries using a
|
||||||
|
different PRIV definition to avoid name clashes. It also makes it clear in the
|
||||||
|
code that a non-static object is being referenced. */
|
||||||
|
|
||||||
|
#ifndef PRIV
|
||||||
|
#define PRIV(name) _pcre2_##name
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* When compiling for use with the Virtual Pascal compiler, these functions
|
||||||
|
need to have their names changed. PCRE2 must be compiled with the -DVPCOMPAT
|
||||||
|
option on the command line. */
|
||||||
|
|
||||||
|
#ifdef VPCOMPAT
|
||||||
|
#define strlen(s) _strlen(s)
|
||||||
|
#define strncmp(s1,s2,m) _strncmp(s1,s2,m)
|
||||||
|
#define memcmp(s,c,n) _memcmp(s,c,n)
|
||||||
|
#define memcpy(d,s,n) _memcpy(d,s,n)
|
||||||
|
#define memmove(d,s,n) _memmove(d,s,n)
|
||||||
|
#define memset(s,c,n) _memset(s,c,n)
|
||||||
|
#else /* VPCOMPAT */
|
||||||
|
|
||||||
|
/* Otherwise, to cope with SunOS4 and other systems that lack memmove(), define
|
||||||
|
a macro that calls an emulating function. */
|
||||||
|
|
||||||
|
#ifndef HAVE_MEMMOVE
|
||||||
|
#undef memmove /* Some systems may have a macro */
|
||||||
|
#define memmove(a, b, c) PRIV(memmove)(a, b, c)
|
||||||
|
#endif /* not HAVE_MEMMOVE */
|
||||||
|
#endif /* not VPCOMPAT */
|
||||||
|
|
||||||
|
/* This is an unsigned int value that no UTF character can ever have, as
|
||||||
|
Unicode doesn't go beyond 0x0010ffff. */
|
||||||
|
|
||||||
|
#define NOTACHAR 0xffffffff
|
||||||
|
|
||||||
|
/* This is the largest valid UTF/Unicode code point. */
|
||||||
|
|
||||||
|
#define MAX_UTF_CODE_POINT 0x10ffff
|
||||||
|
|
||||||
|
/* Compile-time positive error numbers (all except UTF errors, which are
|
||||||
|
negative) start at this value. It should probably never be changed, in case
|
||||||
|
some application is checking for specific numbers. There is a copy of this
|
||||||
|
#define in pcre2posix.c (which now no longer includes this file). Ideally, a
|
||||||
|
way of having a single definition should be found, but as the number is
|
||||||
|
unlikely to change, this is not a pressing issue. The original reason for
|
||||||
|
having a base other than 0 was to keep the absolute values of compile-time and
|
||||||
|
run-time error numbers numerically different, but in the event the code does
|
||||||
|
not rely on this. */
|
||||||
|
|
||||||
|
#define COMPILE_ERROR_BASE 100
|
||||||
|
|
||||||
|
/* The initial frames vector for remembering pcre2_match() backtracking points
|
||||||
|
is allocated on the heap, of this size (bytes) or ten times the frame size if
|
||||||
|
larger, unless the heap limit is smaller. Typical frame sizes are a few hundred
|
||||||
|
bytes (it depends on the number of capturing parentheses) so 20KiB handles
|
||||||
|
quite a few frames. A larger vector on the heap is obtained for matches that
|
||||||
|
need more frames, subject to the heap limit. */
|
||||||
|
|
||||||
|
#define START_FRAMES_SIZE 20480
|
||||||
|
|
||||||
|
/* For DFA matching, an initial internal workspace vector is allocated on the
|
||||||
|
stack. The heap is used only if this turns out to be too small. */
|
||||||
|
|
||||||
|
#define DFA_START_RWS_SIZE 30720
|
||||||
|
|
||||||
|
/* Define the default BSR convention. */
|
||||||
|
|
||||||
|
#ifdef BSR_ANYCRLF
|
||||||
|
#define BSR_DEFAULT PCRE2_BSR_ANYCRLF
|
||||||
|
#else
|
||||||
|
#define BSR_DEFAULT PCRE2_BSR_UNICODE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/* ---------------- Basic UTF-8 macros ---------------- */
|
||||||
|
|
||||||
|
/* These UTF-8 macros are always defined because they are used in pcre2test for
|
||||||
|
handling wide characters in 16-bit and 32-bit modes, even if an 8-bit library
|
||||||
|
is not supported. */
|
||||||
|
|
||||||
|
/* Tests whether c, the first byte (code unit) of a UTF-8 character, indicates
that extra bytes are needed to decode the character. */
|
||||||
|
|
||||||
|
#define HASUTF8EXTRALEN(c) ((c) >= 0xc0)
|
||||||
|
|
||||||
|
/* The following macros were originally written in the form of loops that used
|
||||||
|
data from the tables whose names start with PRIV(utf8_table). They were
|
||||||
|
rewritten by a user so as not to use loops, because in some environments this
|
||||||
|
gives a significant performance advantage, and it seems never to do any harm.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Base macro to pick up the remaining bytes of a UTF-8 character, not
advancing the pointer. On entry, c is expected to hold the first byte of the
character already (only eptr[1] onwards is read here). The high bits of c
select the sequence length: one to five following bytes are read, and each
contributes its low six bits to the result, which is stored back in c.

c must be a modifiable lvalue. Both arguments are evaluated more than once, so
they must not have side effects. Every use of an argument is parenthesized so
that expressions such as "buf + 1" or "*pp" expand correctly. */

#define GETUTF8(c, eptr) \
  { \
  if (((c) & 0x20u) == 0) \
    (c) = (((c) & 0x1fu) << 6) | ((eptr)[1] & 0x3fu); \
  else if (((c) & 0x10u) == 0) \
    (c) = (((c) & 0x0fu) << 12) | (((eptr)[1] & 0x3fu) << 6) | \
          ((eptr)[2] & 0x3fu); \
  else if (((c) & 0x08u) == 0) \
    (c) = (((c) & 0x07u) << 18) | (((eptr)[1] & 0x3fu) << 12) | \
          (((eptr)[2] & 0x3fu) << 6) | ((eptr)[3] & 0x3fu); \
  else if (((c) & 0x04u) == 0) \
    (c) = (((c) & 0x03u) << 24) | (((eptr)[1] & 0x3fu) << 18) | \
          (((eptr)[2] & 0x3fu) << 12) | (((eptr)[3] & 0x3fu) << 6) | \
          ((eptr)[4] & 0x3fu); \
  else \
    (c) = (((c) & 0x01u) << 30) | (((eptr)[1] & 0x3fu) << 24) | \
          (((eptr)[2] & 0x3fu) << 18) | (((eptr)[3] & 0x3fu) << 12) | \
          (((eptr)[4] & 0x3fu) << 6) | ((eptr)[5] & 0x3fu); \
  }
|
||||||
|
|
||||||
|
/* Base macro to pick up the remaining bytes of a UTF-8 character, advancing
the pointer. On entry, c is expected to hold the first byte of the character
and eptr to point at the byte that follows it (decoding starts at *eptr). The
high bits of c select how many following bytes (one to five) are consumed;
each contributes its low six bits. On exit, c holds the decoded value and eptr
has been moved past the bytes that were read.

c and eptr must both be modifiable lvalues, and both are evaluated more than
once, so they must not have side effects. Every use of an argument is
parenthesized so that lvalue expressions such as "*pp" expand correctly. */

#define GETUTF8INC(c, eptr) \
  { \
  if (((c) & 0x20u) == 0) \
    (c) = (((c) & 0x1fu) << 6) | (*(eptr)++ & 0x3fu); \
  else if (((c) & 0x10u) == 0) \
    { \
    (c) = (((c) & 0x0fu) << 12) | ((*(eptr) & 0x3fu) << 6) | \
          ((eptr)[1] & 0x3fu); \
    (eptr) += 2; \
    } \
  else if (((c) & 0x08u) == 0) \
    { \
    (c) = (((c) & 0x07u) << 18) | ((*(eptr) & 0x3fu) << 12) | \
          (((eptr)[1] & 0x3fu) << 6) | ((eptr)[2] & 0x3fu); \
    (eptr) += 3; \
    } \
  else if (((c) & 0x04u) == 0) \
    { \
    (c) = (((c) & 0x03u) << 24) | ((*(eptr) & 0x3fu) << 18) | \
          (((eptr)[1] & 0x3fu) << 12) | (((eptr)[2] & 0x3fu) << 6) | \
          ((eptr)[3] & 0x3fu); \
    (eptr) += 4; \
    } \
  else \
    { \
    (c) = (((c) & 0x01u) << 30) | ((*(eptr) & 0x3fu) << 24) | \
          (((eptr)[1] & 0x3fu) << 18) | (((eptr)[2] & 0x3fu) << 12) | \
          (((eptr)[3] & 0x3fu) << 6) | ((eptr)[4] & 0x3fu); \
    (eptr) += 5; \
    } \
  }
|
||||||
|
|
||||||
|
/* Base macro to pick up the remaining bytes of a UTF-8 character, not
advancing the pointer, incrementing the length. On entry, c is expected to
hold the first byte of the character already (only eptr[1] onwards is read
here). The high bits of c select how many following bytes (one to five) are
read; each contributes its low six bits to the result stored back in c. The
number of following bytes that were read is added to len.

c and len must be modifiable lvalues. All arguments are evaluated more than
once, so they must not have side effects. Every use of an argument is
parenthesized so that expressions such as "buf + 1" or "*lenptr" expand
correctly. */

#define GETUTF8LEN(c, eptr, len) \
  { \
  if (((c) & 0x20u) == 0) \
    { \
    (c) = (((c) & 0x1fu) << 6) | ((eptr)[1] & 0x3fu); \
    (len)++; \
    } \
  else if (((c) & 0x10u) == 0) \
    { \
    (c) = (((c) & 0x0fu) << 12) | (((eptr)[1] & 0x3fu) << 6) | \
          ((eptr)[2] & 0x3fu); \
    (len) += 2; \
    } \
  else if (((c) & 0x08u) == 0) \
    { \
    (c) = (((c) & 0x07u) << 18) | (((eptr)[1] & 0x3fu) << 12) | \
          (((eptr)[2] & 0x3fu) << 6) | ((eptr)[3] & 0x3fu); \
    (len) += 3; \
    } \
  else if (((c) & 0x04u) == 0) \
    { \
    (c) = (((c) & 0x03u) << 24) | (((eptr)[1] & 0x3fu) << 18) | \
          (((eptr)[2] & 0x3fu) << 12) | (((eptr)[3] & 0x3fu) << 6) | \
          ((eptr)[4] & 0x3fu); \
    (len) += 4; \
    } \
  else \
    { \
    (c) = (((c) & 0x01u) << 30) | (((eptr)[1] & 0x3fu) << 24) | \
          (((eptr)[2] & 0x3fu) << 18) | (((eptr)[3] & 0x3fu) << 12) | \
          (((eptr)[4] & 0x3fu) << 6) | ((eptr)[5] & 0x3fu); \
    (len) += 5; \
    } \
  }
|
||||||
|
|
||||||
|
/* --------------- Whitespace macros ---------------- */
|
||||||
|
|
||||||
|
/* Tests for Unicode horizontal and vertical whitespace characters must check a
|
||||||
|
number of different values. Using a switch statement for this generates the
|
||||||
|
fastest code (no loop, no memory access), and there are several places in the
|
||||||
|
interpreter code where this happens. In order to ensure that all the case lists
|
||||||
|
remain in step, we use macros so that there is only one place where the lists
|
||||||
|
are defined.
|
||||||
|
|
||||||
|
These values are also required as lists in pcre2_compile.c when processing \h,
|
||||||
|
\H, \v and \V in a character class. The lists are defined in pcre2_tables.c,
|
||||||
|
but macros that define the values are here so that all the definitions are
|
||||||
|
together. The lists must be in ascending character order, terminated by
|
||||||
|
NOTACHAR (which is 0xffffffff).
|
||||||
|
|
||||||
|
Any changes should ensure that the various macros are kept in step with each
|
||||||
|
other. NOTE: The values also appear in pcre2_jit_compile.c. */
|
||||||
|
|
||||||
|
/* -------------- ASCII/Unicode environments -------------- */
|
||||||
|
|
||||||
|
#ifndef EBCDIC
|
||||||
|
|
||||||
|
/* Character U+180E (Mongolian Vowel Separator) is not included in the list of
|
||||||
|
spaces in the Unicode file PropList.txt, and Perl does not recognize it as a
|
||||||
|
space. However, in many other sources it is listed as a space and has been in
|
||||||
|
PCRE (both APIs) for a long time. */
|
||||||
|
|
||||||
|
#define HSPACE_LIST \
|
||||||
|
CHAR_HT, CHAR_SPACE, CHAR_NBSP, \
|
||||||
|
0x1680, 0x180e, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, \
|
||||||
|
0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202f, 0x205f, 0x3000, \
|
||||||
|
NOTACHAR
|
||||||
|
|
||||||
|
#define HSPACE_MULTIBYTE_CASES \
|
||||||
|
case 0x1680: /* OGHAM SPACE MARK */ \
|
||||||
|
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ \
|
||||||
|
case 0x2000: /* EN QUAD */ \
|
||||||
|
case 0x2001: /* EM QUAD */ \
|
||||||
|
case 0x2002: /* EN SPACE */ \
|
||||||
|
case 0x2003: /* EM SPACE */ \
|
||||||
|
case 0x2004: /* THREE-PER-EM SPACE */ \
|
||||||
|
case 0x2005: /* FOUR-PER-EM SPACE */ \
|
||||||
|
case 0x2006: /* SIX-PER-EM SPACE */ \
|
||||||
|
case 0x2007: /* FIGURE SPACE */ \
|
||||||
|
case 0x2008: /* PUNCTUATION SPACE */ \
|
||||||
|
case 0x2009: /* THIN SPACE */ \
|
||||||
|
case 0x200A: /* HAIR SPACE */ \
|
||||||
|
case 0x202f: /* NARROW NO-BREAK SPACE */ \
|
||||||
|
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ \
|
||||||
|
case 0x3000 /* IDEOGRAPHIC SPACE */
|
||||||
|
|
||||||
|
#define HSPACE_BYTE_CASES \
|
||||||
|
case CHAR_HT: \
|
||||||
|
case CHAR_SPACE: \
|
||||||
|
case CHAR_NBSP
|
||||||
|
|
||||||
|
#define HSPACE_CASES \
|
||||||
|
HSPACE_BYTE_CASES: \
|
||||||
|
HSPACE_MULTIBYTE_CASES
|
||||||
|
|
||||||
|
#define VSPACE_LIST \
|
||||||
|
CHAR_LF, CHAR_VT, CHAR_FF, CHAR_CR, CHAR_NEL, 0x2028, 0x2029, NOTACHAR
|
||||||
|
|
||||||
|
#define VSPACE_MULTIBYTE_CASES \
|
||||||
|
case 0x2028: /* LINE SEPARATOR */ \
|
||||||
|
case 0x2029 /* PARAGRAPH SEPARATOR */
|
||||||
|
|
||||||
|
#define VSPACE_BYTE_CASES \
|
||||||
|
case CHAR_LF: \
|
||||||
|
case CHAR_VT: \
|
||||||
|
case CHAR_FF: \
|
||||||
|
case CHAR_CR: \
|
||||||
|
case CHAR_NEL
|
||||||
|
|
||||||
|
#define VSPACE_CASES \
|
||||||
|
VSPACE_BYTE_CASES: \
|
||||||
|
VSPACE_MULTIBYTE_CASES
|
||||||
|
|
||||||
|
/* -------------- EBCDIC environments -------------- */
|
||||||
|
|
||||||
|
#else
|
||||||
|
#define HSPACE_LIST CHAR_HT, CHAR_SPACE, CHAR_NBSP, NOTACHAR
|
||||||
|
|
||||||
|
#define HSPACE_BYTE_CASES \
|
||||||
|
case CHAR_HT: \
|
||||||
|
case CHAR_SPACE: \
|
||||||
|
case CHAR_NBSP
|
||||||
|
|
||||||
|
#define HSPACE_CASES HSPACE_BYTE_CASES
|
||||||
|
|
||||||
|
#ifdef EBCDIC_NL25
|
||||||
|
#define VSPACE_LIST \
|
||||||
|
CHAR_VT, CHAR_FF, CHAR_CR, CHAR_NEL, CHAR_LF, NOTACHAR
|
||||||
|
#else
|
||||||
|
#define VSPACE_LIST \
|
||||||
|
CHAR_VT, CHAR_FF, CHAR_CR, CHAR_LF, CHAR_NEL, NOTACHAR
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define VSPACE_BYTE_CASES \
|
||||||
|
case CHAR_LF: \
|
||||||
|
case CHAR_VT: \
|
||||||
|
case CHAR_FF: \
|
||||||
|
case CHAR_CR: \
|
||||||
|
case CHAR_NEL
|
||||||
|
|
||||||
|
#define VSPACE_CASES VSPACE_BYTE_CASES
|
||||||
|
#endif /* EBCDIC */
|
||||||
|
|
||||||
|
/* -------------- End of whitespace macros -------------- */
|
||||||
|
|
||||||
|
|
||||||
|
/* PCRE2 is able to support several different kinds of newline (CR, LF, CRLF,
|
||||||
|
"any" and "anycrlf" at present). The following macros are used to package up
|
||||||
|
testing for newlines. NLBLOCK, PSSTART, and PSEND are defined in the various
|
||||||
|
modules to indicate in which datablock the parameters exist, and what the
|
||||||
|
start/end of string field names are. */
|
||||||
|
|
||||||
|
#define NLTYPE_FIXED 0 /* Newline is a fixed length string */
|
||||||
|
#define NLTYPE_ANY 1 /* Newline is any Unicode line ending */
|
||||||
|
#define NLTYPE_ANYCRLF 2 /* Newline is CR, LF, or CRLF */
|
||||||
|
|
||||||
|
/* This macro checks for a newline at the given position. It yields a non-zero
value when a newline starts at p.

When NLBLOCK->nltype is not NLTYPE_FIXED, p must be before NLBLOCK->PSEND and
the decision is delegated to PRIV(is_newline)(), which is also handed the
address of NLBLOCK->nllen (presumably so that it can record the length of the
newline it found - confirm against that function). Otherwise the newline is a
fixed string of NLBLOCK->nllen code units: there must be room for it before
PSEND, and the one or two code units at p must equal NLBLOCK->nl[0] and, when
nllen is not 1, NLBLOCK->nl[1].

NLBLOCK and PSEND must be defined by the including module, and a variable
called "utf" must be in scope where the macro is used. The argument is
evaluated more than once, so it must not have side effects; it is parenthesized
at every use so that any pointer expression may be passed. */

#define IS_NEWLINE(p) \
  ((NLBLOCK->nltype != NLTYPE_FIXED)? \
    ((p) < NLBLOCK->PSEND && \
     PRIV(is_newline)((p), NLBLOCK->nltype, NLBLOCK->PSEND, \
       &(NLBLOCK->nllen), utf)) \
    : \
    ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
     UCHAR21TEST(p) == NLBLOCK->nl[0] && \
     (NLBLOCK->nllen == 1 || UCHAR21TEST((p)+1) == NLBLOCK->nl[1]) \
    ) \
  )
|
||||||
|
|
||||||
|
/* This macro checks for a newline immediately preceding the given position.
It yields a non-zero value when a newline ends at p.

When NLBLOCK->nltype is not NLTYPE_FIXED, p must be after NLBLOCK->PSSTART and
the decision is delegated to PRIV(was_newline)(), which is also handed the
address of NLBLOCK->nllen (presumably so that it can record the length of the
newline it found - confirm against that function). Otherwise the newline is a
fixed string of NLBLOCK->nllen code units: p must be at least that far past
PSSTART, and the one or two code units starting at p - nllen must equal
NLBLOCK->nl[0] and, when nllen is not 1, NLBLOCK->nl[1].

NLBLOCK and PSSTART must be defined by the including module, and a variable
called "utf" must be in scope where the macro is used. The argument is
evaluated more than once, so it must not have side effects; it is parenthesized
at every use so that any pointer expression may be passed. */

#define WAS_NEWLINE(p) \
  ((NLBLOCK->nltype != NLTYPE_FIXED)? \
    ((p) > NLBLOCK->PSSTART && \
     PRIV(was_newline)((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \
       &(NLBLOCK->nllen), utf)) \
    : \
    ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
     UCHAR21TEST((p) - NLBLOCK->nllen) == NLBLOCK->nl[0] && \
     (NLBLOCK->nllen == 1 || \
      UCHAR21TEST((p) - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \
    ) \
  )
|
||||||
|
|
||||||
|
/* Private flags containing information about the compiled pattern. The first
|
||||||
|
three must not be changed, because whichever is set is actually the number of
|
||||||
|
bytes in a code unit in that mode. */
|
||||||
|
|
||||||
|
#define PCRE2_MODE8 0x00000001u /* compiled in 8 bit mode */
|
||||||
|
#define PCRE2_MODE16 0x00000002u /* compiled in 16 bit mode */
|
||||||
|
#define PCRE2_MODE32 0x00000004u /* compiled in 32 bit mode */
|
||||||
|
#define PCRE2_FIRSTSET 0x00000010u /* first_code unit is set */
|
||||||
|
#define PCRE2_FIRSTCASELESS 0x00000020u /* caseless first code unit */
|
||||||
|
#define PCRE2_FIRSTMAPSET 0x00000040u /* bitmap of first code units is set */
|
||||||
|
#define PCRE2_LASTSET 0x00000080u /* last code unit is set */
|
||||||
|
#define PCRE2_LASTCASELESS 0x00000100u /* caseless last code unit */
|
||||||
|
#define PCRE2_STARTLINE 0x00000200u /* start after \n for multiline */
|
||||||
|
#define PCRE2_JCHANGED 0x00000400u /* j option used in pattern */
|
||||||
|
#define PCRE2_HASCRORLF 0x00000800u /* explicit \r or \n in pattern */
|
||||||
|
#define PCRE2_HASTHEN 0x00001000u /* pattern contains (*THEN) */
|
||||||
|
#define PCRE2_MATCH_EMPTY 0x00002000u /* pattern can match empty string */
|
||||||
|
#define PCRE2_BSR_SET 0x00004000u /* BSR was set in the pattern */
|
||||||
|
#define PCRE2_NL_SET 0x00008000u /* newline was set in the pattern */
|
||||||
|
#define PCRE2_NOTEMPTY_SET 0x00010000u /* (*NOTEMPTY) used ) keep */
|
||||||
|
#define PCRE2_NE_ATST_SET 0x00020000u /* (*NOTEMPTY_ATSTART) used) together */
|
||||||
|
#define PCRE2_DEREF_TABLES 0x00040000u /* release character tables */
|
||||||
|
#define PCRE2_NOJIT 0x00080000u /* (*NOJIT) used */
|
||||||
|
#define PCRE2_HASBKPORX 0x00100000u /* contains \P, \p, or \X */
|
||||||
|
#define PCRE2_DUPCAPUSED 0x00200000u /* contains (?| */
|
||||||
|
#define PCRE2_HASBKC 0x00400000u /* contains \C */
|
||||||
|
#define PCRE2_HASACCEPT 0x00800000u /* contains (*ACCEPT) */
|
||||||
|
|
||||||
|
#define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)
|
||||||
|
|
||||||
|
/* Values for the matchedby field in a match data block. */
|
||||||
|
|
||||||
|
enum { PCRE2_MATCHEDBY_INTERPRETER, /* pcre2_match() */
|
||||||
|
PCRE2_MATCHEDBY_DFA_INTERPRETER, /* pcre2_dfa_match() */
|
||||||
|
PCRE2_MATCHEDBY_JIT }; /* pcre2_jit_match() */
|
||||||
|
|
||||||
|
/* Values for the flags field in a match data block. */
|
||||||
|
|
||||||
|
#define PCRE2_MD_COPIED_SUBJECT 0x01u
|
||||||
|
|
||||||
|
/* Magic number to provide a small check against being handed junk. */
|
||||||
|
|
||||||
|
#define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */
|
||||||
|
|
||||||
|
/* The maximum remaining length of subject we are prepared to search for a
|
||||||
|
req_unit match from an anchored pattern. In 8-bit mode, memchr() is used and is
|
||||||
|
much faster than the search loop that has to be used in 16-bit and 32-bit
|
||||||
|
modes. */
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
#define REQ_CU_MAX 5000
|
||||||
|
#else
|
||||||
|
#define REQ_CU_MAX 2000
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* The maximum nesting depth for Unicode character class sets.
|
||||||
|
Currently fixed. Warning: the interpreter relies on this so it can encode
|
||||||
|
the operand stack in a uint32_t. A nesting limit of 15 implies (15*2+1)=31
|
||||||
|
stack operands required, due to the fact that we have two (and only two)
|
||||||
|
levels of operator precedence. In the UTS#18 syntax, you can write 'x&&y[z]'
|
||||||
|
and in Perl syntax you can write '(?[ x - y & (z) ])', both of which imply
|
||||||
|
pushing the match results for x & y to the stack. */
|
||||||
|
|
||||||
|
#define ECLASS_NEST_LIMIT 15
|
||||||
|
|
||||||
|
/* Offsets for the bitmap tables in the cbits set of tables. Each table
|
||||||
|
contains a set of bits for a class map. Some classes are built by combining
|
||||||
|
these tables. */
|
||||||
|
|
||||||
|
#define cbit_space 0 /* [:space:] or \s */
|
||||||
|
#define cbit_xdigit 32 /* [:xdigit:] */
|
||||||
|
#define cbit_digit 64 /* [:digit:] or \d */
|
||||||
|
#define cbit_upper 96 /* [:upper:] */
|
||||||
|
#define cbit_lower 128 /* [:lower:] */
|
||||||
|
#define cbit_word 160 /* [:word:] or \w */
|
||||||
|
#define cbit_graph 192 /* [:graph:] */
|
||||||
|
#define cbit_print 224 /* [:print:] */
|
||||||
|
#define cbit_punct 256 /* [:punct:] */
|
||||||
|
#define cbit_cntrl 288 /* [:cntrl:] */
|
||||||
|
#define cbit_length 320 /* Length of the cbits table */
|
||||||
|
|
||||||
|
/* Bit definitions for entries in the ctypes table. Do not change these values
|
||||||
|
without checking pcre2_jit_compile.c, which has an assertion to ensure that
|
||||||
|
ctype_word has the value 16. */
|
||||||
|
|
||||||
|
#define ctype_space 0x01
|
||||||
|
#define ctype_letter 0x02
|
||||||
|
#define ctype_lcletter 0x04
|
||||||
|
#define ctype_digit 0x08
|
||||||
|
#define ctype_word 0x10 /* alphanumeric or '_' */
|
||||||
|
|
||||||
|
/* Offsets of the various tables from the base tables pointer, and
|
||||||
|
total length of the tables. */
|
||||||
|
|
||||||
|
#define lcc_offset 0 /* Lower case */
|
||||||
|
#define fcc_offset 256 /* Flip case */
|
||||||
|
#define cbits_offset 512 /* Character classes */
|
||||||
|
#define ctypes_offset (cbits_offset + cbit_length) /* Character types */
|
||||||
|
#define TABLES_LENGTH (ctypes_offset + 256)
|
||||||
|
|
||||||
|
/* Private flags used in compile_context.optimization_flags */
|
||||||
|
|
||||||
|
#define PCRE2_OPTIM_AUTO_POSSESS 0x00000001u
|
||||||
|
#define PCRE2_OPTIM_DOTSTAR_ANCHOR 0x00000002u
|
||||||
|
#define PCRE2_OPTIM_START_OPTIMIZE 0x00000004u
|
||||||
|
|
||||||
|
#define PCRE2_OPTIMIZATION_ALL 0x00000007u
|
||||||
|
|
||||||
|
/* -------------------- Character and string names ------------------------ */
|
||||||
|
|
||||||
|
/* If PCRE2 is to support UTF-8 on EBCDIC platforms, we cannot use normal
|
||||||
|
character constants like '*' because the compiler would emit their EBCDIC code,
|
||||||
|
which is different from their ASCII/UTF-8 code. Instead we define macros for
|
||||||
|
the characters so that they always use the ASCII/UTF-8 code when UTF-8 support
|
||||||
|
is enabled. When UTF-8 support is not enabled, the definitions use character
|
||||||
|
literals. Both character and string versions of each character are needed, and
|
||||||
|
there are some longer strings as well.
|
||||||
|
|
||||||
|
This means that, on EBCDIC platforms, the PCRE2 library can handle either
|
||||||
|
EBCDIC, or UTF-8, but not both. To support both in the same compiled library
|
||||||
|
would need different lookups depending on whether PCRE2_UTF was set or not.
|
||||||
|
This would make it impossible to use characters in switch/case statements,
|
||||||
|
which would reduce performance. For a theoretical use (which nobody has asked
|
||||||
|
for) in a minority area (EBCDIC platforms), this is not sensible. Any
|
||||||
|
application that did need both could compile two versions of the library, using
|
||||||
|
macros to give the functions distinct names. */
|
||||||
|
|
||||||
|
#ifndef SUPPORT_UNICODE
|
||||||
|
|
||||||
|
/* UTF-8 support is not enabled; use the platform-dependent character literals
|
||||||
|
so that PCRE2 works in both ASCII and EBCDIC environments, but only in non-UTF
|
||||||
|
mode. Newline characters are problematic in EBCDIC. Though it has CR and LF
|
||||||
|
characters, a common practice has been to use its NL (0x15) character as the
|
||||||
|
line terminator in C-like processing environments. However, sometimes the LF
|
||||||
|
(0x25) character is used instead, according to this Unicode document:
|
||||||
|
|
||||||
|
http://unicode.org/standard/reports/tr13/tr13-5.html
|
||||||
|
|
||||||
|
PCRE2 defaults EBCDIC NL to 0x15, but has a build-time option to select 0x25
|
||||||
|
instead. Whichever is *not* chosen is defined as NEL.
|
||||||
|
|
||||||
|
In both ASCII and EBCDIC environments, CHAR_NL and CHAR_LF are synonyms for the
|
||||||
|
same code point. */
|
||||||
|
|
||||||
|
#ifdef EBCDIC
|
||||||
|
|
||||||
|
#ifndef EBCDIC_NL25
|
||||||
|
#define CHAR_NL '\x15'
|
||||||
|
#define CHAR_NEL '\x25'
|
||||||
|
#define STR_NL "\x15"
|
||||||
|
#define STR_NEL "\x25"
|
||||||
|
#else
|
||||||
|
#define CHAR_NL '\x25'
|
||||||
|
#define CHAR_NEL '\x15'
|
||||||
|
#define STR_NL "\x25"
|
||||||
|
#define STR_NEL "\x15"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define CHAR_LF CHAR_NL
|
||||||
|
#define STR_LF STR_NL
|
||||||
|
|
||||||
|
#define CHAR_ESC '\047'
|
||||||
|
#define CHAR_DEL '\007'
|
||||||
|
#define CHAR_NBSP ((unsigned char)'\x41')
|
||||||
|
#define STR_ESC "\047"
|
||||||
|
#define STR_DEL "\007"
|
||||||
|
|
||||||
|
#else /* Not EBCDIC */
|
||||||
|
|
||||||
|
/* In ASCII/Unicode, linefeed is '\n' and we equate this to NL for
|
||||||
|
compatibility. NEL is the Unicode newline character; make sure it is
|
||||||
|
a positive value. */
|
||||||
|
|
||||||
|
#define CHAR_LF '\n'
|
||||||
|
#define CHAR_NL CHAR_LF
|
||||||
|
#define CHAR_NEL ((unsigned char)'\x85')
|
||||||
|
#define CHAR_ESC '\033'
|
||||||
|
#define CHAR_DEL '\177'
|
||||||
|
#define CHAR_NBSP ((unsigned char)'\xa0')
|
||||||
|
|
||||||
|
#define STR_LF "\n"
|
||||||
|
#define STR_NL STR_LF
|
||||||
|
#define STR_NEL "\x85"
|
||||||
|
#define STR_ESC "\033"
|
||||||
|
#define STR_DEL "\177"
|
||||||
|
|
||||||
|
#endif /* EBCDIC */
|
||||||
|
|
||||||
|
/* The remaining definitions work in both environments. */
|
||||||
|
|
||||||
|
#define CHAR_NUL '\0'
|
||||||
|
#define CHAR_HT '\t'
|
||||||
|
#define CHAR_VT '\v'
|
||||||
|
#define CHAR_FF '\f'
|
||||||
|
#define CHAR_CR '\r'
|
||||||
|
#define CHAR_BS '\b'
|
||||||
|
#define CHAR_BEL '\a'
|
||||||
|
|
||||||
|
#define CHAR_SPACE ' '
|
||||||
|
#define CHAR_EXCLAMATION_MARK '!'
|
||||||
|
#define CHAR_QUOTATION_MARK '"'
|
||||||
|
#define CHAR_NUMBER_SIGN '#'
|
||||||
|
#define CHAR_DOLLAR_SIGN '$'
|
||||||
|
#define CHAR_PERCENT_SIGN '%'
|
||||||
|
#define CHAR_AMPERSAND '&'
|
||||||
|
#define CHAR_APOSTROPHE '\''
|
||||||
|
#define CHAR_LEFT_PARENTHESIS '('
|
||||||
|
#define CHAR_RIGHT_PARENTHESIS ')'
|
||||||
|
#define CHAR_ASTERISK '*'
|
||||||
|
#define CHAR_PLUS '+'
|
||||||
|
#define CHAR_COMMA ','
|
||||||
|
#define CHAR_MINUS '-'
|
||||||
|
#define CHAR_DOT '.'
|
||||||
|
#define CHAR_SLASH '/'
|
||||||
|
#define CHAR_0 '0'
|
||||||
|
#define CHAR_1 '1'
|
||||||
|
#define CHAR_2 '2'
|
||||||
|
#define CHAR_3 '3'
|
||||||
|
#define CHAR_4 '4'
|
||||||
|
#define CHAR_5 '5'
|
||||||
|
#define CHAR_6 '6'
|
||||||
|
#define CHAR_7 '7'
|
||||||
|
#define CHAR_8 '8'
|
||||||
|
#define CHAR_9 '9'
|
||||||
|
#define CHAR_COLON ':'
|
||||||
|
#define CHAR_SEMICOLON ';'
|
||||||
|
#define CHAR_LESS_THAN_SIGN '<'
|
||||||
|
#define CHAR_EQUALS_SIGN '='
|
||||||
|
#define CHAR_GREATER_THAN_SIGN '>'
|
||||||
|
#define CHAR_QUESTION_MARK '?'
|
||||||
|
#define CHAR_COMMERCIAL_AT '@'
|
||||||
|
#define CHAR_A 'A'
|
||||||
|
#define CHAR_B 'B'
|
||||||
|
#define CHAR_C 'C'
|
||||||
|
#define CHAR_D 'D'
|
||||||
|
#define CHAR_E 'E'
|
||||||
|
#define CHAR_F 'F'
|
||||||
|
#define CHAR_G 'G'
|
||||||
|
#define CHAR_H 'H'
|
||||||
|
#define CHAR_I 'I'
|
||||||
|
#define CHAR_J 'J'
|
||||||
|
#define CHAR_K 'K'
|
||||||
|
#define CHAR_L 'L'
|
||||||
|
#define CHAR_M 'M'
|
||||||
|
#define CHAR_N 'N'
|
||||||
|
#define CHAR_O 'O'
|
||||||
|
#define CHAR_P 'P'
|
||||||
|
#define CHAR_Q 'Q'
|
||||||
|
#define CHAR_R 'R'
|
||||||
|
#define CHAR_S 'S'
|
||||||
|
#define CHAR_T 'T'
|
||||||
|
#define CHAR_U 'U'
|
||||||
|
#define CHAR_V 'V'
|
||||||
|
#define CHAR_W 'W'
|
||||||
|
#define CHAR_X 'X'
|
||||||
|
#define CHAR_Y 'Y'
|
||||||
|
#define CHAR_Z 'Z'
|
||||||
|
#define CHAR_LEFT_SQUARE_BRACKET '['
|
||||||
|
#define CHAR_BACKSLASH '\\'
|
||||||
|
#define CHAR_RIGHT_SQUARE_BRACKET ']'
|
||||||
|
#define CHAR_CIRCUMFLEX_ACCENT '^'
|
||||||
|
#define CHAR_UNDERSCORE '_'
|
||||||
|
#define CHAR_GRAVE_ACCENT '`'
|
||||||
|
#define CHAR_a 'a'
|
||||||
|
#define CHAR_b 'b'
|
||||||
|
#define CHAR_c 'c'
|
||||||
|
#define CHAR_d 'd'
|
||||||
|
#define CHAR_e 'e'
|
||||||
|
#define CHAR_f 'f'
|
||||||
|
#define CHAR_g 'g'
|
||||||
|
#define CHAR_h 'h'
|
||||||
|
#define CHAR_i 'i'
|
||||||
|
#define CHAR_j 'j'
|
||||||
|
#define CHAR_k 'k'
|
||||||
|
#define CHAR_l 'l'
|
||||||
|
#define CHAR_m 'm'
|
||||||
|
#define CHAR_n 'n'
|
||||||
|
#define CHAR_o 'o'
|
||||||
|
#define CHAR_p 'p'
|
||||||
|
#define CHAR_q 'q'
|
||||||
|
#define CHAR_r 'r'
|
||||||
|
#define CHAR_s 's'
|
||||||
|
#define CHAR_t 't'
|
||||||
|
#define CHAR_u 'u'
|
||||||
|
#define CHAR_v 'v'
|
||||||
|
#define CHAR_w 'w'
|
||||||
|
#define CHAR_x 'x'
|
||||||
|
#define CHAR_y 'y'
|
||||||
|
#define CHAR_z 'z'
|
||||||
|
#define CHAR_LEFT_CURLY_BRACKET '{'
|
||||||
|
#define CHAR_VERTICAL_LINE '|'
|
||||||
|
#define CHAR_RIGHT_CURLY_BRACKET '}'
|
||||||
|
#define CHAR_TILDE '~'
|
||||||
|
|
||||||
|
#define STR_HT "\t"
|
||||||
|
#define STR_VT "\v"
|
||||||
|
#define STR_FF "\f"
|
||||||
|
#define STR_CR "\r"
|
||||||
|
#define STR_BS "\b"
|
||||||
|
#define STR_BEL "\a"
|
||||||
|
|
||||||
|
#define STR_SPACE " "
|
||||||
|
#define STR_EXCLAMATION_MARK "!"
|
||||||
|
#define STR_QUOTATION_MARK "\""
|
||||||
|
#define STR_NUMBER_SIGN "#"
|
||||||
|
#define STR_DOLLAR_SIGN "$"
|
||||||
|
#define STR_PERCENT_SIGN "%"
|
||||||
|
#define STR_AMPERSAND "&"
|
||||||
|
#define STR_APOSTROPHE "'"
|
||||||
|
#define STR_LEFT_PARENTHESIS "("
|
||||||
|
#define STR_RIGHT_PARENTHESIS ")"
|
||||||
|
#define STR_ASTERISK "*"
|
||||||
|
#define STR_PLUS "+"
|
||||||
|
#define STR_COMMA ","
|
||||||
|
#define STR_MINUS "-"
|
||||||
|
#define STR_DOT "."
|
||||||
|
#define STR_SLASH "/"
|
||||||
|
#define STR_0 "0"
|
||||||
|
#define STR_1 "1"
|
||||||
|
#define STR_2 "2"
|
||||||
|
#define STR_3 "3"
|
||||||
|
#define STR_4 "4"
|
||||||
|
#define STR_5 "5"
|
||||||
|
#define STR_6 "6"
|
||||||
|
#define STR_7 "7"
|
||||||
|
#define STR_8 "8"
|
||||||
|
#define STR_9 "9"
|
||||||
|
#define STR_COLON ":"
|
||||||
|
#define STR_SEMICOLON ";"
|
||||||
|
#define STR_LESS_THAN_SIGN "<"
|
||||||
|
#define STR_EQUALS_SIGN "="
|
||||||
|
#define STR_GREATER_THAN_SIGN ">"
|
||||||
|
#define STR_QUESTION_MARK "?"
|
||||||
|
#define STR_COMMERCIAL_AT "@"
|
||||||
|
#define STR_A "A"
|
||||||
|
#define STR_B "B"
|
||||||
|
#define STR_C "C"
|
||||||
|
#define STR_D "D"
|
||||||
|
#define STR_E "E"
|
||||||
|
#define STR_F "F"
|
||||||
|
#define STR_G "G"
|
||||||
|
#define STR_H "H"
|
||||||
|
#define STR_I "I"
|
||||||
|
#define STR_J "J"
|
||||||
|
#define STR_K "K"
|
||||||
|
#define STR_L "L"
|
||||||
|
#define STR_M "M"
|
||||||
|
#define STR_N "N"
|
||||||
|
#define STR_O "O"
|
||||||
|
#define STR_P "P"
|
||||||
|
#define STR_Q "Q"
|
||||||
|
#define STR_R "R"
|
||||||
|
#define STR_S "S"
|
||||||
|
#define STR_T "T"
|
||||||
|
#define STR_U "U"
|
||||||
|
#define STR_V "V"
|
||||||
|
#define STR_W "W"
|
||||||
|
#define STR_X "X"
|
||||||
|
#define STR_Y "Y"
|
||||||
|
#define STR_Z "Z"
|
||||||
|
#define STR_LEFT_SQUARE_BRACKET "["
|
||||||
|
#define STR_BACKSLASH "\\"
|
||||||
|
#define STR_RIGHT_SQUARE_BRACKET "]"
|
||||||
|
#define STR_CIRCUMFLEX_ACCENT "^"
|
||||||
|
#define STR_UNDERSCORE "_"
|
||||||
|
#define STR_GRAVE_ACCENT "`"
|
||||||
|
#define STR_a "a"
|
||||||
|
#define STR_b "b"
|
||||||
|
#define STR_c "c"
|
||||||
|
#define STR_d "d"
|
||||||
|
#define STR_e "e"
|
||||||
|
#define STR_f "f"
|
||||||
|
#define STR_g "g"
|
||||||
|
#define STR_h "h"
|
||||||
|
#define STR_i "i"
|
||||||
|
#define STR_j "j"
|
||||||
|
#define STR_k "k"
|
||||||
|
#define STR_l "l"
|
||||||
|
#define STR_m "m"
|
||||||
|
#define STR_n "n"
|
||||||
|
#define STR_o "o"
|
||||||
|
#define STR_p "p"
|
||||||
|
#define STR_q "q"
|
||||||
|
#define STR_r "r"
|
||||||
|
#define STR_s "s"
|
||||||
|
#define STR_t "t"
|
||||||
|
#define STR_u "u"
|
||||||
|
#define STR_v "v"
|
||||||
|
#define STR_w "w"
|
||||||
|
#define STR_x "x"
|
||||||
|
#define STR_y "y"
|
||||||
|
#define STR_z "z"
|
||||||
|
#define STR_LEFT_CURLY_BRACKET "{"
|
||||||
|
#define STR_VERTICAL_LINE "|"
|
||||||
|
#define STR_RIGHT_CURLY_BRACKET "}"
|
||||||
|
#define STR_TILDE "~"
|
||||||
|
|
||||||
|
#define STRING_ACCEPT0 "ACCEPT\0"
|
||||||
|
#define STRING_COMMIT0 "COMMIT\0"
|
||||||
|
#define STRING_F0 "F\0"
|
||||||
|
#define STRING_FAIL0 "FAIL\0"
|
||||||
|
#define STRING_MARK0 "MARK\0"
|
||||||
|
#define STRING_PRUNE0 "PRUNE\0"
|
||||||
|
#define STRING_SKIP0 "SKIP\0"
|
||||||
|
#define STRING_THEN "THEN"
|
||||||
|
|
||||||
|
#define STRING_atomic0 "atomic\0"
|
||||||
|
#define STRING_pla0 "pla\0"
|
||||||
|
#define STRING_plb0 "plb\0"
|
||||||
|
#define STRING_napla0 "napla\0"
|
||||||
|
#define STRING_naplb0 "naplb\0"
|
||||||
|
#define STRING_nla0 "nla\0"
|
||||||
|
#define STRING_nlb0 "nlb\0"
|
||||||
|
#define STRING_scs0 "scs\0"
|
||||||
|
#define STRING_sr0 "sr\0"
|
||||||
|
#define STRING_asr0 "asr\0"
|
||||||
|
#define STRING_positive_lookahead0 "positive_lookahead\0"
|
||||||
|
#define STRING_positive_lookbehind0 "positive_lookbehind\0"
|
||||||
|
#define STRING_non_atomic_positive_lookahead0 "non_atomic_positive_lookahead\0"
|
||||||
|
#define STRING_non_atomic_positive_lookbehind0 "non_atomic_positive_lookbehind\0"
|
||||||
|
#define STRING_negative_lookahead0 "negative_lookahead\0"
|
||||||
|
#define STRING_negative_lookbehind0 "negative_lookbehind\0"
|
||||||
|
#define STRING_script_run0 "script_run\0"
|
||||||
|
#define STRING_atomic_script_run "atomic_script_run"
|
||||||
|
#define STRING_scan_substring0 "scan_substring\0"
|
||||||
|
|
||||||
|
#define STRING_alpha0 "alpha\0"
|
||||||
|
#define STRING_lower0 "lower\0"
|
||||||
|
#define STRING_upper0 "upper\0"
|
||||||
|
#define STRING_alnum0 "alnum\0"
|
||||||
|
#define STRING_ascii0 "ascii\0"
|
||||||
|
#define STRING_blank0 "blank\0"
|
||||||
|
#define STRING_cntrl0 "cntrl\0"
|
||||||
|
#define STRING_digit0 "digit\0"
|
||||||
|
#define STRING_graph0 "graph\0"
|
||||||
|
#define STRING_print0 "print\0"
|
||||||
|
#define STRING_punct0 "punct\0"
|
||||||
|
#define STRING_space0 "space\0"
|
||||||
|
#define STRING_word0 "word\0"
|
||||||
|
#define STRING_xdigit "xdigit"
|
||||||
|
|
||||||
|
#define STRING_DEFINE "DEFINE"
|
||||||
|
#define STRING_VERSION "VERSION"
|
||||||
|
#define STRING_WEIRD_STARTWORD "[:<:]]"
|
||||||
|
#define STRING_WEIRD_ENDWORD "[:>:]]"
|
||||||
|
|
||||||
|
#define STRING_CR_RIGHTPAR "CR)"
|
||||||
|
#define STRING_LF_RIGHTPAR "LF)"
|
||||||
|
#define STRING_CRLF_RIGHTPAR "CRLF)"
|
||||||
|
#define STRING_ANY_RIGHTPAR "ANY)"
|
||||||
|
#define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)"
|
||||||
|
#define STRING_NUL_RIGHTPAR "NUL)"
|
||||||
|
#define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"
|
||||||
|
#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"
|
||||||
|
#define STRING_UTF8_RIGHTPAR "UTF8)"
|
||||||
|
#define STRING_UTF16_RIGHTPAR "UTF16)"
|
||||||
|
#define STRING_UTF32_RIGHTPAR "UTF32)"
|
||||||
|
#define STRING_UTF_RIGHTPAR "UTF)"
|
||||||
|
#define STRING_UCP_RIGHTPAR "UCP)"
|
||||||
|
#define STRING_NO_AUTO_POSSESS_RIGHTPAR "NO_AUTO_POSSESS)"
|
||||||
|
#define STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR "NO_DOTSTAR_ANCHOR)"
|
||||||
|
#define STRING_NO_JIT_RIGHTPAR "NO_JIT)"
|
||||||
|
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
|
||||||
|
#define STRING_NOTEMPTY_RIGHTPAR "NOTEMPTY)"
|
||||||
|
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR "NOTEMPTY_ATSTART)"
|
||||||
|
#define STRING_CASELESS_RESTRICT_RIGHTPAR "CASELESS_RESTRICT)"
|
||||||
|
#define STRING_TURKISH_CASING_RIGHTPAR "TURKISH_CASING)"
|
||||||
|
#define STRING_LIMIT_HEAP_EQ "LIMIT_HEAP="
|
||||||
|
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
|
||||||
|
#define STRING_LIMIT_DEPTH_EQ "LIMIT_DEPTH="
|
||||||
|
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
|
||||||
|
#define STRING_MARK "MARK"
|
||||||
|
|
||||||
|
#define STRING_bc "bc"
|
||||||
|
#define STRING_bidiclass "bidiclass"
|
||||||
|
#define STRING_sc "sc"
|
||||||
|
#define STRING_script "script"
|
||||||
|
#define STRING_scriptextensions "scriptextensions"
|
||||||
|
#define STRING_scx "scx"
|
||||||
|
|
||||||
|
#else /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
/* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This
|
||||||
|
works in both modes on non-EBCDIC platforms, and on EBCDIC platforms in UTF-8 mode
|
||||||
|
only. */
|
||||||
|
|
||||||
|
#define CHAR_HT '\011'
|
||||||
|
#define CHAR_VT '\013'
|
||||||
|
#define CHAR_FF '\014'
|
||||||
|
#define CHAR_CR '\015'
|
||||||
|
#define CHAR_LF '\012'
|
||||||
|
#define CHAR_NL CHAR_LF
|
||||||
|
#define CHAR_NEL ((unsigned char)'\x85')
|
||||||
|
#define CHAR_BS '\010'
|
||||||
|
#define CHAR_BEL '\007'
|
||||||
|
#define CHAR_ESC '\033'
|
||||||
|
#define CHAR_DEL '\177'
|
||||||
|
|
||||||
|
#define CHAR_NUL '\0'
|
||||||
|
#define CHAR_SPACE '\040'
|
||||||
|
#define CHAR_EXCLAMATION_MARK '\041'
|
||||||
|
#define CHAR_QUOTATION_MARK '\042'
|
||||||
|
#define CHAR_NUMBER_SIGN '\043'
|
||||||
|
#define CHAR_DOLLAR_SIGN '\044'
|
||||||
|
#define CHAR_PERCENT_SIGN '\045'
|
||||||
|
#define CHAR_AMPERSAND '\046'
|
||||||
|
#define CHAR_APOSTROPHE '\047'
|
||||||
|
#define CHAR_LEFT_PARENTHESIS '\050'
|
||||||
|
#define CHAR_RIGHT_PARENTHESIS '\051'
|
||||||
|
#define CHAR_ASTERISK '\052'
|
||||||
|
#define CHAR_PLUS '\053'
|
||||||
|
#define CHAR_COMMA '\054'
|
||||||
|
#define CHAR_MINUS '\055'
|
||||||
|
#define CHAR_DOT '\056'
|
||||||
|
#define CHAR_SLASH '\057'
|
||||||
|
#define CHAR_0 '\060'
|
||||||
|
#define CHAR_1 '\061'
|
||||||
|
#define CHAR_2 '\062'
|
||||||
|
#define CHAR_3 '\063'
|
||||||
|
#define CHAR_4 '\064'
|
||||||
|
#define CHAR_5 '\065'
|
||||||
|
#define CHAR_6 '\066'
|
||||||
|
#define CHAR_7 '\067'
|
||||||
|
#define CHAR_8 '\070'
|
||||||
|
#define CHAR_9 '\071'
|
||||||
|
#define CHAR_COLON '\072'
|
||||||
|
#define CHAR_SEMICOLON '\073'
|
||||||
|
#define CHAR_LESS_THAN_SIGN '\074'
|
||||||
|
#define CHAR_EQUALS_SIGN '\075'
|
||||||
|
#define CHAR_GREATER_THAN_SIGN '\076'
|
||||||
|
#define CHAR_QUESTION_MARK '\077'
|
||||||
|
#define CHAR_COMMERCIAL_AT '\100'
|
||||||
|
#define CHAR_A '\101'
|
||||||
|
#define CHAR_B '\102'
|
||||||
|
#define CHAR_C '\103'
|
||||||
|
#define CHAR_D '\104'
|
||||||
|
#define CHAR_E '\105'
|
||||||
|
#define CHAR_F '\106'
|
||||||
|
#define CHAR_G '\107'
|
||||||
|
#define CHAR_H '\110'
|
||||||
|
#define CHAR_I '\111'
|
||||||
|
#define CHAR_J '\112'
|
||||||
|
#define CHAR_K '\113'
|
||||||
|
#define CHAR_L '\114'
|
||||||
|
#define CHAR_M '\115'
|
||||||
|
#define CHAR_N '\116'
|
||||||
|
#define CHAR_O '\117'
|
||||||
|
#define CHAR_P '\120'
|
||||||
|
#define CHAR_Q '\121'
|
||||||
|
#define CHAR_R '\122'
|
||||||
|
#define CHAR_S '\123'
|
||||||
|
#define CHAR_T '\124'
|
||||||
|
#define CHAR_U '\125'
|
||||||
|
#define CHAR_V '\126'
|
||||||
|
#define CHAR_W '\127'
|
||||||
|
#define CHAR_X '\130'
|
||||||
|
#define CHAR_Y '\131'
|
||||||
|
#define CHAR_Z '\132'
|
||||||
|
#define CHAR_LEFT_SQUARE_BRACKET '\133'
|
||||||
|
#define CHAR_BACKSLASH '\134'
|
||||||
|
#define CHAR_RIGHT_SQUARE_BRACKET '\135'
|
||||||
|
#define CHAR_CIRCUMFLEX_ACCENT '\136'
|
||||||
|
#define CHAR_UNDERSCORE '\137'
|
||||||
|
#define CHAR_GRAVE_ACCENT '\140'
|
||||||
|
#define CHAR_a '\141'
|
||||||
|
#define CHAR_b '\142'
|
||||||
|
#define CHAR_c '\143'
|
||||||
|
#define CHAR_d '\144'
|
||||||
|
#define CHAR_e '\145'
|
||||||
|
#define CHAR_f '\146'
|
||||||
|
#define CHAR_g '\147'
|
||||||
|
#define CHAR_h '\150'
|
||||||
|
#define CHAR_i '\151'
|
||||||
|
#define CHAR_j '\152'
|
||||||
|
#define CHAR_k '\153'
|
||||||
|
#define CHAR_l '\154'
|
||||||
|
#define CHAR_m '\155'
|
||||||
|
#define CHAR_n '\156'
|
||||||
|
#define CHAR_o '\157'
|
||||||
|
#define CHAR_p '\160'
|
||||||
|
#define CHAR_q '\161'
|
||||||
|
#define CHAR_r '\162'
|
||||||
|
#define CHAR_s '\163'
|
||||||
|
#define CHAR_t '\164'
|
||||||
|
#define CHAR_u '\165'
|
||||||
|
#define CHAR_v '\166'
|
||||||
|
#define CHAR_w '\167'
|
||||||
|
#define CHAR_x '\170'
|
||||||
|
#define CHAR_y '\171'
|
||||||
|
#define CHAR_z '\172'
|
||||||
|
#define CHAR_LEFT_CURLY_BRACKET '\173'
|
||||||
|
#define CHAR_VERTICAL_LINE '\174'
|
||||||
|
#define CHAR_RIGHT_CURLY_BRACKET '\175'
|
||||||
|
#define CHAR_TILDE '\176'
|
||||||
|
#define CHAR_NBSP ((unsigned char)'\xa0')
|
||||||
|
|
||||||
|
#define STR_HT "\011"
|
||||||
|
#define STR_VT "\013"
|
||||||
|
#define STR_FF "\014"
|
||||||
|
#define STR_CR "\015"
|
||||||
|
#define STR_NL "\012"
|
||||||
|
#define STR_BS "\010"
|
||||||
|
#define STR_BEL "\007"
|
||||||
|
#define STR_ESC "\033"
|
||||||
|
#define STR_DEL "\177"
|
||||||
|
|
||||||
|
#define STR_SPACE "\040"
|
||||||
|
#define STR_EXCLAMATION_MARK "\041"
|
||||||
|
#define STR_QUOTATION_MARK "\042"
|
||||||
|
#define STR_NUMBER_SIGN "\043"
|
||||||
|
#define STR_DOLLAR_SIGN "\044"
|
||||||
|
#define STR_PERCENT_SIGN "\045"
|
||||||
|
#define STR_AMPERSAND "\046"
|
||||||
|
#define STR_APOSTROPHE "\047"
|
||||||
|
#define STR_LEFT_PARENTHESIS "\050"
|
||||||
|
#define STR_RIGHT_PARENTHESIS "\051"
|
||||||
|
#define STR_ASTERISK "\052"
|
||||||
|
#define STR_PLUS "\053"
|
||||||
|
#define STR_COMMA "\054"
|
||||||
|
#define STR_MINUS "\055"
|
||||||
|
#define STR_DOT "\056"
|
||||||
|
#define STR_SLASH "\057"
|
||||||
|
#define STR_0 "\060"
|
||||||
|
#define STR_1 "\061"
|
||||||
|
#define STR_2 "\062"
|
||||||
|
#define STR_3 "\063"
|
||||||
|
#define STR_4 "\064"
|
||||||
|
#define STR_5 "\065"
|
||||||
|
#define STR_6 "\066"
|
||||||
|
#define STR_7 "\067"
|
||||||
|
#define STR_8 "\070"
|
||||||
|
#define STR_9 "\071"
|
||||||
|
#define STR_COLON "\072"
|
||||||
|
#define STR_SEMICOLON "\073"
|
||||||
|
#define STR_LESS_THAN_SIGN "\074"
|
||||||
|
#define STR_EQUALS_SIGN "\075"
|
||||||
|
#define STR_GREATER_THAN_SIGN "\076"
|
||||||
|
#define STR_QUESTION_MARK "\077"
|
||||||
|
#define STR_COMMERCIAL_AT "\100"
|
||||||
|
#define STR_A "\101"
|
||||||
|
#define STR_B "\102"
|
||||||
|
#define STR_C "\103"
|
||||||
|
#define STR_D "\104"
|
||||||
|
#define STR_E "\105"
|
||||||
|
#define STR_F "\106"
|
||||||
|
#define STR_G "\107"
|
||||||
|
#define STR_H "\110"
|
||||||
|
#define STR_I "\111"
|
||||||
|
#define STR_J "\112"
|
||||||
|
#define STR_K "\113"
|
||||||
|
#define STR_L "\114"
|
||||||
|
#define STR_M "\115"
|
||||||
|
#define STR_N "\116"
|
||||||
|
#define STR_O "\117"
|
||||||
|
#define STR_P "\120"
|
||||||
|
#define STR_Q "\121"
|
||||||
|
#define STR_R "\122"
|
||||||
|
#define STR_S "\123"
|
||||||
|
#define STR_T "\124"
|
||||||
|
#define STR_U "\125"
|
||||||
|
#define STR_V "\126"
|
||||||
|
#define STR_W "\127"
|
||||||
|
#define STR_X "\130"
|
||||||
|
#define STR_Y "\131"
|
||||||
|
#define STR_Z "\132"
|
||||||
|
#define STR_LEFT_SQUARE_BRACKET "\133"
|
||||||
|
#define STR_BACKSLASH "\134"
|
||||||
|
#define STR_RIGHT_SQUARE_BRACKET "\135"
|
||||||
|
#define STR_CIRCUMFLEX_ACCENT "\136"
|
||||||
|
#define STR_UNDERSCORE "\137"
|
||||||
|
#define STR_GRAVE_ACCENT "\140"
|
||||||
|
#define STR_a "\141"
|
||||||
|
#define STR_b "\142"
|
||||||
|
#define STR_c "\143"
|
||||||
|
#define STR_d "\144"
|
||||||
|
#define STR_e "\145"
|
||||||
|
#define STR_f "\146"
|
||||||
|
#define STR_g "\147"
|
||||||
|
#define STR_h "\150"
|
||||||
|
#define STR_i "\151"
|
||||||
|
#define STR_j "\152"
|
||||||
|
#define STR_k "\153"
|
||||||
|
#define STR_l "\154"
|
||||||
|
#define STR_m "\155"
|
||||||
|
#define STR_n "\156"
|
||||||
|
#define STR_o "\157"
|
||||||
|
#define STR_p "\160"
|
||||||
|
#define STR_q "\161"
|
||||||
|
#define STR_r "\162"
|
||||||
|
#define STR_s "\163"
|
||||||
|
#define STR_t "\164"
|
||||||
|
#define STR_u "\165"
|
||||||
|
#define STR_v "\166"
|
||||||
|
#define STR_w "\167"
|
||||||
|
#define STR_x "\170"
|
||||||
|
#define STR_y "\171"
|
||||||
|
#define STR_z "\172"
|
||||||
|
#define STR_LEFT_CURLY_BRACKET "\173"
|
||||||
|
#define STR_VERTICAL_LINE "\174"
|
||||||
|
#define STR_RIGHT_CURLY_BRACKET "\175"
|
||||||
|
#define STR_TILDE "\176"
|
||||||
|
|
||||||
|
#define STRING_ACCEPT0 STR_A STR_C STR_C STR_E STR_P STR_T "\0"
|
||||||
|
#define STRING_COMMIT0 STR_C STR_O STR_M STR_M STR_I STR_T "\0"
|
||||||
|
#define STRING_F0 STR_F "\0"
|
||||||
|
#define STRING_FAIL0 STR_F STR_A STR_I STR_L "\0"
|
||||||
|
#define STRING_MARK0 STR_M STR_A STR_R STR_K "\0"
|
||||||
|
#define STRING_PRUNE0 STR_P STR_R STR_U STR_N STR_E "\0"
|
||||||
|
#define STRING_SKIP0 STR_S STR_K STR_I STR_P "\0"
|
||||||
|
#define STRING_THEN STR_T STR_H STR_E STR_N
|
||||||
|
|
||||||
|
#define STRING_atomic0 STR_a STR_t STR_o STR_m STR_i STR_c "\0"
|
||||||
|
#define STRING_pla0 STR_p STR_l STR_a "\0"
|
||||||
|
#define STRING_plb0 STR_p STR_l STR_b "\0"
|
||||||
|
#define STRING_napla0 STR_n STR_a STR_p STR_l STR_a "\0"
|
||||||
|
#define STRING_naplb0 STR_n STR_a STR_p STR_l STR_b "\0"
|
||||||
|
#define STRING_nla0 STR_n STR_l STR_a "\0"
|
||||||
|
#define STRING_nlb0 STR_n STR_l STR_b "\0"
|
||||||
|
#define STRING_scs0 STR_s STR_c STR_s "\0"
|
||||||
|
#define STRING_sr0 STR_s STR_r "\0"
|
||||||
|
#define STRING_asr0 STR_a STR_s STR_r "\0"
|
||||||
|
#define STRING_positive_lookahead0 STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
|
||||||
|
#define STRING_positive_lookbehind0 STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
|
||||||
|
#define STRING_non_atomic_positive_lookahead0 STR_n STR_o STR_n STR_UNDERSCORE STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
|
||||||
|
#define STRING_non_atomic_positive_lookbehind0 STR_n STR_o STR_n STR_UNDERSCORE STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
|
||||||
|
#define STRING_negative_lookahead0 STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
|
||||||
|
#define STRING_negative_lookbehind0 STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
|
||||||
|
#define STRING_script_run0 STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n "\0"
|
||||||
|
#define STRING_atomic_script_run STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n
|
||||||
|
#define STRING_scan_substring0 STR_s STR_c STR_a STR_n STR_UNDERSCORE STR_s STR_u STR_b STR_s STR_t STR_r STR_i STR_n STR_g "\0"
|
||||||
|
|
||||||
|
#define STRING_alpha0 STR_a STR_l STR_p STR_h STR_a "\0"
|
||||||
|
#define STRING_lower0 STR_l STR_o STR_w STR_e STR_r "\0"
|
||||||
|
#define STRING_upper0 STR_u STR_p STR_p STR_e STR_r "\0"
|
||||||
|
#define STRING_alnum0 STR_a STR_l STR_n STR_u STR_m "\0"
|
||||||
|
#define STRING_ascii0 STR_a STR_s STR_c STR_i STR_i "\0"
|
||||||
|
#define STRING_blank0 STR_b STR_l STR_a STR_n STR_k "\0"
|
||||||
|
#define STRING_cntrl0 STR_c STR_n STR_t STR_r STR_l "\0"
|
||||||
|
#define STRING_digit0 STR_d STR_i STR_g STR_i STR_t "\0"
|
||||||
|
#define STRING_graph0 STR_g STR_r STR_a STR_p STR_h "\0"
|
||||||
|
#define STRING_print0 STR_p STR_r STR_i STR_n STR_t "\0"
|
||||||
|
#define STRING_punct0 STR_p STR_u STR_n STR_c STR_t "\0"
|
||||||
|
#define STRING_space0 STR_s STR_p STR_a STR_c STR_e "\0"
|
||||||
|
#define STRING_word0 STR_w STR_o STR_r STR_d "\0"
|
||||||
|
#define STRING_xdigit STR_x STR_d STR_i STR_g STR_i STR_t
|
||||||
|
|
||||||
|
#define STRING_DEFINE STR_D STR_E STR_F STR_I STR_N STR_E
|
||||||
|
#define STRING_VERSION STR_V STR_E STR_R STR_S STR_I STR_O STR_N
|
||||||
|
#define STRING_WEIRD_STARTWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_LESS_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
|
||||||
|
#define STRING_WEIRD_ENDWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_GREATER_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
|
||||||
|
|
||||||
|
#define STRING_CR_RIGHTPAR STR_C STR_R STR_RIGHT_PARENTHESIS
|
||||||
|
#define STRING_LF_RIGHTPAR STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||||
|
#define STRING_CRLF_RIGHTPAR STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||||
|
#define STRING_ANY_RIGHTPAR STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS
|
||||||
|
#define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||||
|
#define STRING_NUL_RIGHTPAR STR_N STR_U STR_L STR_RIGHT_PARENTHESIS
|
||||||
|
#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||||
|
#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
|
||||||
|
#define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
|
||||||
|
#define STRING_UTF16_RIGHTPAR STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS
|
||||||
|
#define STRING_UTF32_RIGHTPAR STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS
|
||||||
|
#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_RIGHT_PARENTHESIS
|
||||||
|
#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
|
||||||
|
#define STRING_NO_AUTO_POSSESS_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_A STR_U STR_T STR_O STR_UNDERSCORE STR_P STR_O STR_S STR_S STR_E STR_S STR_S STR_RIGHT_PARENTHESIS
|
||||||
|
#define STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_D STR_O STR_T STR_S STR_T STR_A STR_R STR_UNDERSCORE STR_A STR_N STR_C STR_H STR_O STR_R STR_RIGHT_PARENTHESIS
|
||||||
|
#define STRING_NO_JIT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_J STR_I STR_T STR_RIGHT_PARENTHESIS
|
||||||
|
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
|
||||||
|
#define STRING_NOTEMPTY_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_RIGHT_PARENTHESIS
|
||||||
|
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS
|
||||||
|
#define STRING_CASELESS_RESTRICT_RIGHTPAR STR_C STR_A STR_S STR_E STR_L STR_E STR_S STR_S STR_UNDERSCORE STR_R STR_E STR_S STR_T STR_R STR_I STR_C STR_T STR_RIGHT_PARENTHESIS
|
||||||
|
#define STRING_TURKISH_CASING_RIGHTPAR STR_T STR_U STR_R STR_K STR_I STR_S STR_H STR_UNDERSCORE STR_C STR_A STR_S STR_I STR_N STR_G STR_RIGHT_PARENTHESIS
|
||||||
|
#define STRING_LIMIT_HEAP_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_H STR_E STR_A STR_P STR_EQUALS_SIGN
|
||||||
|
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
|
||||||
|
#define STRING_LIMIT_DEPTH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_D STR_E STR_P STR_T STR_H STR_EQUALS_SIGN
|
||||||
|
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
|
||||||
|
#define STRING_MARK STR_M STR_A STR_R STR_K
|
||||||
|
|
||||||
|
#define STRING_bc STR_b STR_c
|
||||||
|
#define STRING_bidiclass STR_b STR_i STR_d STR_i STR_c STR_l STR_a STR_s STR_s
|
||||||
|
#define STRING_sc STR_s STR_c
|
||||||
|
#define STRING_script STR_s STR_c STR_r STR_i STR_p STR_t
|
||||||
|
#define STRING_scriptextensions STR_s STR_c STR_r STR_i STR_p STR_t STR_e STR_x STR_t STR_e STR_n STR_s STR_i STR_o STR_n STR_s
|
||||||
|
#define STRING_scx STR_s STR_c STR_x
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
/* -------------------- End of character and string names -------------------*/
|
||||||
|
|
||||||
|
/* -------------------- Definitions for compiled patterns -------------------*/
|
||||||
|
|
||||||
|
/* Codes for different types of Unicode property. If these definitions are
|
||||||
|
changed, the autopossessifying table in pcre2_auto_possess.c must be updated to
|
||||||
|
match. */
|
||||||
|
|
||||||
|
#define PT_LAMP 0 /* L& - the union of Lu, Ll, Lt */
|
||||||
|
#define PT_GC 1 /* Specified general characteristic (e.g. L) */
|
||||||
|
#define PT_PC 2 /* Specified particular characteristic (e.g. Lu) */
|
||||||
|
#define PT_SC 3 /* Script only (e.g. Han) */
|
||||||
|
#define PT_SCX 4 /* Script extensions (includes SC) */
|
||||||
|
#define PT_ALNUM 5 /* Alphanumeric - the union of L and N */
|
||||||
|
#define PT_SPACE 6 /* Perl space - general category Z plus 9,10,12,13 */
|
||||||
|
#define PT_PXSPACE 7 /* POSIX space - Z plus 9,10,11,12,13 */
|
||||||
|
#define PT_WORD 8 /* Word - L, N, Mn, or Pc */
|
||||||
|
#define PT_CLIST 9 /* Pseudo-property: match character list */
|
||||||
|
#define PT_UCNC 10 /* Universal Character nameable character */
|
||||||
|
#define PT_BIDICL 11 /* Specified bidi class */
|
||||||
|
#define PT_BOOL 12 /* Boolean property */
|
||||||
|
#define PT_ANY 13 /* Must be the last entry!
|
||||||
|
Any property - matches all chars */
|
||||||
|
#define PT_TABSIZE PT_ANY /* Size of square table for autopossessify tests */
|
||||||
|
|
||||||
|
/* The following special properties are used only in XCLASS items, when POSIX
|
||||||
|
classes are specified and PCRE2_UCP is set - in other words, for Unicode
|
||||||
|
handling of these classes. They are not available via the \p or \P escapes like
|
||||||
|
those in the above list, and so they do not take part in the autopossessifying
|
||||||
|
table. */
|
||||||
|
|
||||||
|
#define PT_PXGRAPH 14 /* [:graph:] - characters that mark the paper */
|
||||||
|
#define PT_PXPRINT 15 /* [:print:] - [:graph:] plus non-control spaces */
|
||||||
|
#define PT_PXPUNCT 16 /* [:punct:] - punctuation characters */
|
||||||
|
#define PT_PXXDIGIT 17 /* [:xdigit:] - hex digits */
|
||||||
|
|
||||||
|
/* This value is used when parsing \p and \P escapes to indicate that neither
|
||||||
|
\p{script:...} nor \p{scx:...} has been encountered. */
|
||||||
|
|
||||||
|
#define PT_NOTSCRIPT 255
|
||||||
|
|
||||||
|
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
|
||||||
|
contain characters with values greater than 255. */
|
||||||
|
|
||||||
|
#define XCL_NOT 0x01 /* Flag: this is a negative class */
|
||||||
|
#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */
|
||||||
|
#define XCL_HASPROP 0x04 /* Flag: property checks are present. */
|
||||||
|
|
||||||
|
#define XCL_END 0 /* Marks end of individual items */
|
||||||
|
#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */
|
||||||
|
#define XCL_RANGE 2 /* A range (two multibyte chars) follows */
|
||||||
|
#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */
|
||||||
|
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
|
||||||
|
/* This value represents the beginning of character lists. The value
|
||||||
|
is 16 bits long, and is stored as a high and low byte pair in 8-bit mode.
|
||||||
|
The lower 12 bits contain information about character lists (see later). */
|
||||||
|
#define XCL_LIST (sizeof(PCRE2_UCHAR) == 1 ? 0x10 : 0x1000)
|
||||||
|
|
||||||
|
/* When a character class contains many characters/ranges,
|
||||||
|
they are stored in character lists. There are four character
|
||||||
|
lists which contain characters/ranges within a given range.
|
||||||
|
|
||||||
|
The name, character range and item size for each list:
|
||||||
|
Low16 [0x100 - 0x7fff] 16 bit items
|
||||||
|
High16 [0x8000 - 0xffff] 16 bit items
|
||||||
|
Low32 [0x10000 - 0x7fffffff] 32 bit items
|
||||||
|
High32 [0x80000000 - 0xffffffff] 32 bit items
|
||||||
|
|
||||||
|
The Low32 character list is used only when utf encoding or 32 bit
|
||||||
|
character width is enabled, and the High32 character list is used only
|
||||||
|
when 32 bit character width is enabled.
|
||||||
|
|
||||||
|
Each character list contains items. The lowest bit indicates whether
|
||||||
|
an item is the beginning of a range (bit is cleared), or not (bit
|
||||||
|
is set). The other bits represent the character shifted left by
|
||||||
|
one, so its highest bit is discarded. Due to the layout of character
|
||||||
|
lists, the highest bit of a character is always known:
|
||||||
|
|
||||||
|
Low16 and Low32: the highest bit is always zero
|
||||||
|
High16 and High32: the highest bit is always one
|
||||||
|
|
||||||
|
The items are ordered in increasing order, so binary search can be
|
||||||
|
used to find the lower bound of an input character. The lower bound
|
||||||
|
is the highest item whose value is less than or equal to the input
|
||||||
|
character. If the lowest bit of the item is cleared, or the character
|
||||||
|
stored in the item equals the input character, the input
|
||||||
|
character is in the character list. */
|
||||||
|
|
||||||
|
/* Character list constants. */
|
||||||
|
#define XCL_CHAR_LIST_LOW_16_START 0x100
|
||||||
|
#define XCL_CHAR_LIST_LOW_16_END 0x7fff
|
||||||
|
#define XCL_CHAR_LIST_LOW_16_ADD 0x0
|
||||||
|
|
||||||
|
#define XCL_CHAR_LIST_HIGH_16_START 0x8000
|
||||||
|
#define XCL_CHAR_LIST_HIGH_16_END 0xffff
|
||||||
|
#define XCL_CHAR_LIST_HIGH_16_ADD 0x8000
|
||||||
|
|
||||||
|
#define XCL_CHAR_LIST_LOW_32_START 0x10000
|
||||||
|
#define XCL_CHAR_LIST_LOW_32_END 0x7fffffff
|
||||||
|
#define XCL_CHAR_LIST_LOW_32_ADD 0x0
|
||||||
|
|
||||||
|
#define XCL_CHAR_LIST_HIGH_32_START 0x80000000
|
||||||
|
#define XCL_CHAR_LIST_HIGH_32_END 0xffffffff
|
||||||
|
#define XCL_CHAR_LIST_HIGH_32_ADD 0x80000000
|
||||||
|
|
||||||
|
/* Mask for getting the descriptors of character list ranges.
|
||||||
|
Each descriptor has XCL_TYPE_BIT_LEN bits, and can be processed
|
||||||
|
by XCL_BEGIN_WITH_RANGE and XCL_ITEM_COUNT_MASK macros. */
|
||||||
|
#define XCL_TYPE_MASK 0xfff
|
||||||
|
#define XCL_TYPE_BIT_LEN 3
|
||||||
|
/* If this bit is set, the first item of the character list is the
|
||||||
|
end of a range, which started before the starting character of the
|
||||||
|
character list. */
|
||||||
|
#define XCL_BEGIN_WITH_RANGE 0x4
|
||||||
|
/* Number of items in the character list: 0, 1, or 2. The value 3
|
||||||
|
indicates that the item count is stored at the beginning of the
|
||||||
|
character list. The item count has the same width as the items
|
||||||
|
in the character list (e.g. 16 bit for Low16 and High16 lists). */
|
||||||
|
#define XCL_ITEM_COUNT_MASK 0x3
|
||||||
|
/* Shift and flag for constructing character list items. The XCL_CHAR_END
|
||||||
|
is set, when the item is not the beginning of a range. The XCL_CHAR_SHIFT
|
||||||
|
can be used to encode / decode the character value stored in an item. */
|
||||||
|
#define XCL_CHAR_END 0x1
|
||||||
|
#define XCL_CHAR_SHIFT 1
|
||||||
|
|
||||||
|
/* Flag bits for an extended class (OP_ECLASS), which is used for complex
|
||||||
|
character matches such as [\p{Greek} && \p{Ll}]. */
|
||||||
|
|
||||||
|
#define ECL_MAP 0x01 /* Flag: a 32-byte map is present */
|
||||||
|
|
||||||
|
/* Type tags for the items stored in an extended class (OP_ECLASS). These items
|
||||||
|
follow the OP_ECLASS's flag char and bitmap, and represent a Reverse Polish
|
||||||
|
Notation list of operands and operators manipulating a stack of bits. */
|
||||||
|
|
||||||
|
#define ECL_AND 1 /* Pop two from the stack, AND, and push result. */
|
||||||
|
#define ECL_OR 2 /* Pop two from the stack, OR, and push result. */
|
||||||
|
#define ECL_XOR 3 /* Pop two from the stack, XOR, and push result. */
|
||||||
|
#define ECL_NOT 4 /* Pop one from the stack, NOT, and push result. */
|
||||||
|
#define ECL_XCLASS 5 /* XCLASS nested within ECLASS; match and push result. */
|
||||||
|
#define ECL_ANY 6 /* Temporary, only used during compilation. */
|
||||||
|
#define ECL_NONE 7 /* Temporary, only used during compilation. */
|
||||||
|
|
||||||
|
/* These are escaped items that aren't just an encoding of a particular data
|
||||||
|
value such as \n. They must have non-zero values, as check_escape() returns 0
|
||||||
|
for a data character. In the escapes[] table in pcre2_compile.c their values
|
||||||
|
are negated in order to distinguish them from data values.
|
||||||
|
|
||||||
|
They must appear here in the same order as in the opcode definitions below, up
|
||||||
|
to ESC_z. There's a dummy for OP_ALLANY because it corresponds to "." in DOTALL
|
||||||
|
mode rather than an escape sequence. It is also used for [^] in JavaScript
|
||||||
|
compatibility mode, and for \C in non-utf mode. In non-DOTALL mode, "." behaves
|
||||||
|
like \N.
|
||||||
|
|
||||||
|
ESC_ub is a special return from check_escape() when, in BSUX mode, \u{ is not
|
||||||
|
followed by hex digits and }, in which case it should mean a literal "u"
|
||||||
|
followed by a literal "{". This hack is necessary for cases like \u{ 12}
|
||||||
|
because without it, this is interpreted as u{12} now that spaces are allowed in
|
||||||
|
quantifiers.
|
||||||
|
|
||||||
|
Negative numbers are used to encode a backreference (\1, \2, \3, etc.) in
|
||||||
|
check_escape(). There are tests in the code for an escape greater than ESC_b
|
||||||
|
and less than ESC_Z to detect the types that may be repeated. These are the
|
||||||
|
types that consume characters. If any new escapes are put in between that don't
|
||||||
|
consume a character, that code will have to change. */
|
||||||
|
|
||||||
|
enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, /* values 1-9 */
|
||||||
|
ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H, /* values 10-18; ESC_dum (13) is the placeholder that keeps the list aligned with OP_ALLANY */
|
||||||
|
ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, /* values 19-24; ESC_z (24) is the last entry that must line up with an opcode (OP_EOD) */
|
||||||
|
ESC_E, ESC_Q, ESC_g, ESC_k, ESC_ub }; /* values 25-29; outside the range that has to mirror the opcode order */
|
||||||
|
|
||||||
|
|
||||||
|
/********************** Opcode definitions ******************/
|
||||||
|
|
||||||
|
/****** NOTE NOTE NOTE ******
|
||||||
|
|
||||||
|
Starting from 1 (i.e. after OP_END), the values up to OP_EOD must correspond in
|
||||||
|
order to the list of escapes immediately above. Furthermore, values up to
|
||||||
|
OP_DOLLM must not be changed without adjusting the table called autoposstab in
|
||||||
|
pcre2_auto_possess.c.
|
||||||
|
|
||||||
|
Whenever this list is updated, the two macro definitions that follow must be
|
||||||
|
updated to match. The possessification table called "opcode_possessify" in
|
||||||
|
pcre2_compile.c must also be updated, and also the tables called "coptable"
|
||||||
|
and "poptable" in pcre2_dfa_match.c.
|
||||||
|
|
||||||
|
****** NOTE NOTE NOTE ******/
|
||||||
|
|
||||||
|
|
||||||
|
/* The values between FIRST_AUTOTAB_OP and LAST_AUTOTAB_RIGHT_OP, inclusive,
|
||||||
|
are used in a table for deciding whether a repeated character type can be
|
||||||
|
auto-possessified. */
|
||||||
|
|
||||||
|
#define FIRST_AUTOTAB_OP OP_NOT_DIGIT /* opcode 6: first entry covered by the auto-possessify table */
|
||||||
|
#define LAST_AUTOTAB_LEFT_OP OP_EXTUNI /* opcode 22. NOTE(review): presumably the last opcode usable on the "left" (repeated item) axis of the table - confirm against pcre2_auto_possess.c */
|
||||||
|
#define LAST_AUTOTAB_RIGHT_OP OP_DOLLM /* opcode 26: last entry covered by the auto-possessify table */
|
||||||
|
|
||||||
|
enum {
|
||||||
|
OP_END, /* 0 End of pattern */
|
||||||
|
|
||||||
|
/* Values corresponding to backslashed metacharacters */
|
||||||
|
|
||||||
|
OP_SOD, /* 1 Start of data: \A */
|
||||||
|
OP_SOM, /* 2 Start of match (subject + offset): \G */
|
||||||
|
OP_SET_SOM, /* 3 Set start of match (\K) */
|
||||||
|
OP_NOT_WORD_BOUNDARY, /* 4 \B -- see also OP_NOT_UCP_WORD_BOUNDARY */
|
||||||
|
OP_WORD_BOUNDARY, /* 5 \b -- see also OP_UCP_WORD_BOUNDARY */
|
||||||
|
OP_NOT_DIGIT, /* 6 \D */
|
||||||
|
OP_DIGIT, /* 7 \d */
|
||||||
|
OP_NOT_WHITESPACE, /* 8 \S */
|
||||||
|
OP_WHITESPACE, /* 9 \s */
|
||||||
|
OP_NOT_WORDCHAR, /* 10 \W */
|
||||||
|
OP_WORDCHAR, /* 11 \w */
|
||||||
|
|
||||||
|
OP_ANY, /* 12 Match any character except newline (\N) */
|
||||||
|
OP_ALLANY, /* 13 Match any character */
|
||||||
|
OP_ANYBYTE, /* 14 Match any byte (\C); different to OP_ANY for UTF-8 */
|
||||||
|
OP_NOTPROP, /* 15 \P (not Unicode property) */
|
||||||
|
OP_PROP, /* 16 \p (Unicode property) */
|
||||||
|
OP_ANYNL, /* 17 \R (any newline sequence) */
|
||||||
|
OP_NOT_HSPACE, /* 18 \H (not horizontal whitespace) */
|
||||||
|
OP_HSPACE, /* 19 \h (horizontal whitespace) */
|
||||||
|
OP_NOT_VSPACE, /* 20 \V (not vertical whitespace) */
|
||||||
|
OP_VSPACE, /* 21 \v (vertical whitespace) */
|
||||||
|
OP_EXTUNI, /* 22 \X (extended Unicode sequence) */
|
||||||
|
OP_EODN, /* 23 End of data or \n at end of data (\Z) */
|
||||||
|
OP_EOD, /* 24 End of data (\z) */
|
||||||
|
|
||||||
|
/* Line end assertions */
|
||||||
|
|
||||||
|
OP_DOLL, /* 25 End of line - not multiline */
|
||||||
|
OP_DOLLM, /* 26 End of line - multiline */
|
||||||
|
OP_CIRC, /* 27 Start of line - not multiline */
|
||||||
|
OP_CIRCM, /* 28 Start of line - multiline */
|
||||||
|
|
||||||
|
/* Single characters; caseful must precede the caseless ones, and these
|
||||||
|
must remain in this order, and adjacent. */
|
||||||
|
|
||||||
|
OP_CHAR, /* 29 Match one character, casefully */
|
||||||
|
OP_CHARI, /* 30 Match one character, caselessly */
|
||||||
|
OP_NOT, /* 31 Match one character, not the given one, casefully */
|
||||||
|
OP_NOTI, /* 32 Match one character, not the given one, caselessly */
|
||||||
|
|
||||||
|
/* The following sets of 13 opcodes must always be kept in step because
|
||||||
|
the offset from the first one is used to generate the others. */
|
||||||
|
|
||||||
|
/* Repeated characters; caseful must precede the caseless ones */
|
||||||
|
|
||||||
|
OP_STAR, /* 33 The maximizing and minimizing versions of */
|
||||||
|
OP_MINSTAR, /* 34 these six opcodes must come in pairs, with */
|
||||||
|
OP_PLUS, /* 35 the minimizing one second. */
|
||||||
|
OP_MINPLUS, /* 36 */
|
||||||
|
OP_QUERY, /* 37 */
|
||||||
|
OP_MINQUERY, /* 38 */
|
||||||
|
|
||||||
|
OP_UPTO, /* 39 From 0 to n matches of one character, caseful */
|
||||||
|
OP_MINUPTO, /* 40 */
|
||||||
|
OP_EXACT, /* 41 Exactly n matches */
|
||||||
|
|
||||||
|
OP_POSSTAR, /* 42 Possessified star, caseful */
|
||||||
|
OP_POSPLUS, /* 43 Possessified plus, caseful */
|
||||||
|
OP_POSQUERY, /* 44 Possessified query, caseful */
|
||||||
|
OP_POSUPTO, /* 45 Possessified upto, caseful */
|
||||||
|
|
||||||
|
/* Repeated characters; caseless must follow the caseful ones */
|
||||||
|
|
||||||
|
OP_STARI, /* 46 */
|
||||||
|
OP_MINSTARI, /* 47 */
|
||||||
|
OP_PLUSI, /* 48 */
|
||||||
|
OP_MINPLUSI, /* 49 */
|
||||||
|
OP_QUERYI, /* 50 */
|
||||||
|
OP_MINQUERYI, /* 51 */
|
||||||
|
|
||||||
|
OP_UPTOI, /* 52 From 0 to n matches of one character, caseless */
|
||||||
|
OP_MINUPTOI, /* 53 */
|
||||||
|
OP_EXACTI, /* 54 */
|
||||||
|
|
||||||
|
OP_POSSTARI, /* 55 Possessified star, caseless */
|
||||||
|
OP_POSPLUSI, /* 56 Possessified plus, caseless */
|
||||||
|
OP_POSQUERYI, /* 57 Possessified query, caseless */
|
||||||
|
OP_POSUPTOI, /* 58 Possessified upto, caseless */
|
||||||
|
|
||||||
|
/* The negated ones must follow the non-negated ones, and match them */
|
||||||
|
/* Negated repeated character, caseful; must precede the caseless ones */
|
||||||
|
|
||||||
|
OP_NOTSTAR, /* 59 The maximizing and minimizing versions of */
|
||||||
|
OP_NOTMINSTAR, /* 60 these six opcodes must come in pairs, with */
|
||||||
|
OP_NOTPLUS, /* 61 the minimizing one second. They must be in */
|
||||||
|
OP_NOTMINPLUS, /* 62 exactly the same order as those above. */
|
||||||
|
OP_NOTQUERY, /* 63 */
|
||||||
|
OP_NOTMINQUERY, /* 64 */
|
||||||
|
|
||||||
|
OP_NOTUPTO, /* 65 From 0 to n matches, caseful */
|
||||||
|
OP_NOTMINUPTO, /* 66 */
|
||||||
|
OP_NOTEXACT, /* 67 Exactly n matches */
|
||||||
|
|
||||||
|
OP_NOTPOSSTAR, /* 68 Possessified versions, caseful */
|
||||||
|
OP_NOTPOSPLUS, /* 69 */
|
||||||
|
OP_NOTPOSQUERY, /* 70 */
|
||||||
|
OP_NOTPOSUPTO, /* 71 */
|
||||||
|
|
||||||
|
/* Negated repeated character, caseless; must follow the caseful ones */
|
||||||
|
|
||||||
|
OP_NOTSTARI, /* 72 */
|
||||||
|
OP_NOTMINSTARI, /* 73 */
|
||||||
|
OP_NOTPLUSI, /* 74 */
|
||||||
|
OP_NOTMINPLUSI, /* 75 */
|
||||||
|
OP_NOTQUERYI, /* 76 */
|
||||||
|
OP_NOTMINQUERYI, /* 77 */
|
||||||
|
|
||||||
|
OP_NOTUPTOI, /* 78 From 0 to n matches, caseless */
|
||||||
|
OP_NOTMINUPTOI, /* 79 */
|
||||||
|
OP_NOTEXACTI, /* 80 Exactly n matches */
|
||||||
|
|
||||||
|
OP_NOTPOSSTARI, /* 81 Possessified versions, caseless */
|
||||||
|
OP_NOTPOSPLUSI, /* 82 */
|
||||||
|
OP_NOTPOSQUERYI, /* 83 */
|
||||||
|
OP_NOTPOSUPTOI, /* 84 */
|
||||||
|
|
||||||
|
/* Character types */
|
||||||
|
|
||||||
|
OP_TYPESTAR, /* 85 The maximizing and minimizing versions of */
|
||||||
|
OP_TYPEMINSTAR, /* 86 these six opcodes must come in pairs, with */
|
||||||
|
OP_TYPEPLUS, /* 87 the minimizing one second. These codes must */
|
||||||
|
OP_TYPEMINPLUS, /* 88 be in exactly the same order as those above. */
|
||||||
|
OP_TYPEQUERY, /* 89 */
|
||||||
|
OP_TYPEMINQUERY, /* 90 */
|
||||||
|
|
||||||
|
OP_TYPEUPTO, /* 91 From 0 to n matches */
|
||||||
|
OP_TYPEMINUPTO, /* 92 */
|
||||||
|
OP_TYPEEXACT, /* 93 Exactly n matches */
|
||||||
|
|
||||||
|
OP_TYPEPOSSTAR, /* 94 Possessified versions */
|
||||||
|
OP_TYPEPOSPLUS, /* 95 */
|
||||||
|
OP_TYPEPOSQUERY, /* 96 */
|
||||||
|
OP_TYPEPOSUPTO, /* 97 */
|
||||||
|
|
||||||
|
/* These are used for character classes and back references; only the
|
||||||
|
first six are the same as the sets above. */
|
||||||
|
|
||||||
|
OP_CRSTAR, /* 98 The maximizing and minimizing versions of */
|
||||||
|
OP_CRMINSTAR, /* 99 all these opcodes must come in pairs, with */
|
||||||
|
OP_CRPLUS, /* 100 the minimizing one second. These codes must */
|
||||||
|
OP_CRMINPLUS, /* 101 be in exactly the same order as those above. */
|
||||||
|
OP_CRQUERY, /* 102 */
|
||||||
|
OP_CRMINQUERY, /* 103 */
|
||||||
|
|
||||||
|
OP_CRRANGE, /* 104 These are different to the three sets above. */
|
||||||
|
OP_CRMINRANGE, /* 105 */
|
||||||
|
|
||||||
|
OP_CRPOSSTAR, /* 106 Possessified versions */
|
||||||
|
OP_CRPOSPLUS, /* 107 */
|
||||||
|
OP_CRPOSQUERY, /* 108 */
|
||||||
|
OP_CRPOSRANGE, /* 109 */
|
||||||
|
|
||||||
|
/* End of quantifier opcodes */
|
||||||
|
|
||||||
|
OP_CLASS, /* 110 Match a character class, chars < 256 only */
|
||||||
|
OP_NCLASS, /* 111 Same, but the bitmap was created from a negative
|
||||||
|
class - the difference is relevant only when a
|
||||||
|
character > 255 is encountered. */
|
||||||
|
OP_XCLASS, /* 112 Extended class for handling > 255 chars within the
|
||||||
|
class. This does both positive and negative. */
|
||||||
|
OP_ECLASS, /* 113 Really-extended class, for handling logical
|
||||||
|
expressions computed over characters. */
|
||||||
|
OP_REF, /* 114 Match a back reference, casefully */
|
||||||
|
OP_REFI, /* 115 Match a back reference, caselessly */
|
||||||
|
OP_DNREF, /* 116 Match a duplicate name backref, casefully */
|
||||||
|
OP_DNREFI, /* 117 Match a duplicate name backref, caselessly */
|
||||||
|
OP_RECURSE, /* 118 Match a numbered subpattern (possibly recursive) */
|
||||||
|
OP_CALLOUT, /* 119 Call out to external function if provided */
|
||||||
|
OP_CALLOUT_STR, /* 120 Call out with string argument */
|
||||||
|
|
||||||
|
OP_ALT, /* 121 Start of alternation */
|
||||||
|
OP_KET, /* 122 End of group that doesn't have an unbounded repeat */
|
||||||
|
OP_KETRMAX, /* 123 These two must remain together and in this */
|
||||||
|
OP_KETRMIN, /* 124 order. They are for groups that repeat for ever. */
|
||||||
|
OP_KETRPOS, /* 125 Possessive unlimited repeat. */
|
||||||
|
|
||||||
|
/* The assertions must come before BRA, CBRA, ONCE, and COND. */
|
||||||
|
|
||||||
|
OP_REVERSE, /* 126 Move pointer back - used in lookbehind assertions */
|
||||||
|
OP_VREVERSE, /* 127 Move pointer back - variable */
|
||||||
|
OP_ASSERT, /* 128 Positive lookahead */
|
||||||
|
OP_ASSERT_NOT, /* 129 Negative lookahead */
|
||||||
|
OP_ASSERTBACK, /* 130 Positive lookbehind */
|
||||||
|
OP_ASSERTBACK_NOT, /* 131 Negative lookbehind */
|
||||||
|
OP_ASSERT_NA, /* 132 Positive non-atomic lookahead */
|
||||||
|
OP_ASSERTBACK_NA, /* 133 Positive non-atomic lookbehind */
|
||||||
|
OP_ASSERT_SCS, /* 134 Scan substring */
|
||||||
|
|
||||||
|
/* ONCE, SCRIPT_RUN, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come
|
||||||
|
immediately after the assertions, with ONCE first, as there's a test for >=
|
||||||
|
ONCE for a subpattern that isn't an assertion. The POS versions must
|
||||||
|
immediately follow the non-POS versions in each case. */
|
||||||
|
|
||||||
|
OP_ONCE, /* 135 Atomic group, contains captures */
|
||||||
|
OP_SCRIPT_RUN, /* 136 Non-capture, but check characters' scripts */
|
||||||
|
OP_BRA, /* 137 Start of non-capturing bracket */
|
||||||
|
OP_BRAPOS, /* 138 Ditto, with unlimited, possessive repeat */
|
||||||
|
OP_CBRA, /* 139 Start of capturing bracket */
|
||||||
|
OP_CBRAPOS, /* 140 Ditto, with unlimited, possessive repeat */
|
||||||
|
OP_COND, /* 141 Conditional group */
|
||||||
|
|
||||||
|
/* These five must follow the previous five, in the same order. There's a
|
||||||
|
check for >= SBRA to distinguish the two sets. */
|
||||||
|
|
||||||
|
OP_SBRA, /* 142 Start of non-capturing bracket, check empty */
|
||||||
|
OP_SBRAPOS, /* 143 Ditto, with unlimited, possessive repeat */
|
||||||
|
OP_SCBRA, /* 144 Start of capturing bracket, check empty */
|
||||||
|
OP_SCBRAPOS, /* 145 Ditto, with unlimited, possessive repeat */
|
||||||
|
OP_SCOND, /* 146 Conditional group, check empty */
|
||||||
|
|
||||||
|
/* The next two pairs must (respectively) be kept together. */
|
||||||
|
|
||||||
|
OP_CREF, /* 147 Used to hold a capture number as condition */
|
||||||
|
OP_DNCREF, /* 148 Used to point to duplicate names as a condition */
|
||||||
|
OP_RREF, /* 149 Used to hold a recursion number as condition */
|
||||||
|
OP_DNRREF, /* 150 Used to point to duplicate names as a condition */
|
||||||
|
OP_FALSE, /* 151 Always false (used by DEFINE and VERSION) */
|
||||||
|
OP_TRUE, /* 152 Always true (used by VERSION) */
|
||||||
|
|
||||||
|
OP_BRAZERO, /* 153 These two must remain together and in this */
|
||||||
|
OP_BRAMINZERO, /* 154 order. */
|
||||||
|
OP_BRAPOSZERO, /* 155 */
|
||||||
|
|
||||||
|
/* These are backtracking control verbs */
|
||||||
|
|
||||||
|
OP_MARK, /* 156 always has an argument */
|
||||||
|
OP_PRUNE, /* 157 */
|
||||||
|
OP_PRUNE_ARG, /* 158 same, but with argument */
|
||||||
|
OP_SKIP, /* 159 */
|
||||||
|
OP_SKIP_ARG, /* 160 same, but with argument */
|
||||||
|
OP_THEN, /* 161 */
|
||||||
|
OP_THEN_ARG, /* 162 same, but with argument */
|
||||||
|
OP_COMMIT, /* 163 */
|
||||||
|
OP_COMMIT_ARG, /* 164 same, but with argument */
|
||||||
|
|
||||||
|
/* These are forced failure and success verbs. FAIL and ACCEPT do accept an
|
||||||
|
argument, but these cases can be compiled as, for example, (*MARK:X)(*FAIL)
|
||||||
|
without the need for a special opcode. */
|
||||||
|
|
||||||
|
OP_FAIL, /* 165 */
|
||||||
|
OP_ACCEPT, /* 166 */
|
||||||
|
OP_ASSERT_ACCEPT, /* 167 Used inside assertions */
|
||||||
|
OP_CLOSE, /* 168 Used before OP_ACCEPT to close open captures */
|
||||||
|
|
||||||
|
/* This is used to skip a subpattern with a {0} quantifier */
|
||||||
|
|
||||||
|
OP_SKIPZERO, /* 169 */
|
||||||
|
|
||||||
|
/* This is used to identify a DEFINE group during compilation so that it can
|
||||||
|
be checked for having only one branch. It is changed to OP_FALSE before
|
||||||
|
compilation finishes. */
|
||||||
|
|
||||||
|
OP_DEFINE, /* 170 */
|
||||||
|
|
||||||
|
/* These opcodes replace their normal counterparts in UCP mode when
|
||||||
|
PCRE2_EXTRA_ASCII_BSW is not set. */
|
||||||
|
|
||||||
|
OP_NOT_UCP_WORD_BOUNDARY, /* 171 */
|
||||||
|
OP_UCP_WORD_BOUNDARY, /* 172 */
|
||||||
|
|
||||||
|
/* This is not an opcode, but is used to check that tables indexed by opcode
|
||||||
|
are the correct length, in order to catch updating errors - there have been
|
||||||
|
some in the past. */
|
||||||
|
|
||||||
|
OP_TABLE_LENGTH
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
|
||||||
|
definitions that follow must also be updated to match. There are also tables
|
||||||
|
called "opcode_possessify" in pcre2_compile.c and "coptable" and "poptable" in
|
||||||
|
pcre2_dfa_match.c that must be updated. */
|
||||||
|
|
||||||
|
|
||||||
|
/* This macro defines textual names for all the opcodes. These are used only
|
||||||
|
for debugging, and some of them are only partial names. The macro is referenced
|
||||||
|
only in pcre2_printint.c, which fills out the full names in many cases (and in
|
||||||
|
some cases doesn't actually use these names at all). */
|
||||||
|
|
||||||
|
#define OP_NAME_LIST \
|
||||||
|
"End", "\\A", "\\G", "\\K", "\\B", "\\b", "\\D", "\\d", \
|
||||||
|
"\\S", "\\s", "\\W", "\\w", "Any", "AllAny", "Anybyte", \
|
||||||
|
"notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v", \
|
||||||
|
"extuni", "\\Z", "\\z", \
|
||||||
|
"$", "$", "^", "^", "char", "chari", "not", "noti", \
|
||||||
|
"*", "*?", "+", "+?", "?", "??", \
|
||||||
|
"{", "{", "{", \
|
||||||
|
"*+","++", "?+", "{", \
|
||||||
|
"*", "*?", "+", "+?", "?", "??", \
|
||||||
|
"{", "{", "{", \
|
||||||
|
"*+","++", "?+", "{", \
|
||||||
|
"*", "*?", "+", "+?", "?", "??", \
|
||||||
|
"{", "{", "{", \
|
||||||
|
"*+","++", "?+", "{", \
|
||||||
|
"*", "*?", "+", "+?", "?", "??", \
|
||||||
|
"{", "{", "{", \
|
||||||
|
"*+","++", "?+", "{", \
|
||||||
|
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
|
||||||
|
"*+","++", "?+", "{", \
|
||||||
|
"*", "*?", "+", "+?", "?", "??", "{", "{", \
|
||||||
|
"*+","++", "?+", "{", \
|
||||||
|
"class", "nclass", "xclass", "eclass", \
|
||||||
|
"Ref", "Refi", "DnRef", "DnRefi", \
|
||||||
|
"Recurse", "Callout", "CalloutStr", \
|
||||||
|
"Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \
|
||||||
|
"Reverse", "VReverse", "Assert", "Assert not", \
|
||||||
|
"Assert back", "Assert back not", \
|
||||||
|
"Non-atomic assert", "Non-atomic assert back", \
|
||||||
|
"Scan substring", \
|
||||||
|
"Once", \
|
||||||
|
"Script run", \
|
||||||
|
"Bra", "BraPos", "CBra", "CBraPos", \
|
||||||
|
"Cond", \
|
||||||
|
"SBra", "SBraPos", "SCBra", "SCBraPos", \
|
||||||
|
"SCond", \
|
||||||
|
"Capture ref", "Capture dnref", "Cond rec", "Cond dnrec", \
|
||||||
|
"Cond false", "Cond true", \
|
||||||
|
"Brazero", "Braminzero", "Braposzero", \
|
||||||
|
"*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \
|
||||||
|
"*THEN", "*THEN", "*COMMIT", "*COMMIT", "*FAIL", \
|
||||||
|
"*ACCEPT", "*ASSERT_ACCEPT", \
|
||||||
|
"Close", "Skip zero", "Define", "\\B (ucp)", "\\b (ucp)"
|
||||||
|
|
||||||
|
|
||||||
|
/* This macro defines the length of fixed length operations in the compiled
|
||||||
|
regex. The lengths are used when searching for specific things, and also in the
|
||||||
|
debugging printing of a compiled regex. We use a macro so that it can be
|
||||||
|
defined close to the definitions of the opcodes themselves.
|
||||||
|
|
||||||
|
As things have been extended, some of these are no longer fixed lengths, but are
|
||||||
|
minima instead. For example, the length of a single-character repeat may vary
|
||||||
|
in UTF-8 mode. The code that uses this table must know about such things. */
|
||||||
|
|
||||||
|
#define OP_LENGTHS \
|
||||||
|
1, /* End */ \
|
||||||
|
1, 1, 1, 1, 1, /* \A, \G, \K, \B, \b */ \
|
||||||
|
1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ \
|
||||||
|
1, 1, 1, /* Any, AllAny, Anybyte */ \
|
||||||
|
3, 3, /* \P, \p */ \
|
||||||
|
1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ \
|
||||||
|
1, /* \X */ \
|
||||||
|
1, 1, 1, 1, 1, 1, /* \Z, \z, $, $M, ^, ^M */ \
|
||||||
|
2, /* Char - the minimum length */ \
|
||||||
|
2, /* Chari - the minimum length */ \
|
||||||
|
2, /* not */ \
|
||||||
|
2, /* noti */ \
|
||||||
|
/* Positive single-char repeats ** These are */ \
|
||||||
|
2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** minima in */ \
|
||||||
|
2+IMM2_SIZE, 2+IMM2_SIZE, /* upto, minupto ** mode */ \
|
||||||
|
2+IMM2_SIZE, /* exact */ \
|
||||||
|
2, 2, 2, 2+IMM2_SIZE, /* *+, ++, ?+, upto+ */ \
|
||||||
|
2, 2, 2, 2, 2, 2, /* *I, *?I, +I, +?I, ?I, ??I ** UTF-8 */ \
|
||||||
|
2+IMM2_SIZE, 2+IMM2_SIZE, /* upto I, minupto I */ \
|
||||||
|
2+IMM2_SIZE, /* exact I */ \
|
||||||
|
2, 2, 2, 2+IMM2_SIZE, /* *+I, ++I, ?+I, upto+I */ \
|
||||||
|
/* Negative single-char repeats - only for chars < 256 */ \
|
||||||
|
2, 2, 2, 2, 2, 2, /* NOT *, *?, +, +?, ?, ?? */ \
|
||||||
|
2+IMM2_SIZE, 2+IMM2_SIZE, /* NOT upto, minupto */ \
|
||||||
|
2+IMM2_SIZE, /* NOT exact */ \
|
||||||
|
2, 2, 2, 2+IMM2_SIZE, /* Possessive NOT *, +, ?, upto */ \
|
||||||
|
2, 2, 2, 2, 2, 2, /* NOT *I, *?I, +I, +?I, ?I, ??I */ \
|
||||||
|
2+IMM2_SIZE, 2+IMM2_SIZE, /* NOT upto I, minupto I */ \
|
||||||
|
2+IMM2_SIZE, /* NOT exact I */ \
|
||||||
|
2, 2, 2, 2+IMM2_SIZE, /* Possessive NOT *I, +I, ?I, upto I */ \
|
||||||
|
/* Positive type repeats */ \
|
||||||
|
2, 2, 2, 2, 2, 2, /* Type *, *?, +, +?, ?, ?? */ \
|
||||||
|
2+IMM2_SIZE, 2+IMM2_SIZE, /* Type upto, minupto */ \
|
||||||
|
2+IMM2_SIZE, /* Type exact */ \
|
||||||
|
2, 2, 2, 2+IMM2_SIZE, /* Possessive *+, ++, ?+, upto+ */ \
|
||||||
|
/* Character class & ref repeats */ \
|
||||||
|
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ \
|
||||||
|
1+2*IMM2_SIZE, 1+2*IMM2_SIZE, /* CRRANGE, CRMINRANGE */ \
|
||||||
|
1, 1, 1, 1+2*IMM2_SIZE, /* Possessive *+, ++, ?+, CRPOSRANGE */ \
|
||||||
|
1+(32/sizeof(PCRE2_UCHAR)), /* CLASS */ \
|
||||||
|
1+(32/sizeof(PCRE2_UCHAR)), /* NCLASS */ \
|
||||||
|
0, /* XCLASS - variable length */ \
|
||||||
|
0, /* ECLASS - variable length */ \
|
||||||
|
1+IMM2_SIZE, /* REF */ \
|
||||||
|
1+IMM2_SIZE+1, /* REFI */ \
|
||||||
|
1+2*IMM2_SIZE, /* DNREF */ \
|
||||||
|
1+2*IMM2_SIZE+1, /* DNREFI */ \
|
||||||
|
1+LINK_SIZE, /* RECURSE */ \
|
||||||
|
1+2*LINK_SIZE+1, /* CALLOUT */ \
|
||||||
|
0, /* CALLOUT_STR - variable length */ \
|
||||||
|
1+LINK_SIZE, /* Alt */ \
|
||||||
|
1+LINK_SIZE, /* Ket */ \
|
||||||
|
1+LINK_SIZE, /* KetRmax */ \
|
||||||
|
1+LINK_SIZE, /* KetRmin */ \
|
||||||
|
1+LINK_SIZE, /* KetRpos */ \
|
||||||
|
1+IMM2_SIZE, /* Reverse */ \
|
||||||
|
1+2*IMM2_SIZE, /* VReverse */ \
|
||||||
|
1+LINK_SIZE, /* Assert */ \
|
||||||
|
1+LINK_SIZE, /* Assert not */ \
|
||||||
|
1+LINK_SIZE, /* Assert behind */ \
|
||||||
|
1+LINK_SIZE, /* Assert behind not */ \
|
||||||
|
1+LINK_SIZE, /* NA Assert */ \
|
||||||
|
1+LINK_SIZE, /* NA Assert behind */ \
|
||||||
|
1+LINK_SIZE, /* Scan substring */ \
|
||||||
|
1+LINK_SIZE, /* ONCE */ \
|
||||||
|
1+LINK_SIZE, /* SCRIPT_RUN */ \
|
||||||
|
1+LINK_SIZE, /* BRA */ \
|
||||||
|
1+LINK_SIZE, /* BRAPOS */ \
|
||||||
|
1+LINK_SIZE+IMM2_SIZE, /* CBRA */ \
|
||||||
|
1+LINK_SIZE+IMM2_SIZE, /* CBRAPOS */ \
|
||||||
|
1+LINK_SIZE, /* COND */ \
|
||||||
|
1+LINK_SIZE, /* SBRA */ \
|
||||||
|
1+LINK_SIZE, /* SBRAPOS */ \
|
||||||
|
1+LINK_SIZE+IMM2_SIZE, /* SCBRA */ \
|
||||||
|
1+LINK_SIZE+IMM2_SIZE, /* SCBRAPOS */ \
|
||||||
|
1+LINK_SIZE, /* SCOND */ \
|
||||||
|
1+IMM2_SIZE, 1+2*IMM2_SIZE, /* CREF, DNCREF */ \
|
||||||
|
1+IMM2_SIZE, 1+2*IMM2_SIZE, /* RREF, DNRREF */ \
|
||||||
|
1, 1, /* FALSE, TRUE */ \
|
||||||
|
1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \
|
||||||
|
3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \
|
||||||
|
1, 3, /* SKIP, SKIP_ARG */ \
|
||||||
|
1, 3, /* THEN, THEN_ARG */ \
|
||||||
|
1, 3, /* COMMIT, COMMIT_ARG */ \
|
||||||
|
1, 1, 1, /* FAIL, ACCEPT, ASSERT_ACCEPT */ \
|
||||||
|
1+IMM2_SIZE, 1, /* CLOSE, SKIPZERO */ \
|
||||||
|
1, /* DEFINE */ \
|
||||||
|
1, 1 /* \B and \b in UCP mode */
|
||||||
|
|
||||||
|
/* A magic value for OP_RREF to indicate the "any recursion" condition. */
|
||||||
|
|
||||||
|
#define RREF_ANY 0xffff
|
||||||
|
|
||||||
|
/* Constants used by OP_REFI and OP_DNREFI to control matching behaviour. */
|
||||||
|
|
||||||
|
#define REFI_FLAG_CASELESS_RESTRICT 0x1
|
||||||
|
#define REFI_FLAG_TURKISH_CASING 0x2
|
||||||
|
|
||||||
|
|
||||||
|
/* ---------- Private structures that are mode-independent. ---------- */
|
||||||
|
|
||||||
|
/* Structure to hold data for custom memory management. */
|
||||||
|
|
||||||
|
typedef struct pcre2_memctl {
|
||||||
|
void * (*malloc)(size_t, void *); /* Allocation callback: takes a size and a void * argument, returns void * */
|
||||||
|
void (*free)(void *, void *); /* Release callback: takes two void * arguments, returns nothing */
|
||||||
|
void *memory_data; /* Opaque pointer; presumably passed to both callbacks - confirm against callers */
|
||||||
|
} pcre2_memctl;
|
||||||
|
|
||||||
|
/* Structure for building a chain of open capturing subpatterns during
|
||||||
|
compiling, so that instructions to close them can be compiled when (*ACCEPT) is
|
||||||
|
encountered. */
|
||||||
|
|
||||||
|
typedef struct open_capitem {
|
||||||
|
struct open_capitem *next; /* Chain link */
|
||||||
|
uint16_t number; /* Capture number */
|
||||||
|
uint16_t assert_depth; /* Assertion depth when opened */
|
||||||
|
} open_capitem;
|
||||||
|
|
||||||
|
/* Layout of the UCP type table that translates property names into types and
|
||||||
|
codes. Each entry used to point directly to a name, but to reduce the number of
|
||||||
|
relocations in shared libraries, it now has an offset into a single string
|
||||||
|
instead. */
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
uint16_t name_offset; /* Offset of the property name within a single shared string */
|
||||||
|
uint16_t type; /* Type that the property name translates into */
|
||||||
|
uint16_t value; /* Presumably the code that goes with the type - confirm against the table users */
|
||||||
|
} ucp_type_table;
|
||||||
|
|
||||||
|
/* Unicode character database (UCD) record format */
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
uint8_t script; /* ucp_Arabic, etc. */
|
||||||
|
uint8_t chartype; /* ucp_Cc, etc. (general categories) */
|
||||||
|
uint8_t gbprop; /* ucp_gbControl, etc. (grapheme break property) */
|
||||||
|
uint8_t caseset; /* offset to multichar other cases or zero */
|
||||||
|
int32_t other_case; /* offset to other case, or zero if none */
|
||||||
|
uint16_t scriptx_bidiclass; /* script extension (11 bit) and bidi class (5 bit) values */
|
||||||
|
uint16_t bprops; /* binary properties offset */
|
||||||
|
} ucd_record;
|
||||||
|
|
||||||
|
/* UCD access macros */
|
||||||
|
|
||||||
|
#define UCD_BLOCK_SIZE 128
|
||||||
|
#define REAL_GET_UCD(ch) (PRIV(ucd_records) + \
|
||||||
|
PRIV(ucd_stage2)[PRIV(ucd_stage1)[(int)(ch) / UCD_BLOCK_SIZE] * \
|
||||||
|
UCD_BLOCK_SIZE + (int)(ch) % UCD_BLOCK_SIZE])
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
|
/* Values above MAX_UTF_CODE_POINT select the dummy record instead of being
looked up through REAL_GET_UCD. The argument is parenthesized so that an
expression argument (for example "a & b") is compared as a whole. */
#define GET_UCD(ch) (((ch) > MAX_UTF_CODE_POINT)? \
PRIV(dummy_ucd_record) : REAL_GET_UCD(ch))
|
||||||
|
#else
|
||||||
|
#define GET_UCD(ch) REAL_GET_UCD(ch)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define UCD_SCRIPTX_MASK 0x3ff
|
||||||
|
#define UCD_BIDICLASS_SHIFT 11
|
||||||
|
#define UCD_BPROPS_MASK 0xfff
|
||||||
|
|
||||||
|
#define UCD_SCRIPTX_PROP(prop) ((prop)->scriptx_bidiclass & UCD_SCRIPTX_MASK)
|
||||||
|
#define UCD_BIDICLASS_PROP(prop) ((prop)->scriptx_bidiclass >> UCD_BIDICLASS_SHIFT)
|
||||||
|
#define UCD_BPROPS_PROP(prop) ((prop)->bprops & UCD_BPROPS_MASK)
|
||||||
|
|
||||||
|
#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype
|
||||||
|
#define UCD_SCRIPT(ch) GET_UCD(ch)->script
|
||||||
|
#define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
|
||||||
|
#define UCD_GRAPHBREAK(ch) GET_UCD(ch)->gbprop
|
||||||
|
#define UCD_CASESET(ch) GET_UCD(ch)->caseset
|
||||||
|
/* Other-case code point of ch: ch plus the signed other_case offset taken from
its UCD record. The argument is parenthesized before the cast so that an
expression argument is converted as a whole rather than token by token. */
#define UCD_OTHERCASE(ch) ((uint32_t)((int)(ch) + (int)(GET_UCD(ch)->other_case)))
|
||||||
|
#define UCD_SCRIPTX(ch) UCD_SCRIPTX_PROP(GET_UCD(ch))
|
||||||
|
#define UCD_BPROPS(ch) UCD_BPROPS_PROP(GET_UCD(ch))
|
||||||
|
#define UCD_BIDICLASS(ch) UCD_BIDICLASS_PROP(GET_UCD(ch))
|
||||||
|
#define UCD_ANY_I(ch) \
|
||||||
|
/* match any of the four characters 'i', 'I', U+0130, U+0131 */ \
|
||||||
|
(((uint32_t)(ch) | 0x20u) == 0x69u || ((uint32_t)(ch) | 1u) == 0x0131u)
|
||||||
|
#define UCD_DOTTED_I(ch) \
|
||||||
|
((uint32_t)(ch) == 0x69u || (uint32_t)(ch) == 0x0130u)
|
||||||
|
#define UCD_FOLD_I_TURKISH(ch) \
|
||||||
|
((uint32_t)(ch) == 0x0130u ? 0x69u : \
|
||||||
|
(uint32_t)(ch) == 0x49u ? 0x0131u : (uint32_t)(ch))
|
||||||
|
|
||||||
|
/* The "scriptx" and bprops fields contain offsets into vectors of 32-bit words
|
||||||
|
that form a bitmap representing a list of scripts or boolean properties. These
|
||||||
|
macros test or set a bit in the map by number. */
|
||||||
|
|
||||||
|
/* Isolate bit n of a bitmap held as 32-bit words: zero if clear, otherwise the
bit's mask within its word. */
#define MAPBIT(map,n) ((1u << ((n) % 32)) & (map)[(n) / 32])
|
||||||
|
/* Turn on bit n of a bitmap held as 32-bit words; the expression's value is
the updated word. */
#define MAPSET(map,n) ((map)[(n) / 32] |= (1u << ((n) % 32)))
|
||||||
|
|
||||||
|
/* Header for serialized pcre2 codes. */
|
||||||
|
|
||||||
|
typedef struct pcre2_serialized_data {
|
||||||
|
uint32_t magic;
|
||||||
|
uint32_t version;
|
||||||
|
uint32_t config;
|
||||||
|
int32_t number_of_codes;
|
||||||
|
} pcre2_serialized_data;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* ----------------- Items that need PCRE2_CODE_UNIT_WIDTH ----------------- */
|
||||||
|
|
||||||
|
/* When this file is included by pcre2test, PCRE2_CODE_UNIT_WIDTH is defined as
|
||||||
|
0, so the following items are omitted. */
|
||||||
|
|
||||||
|
#if defined PCRE2_CODE_UNIT_WIDTH && PCRE2_CODE_UNIT_WIDTH != 0
|
||||||
|
|
||||||
|
/* EBCDIC is supported only for the 8-bit library. */
|
||||||
|
|
||||||
|
#if defined EBCDIC && PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
|
#error EBCDIC is not supported for the 16-bit or 32-bit libraries
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* This is the largest non-UTF code point. */
|
||||||
|
|
||||||
|
#define MAX_NON_UTF_CHAR (0xffffffffU >> (32 - PCRE2_CODE_UNIT_WIDTH))
|
||||||
|
|
||||||
|
/* Internal shared data tables and variables. These are used by more than one
|
||||||
|
of the exported public functions. They have to be "external" in the C sense,
|
||||||
|
but are not part of the PCRE2 public API. Although the data for some of them is
|
||||||
|
identical in all libraries, they must have different names so that multiple
|
||||||
|
libraries can be simultaneously linked to a single application. However, UTF-8
|
||||||
|
tables are needed only when compiling the 8-bit library. */
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
extern const int PRIV(utf8_table1)[];
|
||||||
|
extern const int PRIV(utf8_table1_size);
|
||||||
|
extern const int PRIV(utf8_table2)[];
|
||||||
|
extern const int PRIV(utf8_table3)[];
|
||||||
|
extern const uint8_t PRIV(utf8_table4)[];
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define _pcre2_OP_lengths PCRE2_SUFFIX(_pcre2_OP_lengths_)
|
||||||
|
#define _pcre2_callout_end_delims PCRE2_SUFFIX(_pcre2_callout_end_delims_)
|
||||||
|
#define _pcre2_callout_start_delims PCRE2_SUFFIX(_pcre2_callout_start_delims_)
|
||||||
|
#define _pcre2_default_compile_context PCRE2_SUFFIX(_pcre2_default_compile_context_)
|
||||||
|
#define _pcre2_default_convert_context PCRE2_SUFFIX(_pcre2_default_convert_context_)
|
||||||
|
#define _pcre2_default_match_context PCRE2_SUFFIX(_pcre2_default_match_context_)
|
||||||
|
#define _pcre2_default_tables PCRE2_SUFFIX(_pcre2_default_tables_)
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
|
#define _pcre2_dummy_ucd_record PCRE2_SUFFIX(_pcre2_dummy_ucd_record_)
|
||||||
|
#endif
|
||||||
|
#define _pcre2_hspace_list PCRE2_SUFFIX(_pcre2_hspace_list_)
|
||||||
|
#define _pcre2_vspace_list PCRE2_SUFFIX(_pcre2_vspace_list_)
|
||||||
|
#define _pcre2_ucd_boolprop_sets PCRE2_SUFFIX(_pcre2_ucd_boolprop_sets_)
|
||||||
|
#define _pcre2_ucd_caseless_sets PCRE2_SUFFIX(_pcre2_ucd_caseless_sets_)
|
||||||
|
#define _pcre2_ucd_turkish_dotted_i_caseset PCRE2_SUFFIX(_pcre2_ucd_turkish_dotted_i_caseset_)
|
||||||
|
#define _pcre2_ucd_nocase_ranges PCRE2_SUFFIX(_pcre2_ucd_nocase_ranges_)
|
||||||
|
#define _pcre2_ucd_nocase_ranges_size PCRE2_SUFFIX(_pcre2_ucd_nocase_ranges_size_)
|
||||||
|
#define _pcre2_ucd_digit_sets PCRE2_SUFFIX(_pcre2_ucd_digit_sets_)
|
||||||
|
#define _pcre2_ucd_script_sets PCRE2_SUFFIX(_pcre2_ucd_script_sets_)
|
||||||
|
#define _pcre2_ucd_records PCRE2_SUFFIX(_pcre2_ucd_records_)
|
||||||
|
#define _pcre2_ucd_stage1 PCRE2_SUFFIX(_pcre2_ucd_stage1_)
|
||||||
|
#define _pcre2_ucd_stage2 PCRE2_SUFFIX(_pcre2_ucd_stage2_)
|
||||||
|
#define _pcre2_ucp_gbtable PCRE2_SUFFIX(_pcre2_ucp_gbtable_)
|
||||||
|
#define _pcre2_ucp_gentype PCRE2_SUFFIX(_pcre2_ucp_gentype_)
|
||||||
|
#define _pcre2_ucp_typerange PCRE2_SUFFIX(_pcre2_ucp_typerange_)
|
||||||
|
#define _pcre2_unicode_version PCRE2_SUFFIX(_pcre2_unicode_version_)
|
||||||
|
#define _pcre2_utt PCRE2_SUFFIX(_pcre2_utt_)
|
||||||
|
#define _pcre2_utt_names PCRE2_SUFFIX(_pcre2_utt_names_)
|
||||||
|
#define _pcre2_utt_size PCRE2_SUFFIX(_pcre2_utt_size_)
|
||||||
|
|
||||||
|
extern const uint8_t PRIV(OP_lengths)[];
|
||||||
|
extern const uint32_t PRIV(callout_end_delims)[];
|
||||||
|
extern const uint32_t PRIV(callout_start_delims)[];
|
||||||
|
extern pcre2_compile_context PRIV(default_compile_context);
|
||||||
|
extern pcre2_convert_context PRIV(default_convert_context);
|
||||||
|
extern pcre2_match_context PRIV(default_match_context);
|
||||||
|
extern const uint8_t PRIV(default_tables)[];
|
||||||
|
extern const uint32_t PRIV(hspace_list)[];
|
||||||
|
extern const uint32_t PRIV(vspace_list)[];
|
||||||
|
extern const uint32_t PRIV(ucd_boolprop_sets)[];
|
||||||
|
extern const uint32_t PRIV(ucd_caseless_sets)[];
|
||||||
|
extern const uint32_t PRIV(ucd_turkish_dotted_i_caseset);
|
||||||
|
extern const uint32_t PRIV(ucd_nocase_ranges)[];
|
||||||
|
extern const uint32_t PRIV(ucd_nocase_ranges_size);
|
||||||
|
extern const uint32_t PRIV(ucd_digit_sets)[];
|
||||||
|
extern const uint32_t PRIV(ucd_script_sets)[];
|
||||||
|
extern const ucd_record PRIV(ucd_records)[];
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
|
extern const ucd_record PRIV(dummy_ucd_record)[];
|
||||||
|
#endif
|
||||||
|
extern const uint16_t PRIV(ucd_stage1)[];
|
||||||
|
extern const uint16_t PRIV(ucd_stage2)[];
|
||||||
|
extern const uint32_t PRIV(ucp_gbtable)[];
|
||||||
|
extern const uint32_t PRIV(ucp_gentype)[];
|
||||||
|
#ifdef SUPPORT_JIT
|
||||||
|
extern const int PRIV(ucp_typerange)[];
|
||||||
|
#endif
|
||||||
|
extern const char *PRIV(unicode_version);
|
||||||
|
extern const ucp_type_table PRIV(utt)[];
|
||||||
|
extern const char PRIV(utt_names)[];
|
||||||
|
extern const size_t PRIV(utt_size);
|
||||||
|
|
||||||
|
/* Mode-dependent macros and hidden and private structures are defined in a
|
||||||
|
separate file so that pcre2test can include them at all supported widths. When
|
||||||
|
compiling the library, PCRE2_CODE_UNIT_WIDTH will be defined, and we can
|
||||||
|
include them at the appropriate width, after setting up suffix macros for the
|
||||||
|
private structures. */
|
||||||
|
|
||||||
|
#define branch_chain PCRE2_SUFFIX(branch_chain_)
|
||||||
|
#define compile_block PCRE2_SUFFIX(compile_block_)
|
||||||
|
#define dfa_match_block PCRE2_SUFFIX(dfa_match_block_)
|
||||||
|
#define match_block PCRE2_SUFFIX(match_block_)
|
||||||
|
#define named_group PCRE2_SUFFIX(named_group_)
|
||||||
|
|
||||||
|
#include "pcre2_intmodedep.h"
|
||||||
|
|
||||||
|
/* Private "external" functions. These are internal functions that are called
|
||||||
|
from modules other than the one in which they are defined. They have to be
|
||||||
|
"external" in the C sense, but are not part of the PCRE2 public API. They are
|
||||||
|
not referenced from pcre2test, and must not be defined when no code unit width
|
||||||
|
is available. */
|
||||||
|
|
||||||
|
/* Name mapping for the private "external" functions, kept in alphabetical
order. Each plain _pcre2_xxx name is replaced by PCRE2_SUFFIX(_pcre2_xxx_). */

#define _pcre2_auto_possessify   PCRE2_SUFFIX(_pcre2_auto_possessify_)
#define _pcre2_check_escape      PCRE2_SUFFIX(_pcre2_check_escape_)
#define _pcre2_eclass            PCRE2_SUFFIX(_pcre2_eclass_)
#define _pcre2_extuni            PCRE2_SUFFIX(_pcre2_extuni_)
#define _pcre2_find_bracket      PCRE2_SUFFIX(_pcre2_find_bracket_)
#define _pcre2_is_newline        PCRE2_SUFFIX(_pcre2_is_newline_)
#define _pcre2_jit_free          PCRE2_SUFFIX(_pcre2_jit_free_)
#define _pcre2_jit_free_rodata   PCRE2_SUFFIX(_pcre2_jit_free_rodata_)
#define _pcre2_jit_get_size      PCRE2_SUFFIX(_pcre2_jit_get_size_)
#define _pcre2_jit_get_target    PCRE2_SUFFIX(_pcre2_jit_get_target_)
#define _pcre2_memctl_malloc     PCRE2_SUFFIX(_pcre2_memctl_malloc_)
#define _pcre2_ord2utf           PCRE2_SUFFIX(_pcre2_ord2utf_)
#define _pcre2_script_run        PCRE2_SUFFIX(_pcre2_script_run_)
#define _pcre2_strcmp            PCRE2_SUFFIX(_pcre2_strcmp_)
#define _pcre2_strcmp_c8         PCRE2_SUFFIX(_pcre2_strcmp_c8_)
#define _pcre2_strcpy_c8         PCRE2_SUFFIX(_pcre2_strcpy_c8_)
#define _pcre2_strlen            PCRE2_SUFFIX(_pcre2_strlen_)
#define _pcre2_strncmp           PCRE2_SUFFIX(_pcre2_strncmp_)
#define _pcre2_strncmp_c8        PCRE2_SUFFIX(_pcre2_strncmp_c8_)
#define _pcre2_study             PCRE2_SUFFIX(_pcre2_study_)
#define _pcre2_valid_utf         PCRE2_SUFFIX(_pcre2_valid_utf_)
#define _pcre2_was_newline       PCRE2_SUFFIX(_pcre2_was_newline_)
#define _pcre2_xclass            PCRE2_SUFFIX(_pcre2_xclass_)
|
||||||
|
|
||||||
|
extern int _pcre2_auto_possessify(PCRE2_UCHAR *,
|
||||||
|
const compile_block *);
|
||||||
|
extern int _pcre2_check_escape(PCRE2_SPTR *, PCRE2_SPTR, uint32_t *,
|
||||||
|
int *, uint32_t, uint32_t, uint32_t, BOOL, compile_block *);
|
||||||
|
extern PCRE2_SPTR _pcre2_extuni(uint32_t, PCRE2_SPTR, PCRE2_SPTR, PCRE2_SPTR,
|
||||||
|
BOOL, int *);
|
||||||
|
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
|
||||||
|
extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
|
||||||
|
uint32_t *, BOOL);
|
||||||
|
extern void _pcre2_jit_free_rodata(void *, void *);
|
||||||
|
extern void _pcre2_jit_free(void *, pcre2_memctl *);
|
||||||
|
extern size_t _pcre2_jit_get_size(void *);
|
||||||
|
const char * _pcre2_jit_get_target(void);
|
||||||
|
extern void * _pcre2_memctl_malloc(size_t, pcre2_memctl *);
|
||||||
|
extern unsigned int _pcre2_ord2utf(uint32_t, PCRE2_UCHAR *);
|
||||||
|
extern BOOL _pcre2_script_run(PCRE2_SPTR, PCRE2_SPTR, BOOL);
|
||||||
|
extern int _pcre2_strcmp(PCRE2_SPTR, PCRE2_SPTR);
|
||||||
|
extern int _pcre2_strcmp_c8(PCRE2_SPTR, const char *);
|
||||||
|
extern PCRE2_SIZE _pcre2_strcpy_c8(PCRE2_UCHAR *, const char *);
|
||||||
|
extern PCRE2_SIZE _pcre2_strlen(PCRE2_SPTR);
|
||||||
|
extern int _pcre2_strncmp(PCRE2_SPTR, PCRE2_SPTR, size_t);
|
||||||
|
extern int _pcre2_strncmp_c8(PCRE2_SPTR, const char *, size_t);
|
||||||
|
extern int _pcre2_study(pcre2_real_code *);
|
||||||
|
extern int _pcre2_valid_utf(PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE *);
|
||||||
|
extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
|
||||||
|
uint32_t *, BOOL);
|
||||||
|
extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, const uint8_t *, BOOL);
|
||||||
|
extern BOOL _pcre2_eclass(uint32_t, PCRE2_SPTR, PCRE2_SPTR,
|
||||||
|
const uint8_t *, BOOL);
|
||||||
|
|
||||||
|
/* This function is needed only when memmove() is not available. */
|
||||||
|
|
||||||
|
#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
|
||||||
|
#define _pcre2_memmove PCRE2_SUFFIX(_pcre2_memmove)
|
||||||
|
extern void * _pcre2_memmove(void *, const void *, size_t);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* PCRE2_CODE_UNIT_WIDTH */
|
||||||
|
|
||||||
|
extern BOOL PRIV(ckd_smul)(PCRE2_SIZE *, int, int);
|
||||||
|
|
||||||
|
#include "pcre2_util.h"
|
||||||
|
|
||||||
|
#endif /* PCRE2_INTERNAL_H_IDEMPOTENT_GUARD */
|
||||||
|
|
||||||
|
/* End of pcre2_internal.h */
|
||||||
973
3rd/pcre2/src/pcre2_intmodedep.h
Normal file
973
3rd/pcre2/src/pcre2_intmodedep.h
Normal file
@@ -0,0 +1,973 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module contains mode-dependent macro and structure definitions. The
|
||||||
|
file is #included by pcre2_internal.h if PCRE2_CODE_UNIT_WIDTH is defined.
|
||||||
|
These mode-dependent items are kept in a separate file so that they can also be
|
||||||
|
#included multiple times for different code unit widths by pcre2test in order
|
||||||
|
to have access to the hidden structures at all supported widths.
|
||||||
|
|
||||||
|
Some of the mode-dependent macros are required at different widths for
|
||||||
|
different parts of the pcre2test code (in particular, the included
|
||||||
|
pcre2_printint.c file). We undefine them here so that they can be re-defined for
|
||||||
|
multiple inclusions. Not all of these are used in pcre2test, but it's easier
|
||||||
|
just to undefine them all. */
|
||||||
|
|
||||||
|
#undef ACROSSCHAR
|
||||||
|
#undef BACKCHAR
|
||||||
|
#undef BYTES2CU
|
||||||
|
#undef CHMAX_255
|
||||||
|
#undef CU2BYTES
|
||||||
|
#undef FORWARDCHAR
|
||||||
|
#undef FORWARDCHARTEST
|
||||||
|
#undef GET
|
||||||
|
#undef GET2
|
||||||
|
#undef GETCHAR
|
||||||
|
#undef GETCHARINC
|
||||||
|
#undef GETCHARINCTEST
|
||||||
|
#undef GETCHARLEN
|
||||||
|
#undef GETCHARLENTEST
|
||||||
|
#undef GETCHARTEST
|
||||||
|
#undef GET_EXTRALEN
|
||||||
|
#undef HAS_EXTRALEN
|
||||||
|
#undef IMM2_SIZE
|
||||||
|
#undef MAX_255
|
||||||
|
#undef MAX_MARK
|
||||||
|
#undef MAX_PATTERN_SIZE
|
||||||
|
#undef MAX_UTF_SINGLE_CU
|
||||||
|
#undef NOT_FIRSTCU
|
||||||
|
#undef PUT
|
||||||
|
#undef PUT2
|
||||||
|
#undef PUT2INC
|
||||||
|
#undef PUTCHAR
|
||||||
|
#undef PUTINC
|
||||||
|
#undef TABLE_GET
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* -------------------------- MACROS ----------------------------- */
|
||||||
|
|
||||||
|
/* PCRE keeps offsets in its compiled code as at least 16-bit quantities
|
||||||
|
(always stored in big-endian order in 8-bit mode) by default. These are used,
|
||||||
|
for example, to link from the start of a subpattern to its alternatives and its
|
||||||
|
end. The use of 16 bits per offset limits the size of an 8-bit compiled regex
|
||||||
|
to around 64K, which is big enough for almost everybody. However, I received a
|
||||||
|
request for an even bigger limit. For this reason, and also to make the code
|
||||||
|
easier to maintain, the storing and loading of offsets from the compiled code
|
||||||
|
unit string is now handled by the macros that are defined here.
|
||||||
|
|
||||||
|
The macros are controlled by the value of LINK_SIZE. This defaults to 2, but
|
||||||
|
values of 3 or 4 are also supported. */
|
||||||
|
|
||||||
|
/* ------------------- 8-bit support ------------------ */
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
|
||||||
|
#if LINK_SIZE == 2
|
||||||
|
/* LINK_SIZE 2, 8-bit code units: a link is two bytes, most significant first.
PUT stores the low 16 bits of d at a[n], a[n+1]; GET reads them back as an
unsigned int. All macro arguments are parenthesized so that expression
arguments such as "code + 1" expand correctly, and each byte is widened to
unsigned int before it is shifted. */

#define PUT(a,n,d)   \
  ((a)[n] = (PCRE2_UCHAR)((d) >> 8), \
   (a)[(n)+1] = (PCRE2_UCHAR)((d) & 255))
#define GET(a,n) \
  (((unsigned int)(a)[n] << 8) | (unsigned int)(a)[(n)+1])
#define MAX_PATTERN_SIZE (1 << 16)
|
||||||
|
|
||||||
|
#elif LINK_SIZE == 3
|
||||||
|
/* LINK_SIZE 3, 8-bit code units: a link is three bytes, most significant
first. PUT stores the low 24 bits of d at a[n]..a[n+2]; GET reads them back as
an unsigned int. All macro arguments are parenthesized so that expression
arguments expand correctly, and each byte is widened to unsigned int before it
is shifted. */

#define PUT(a,n,d)   \
  ((a)[n] = (PCRE2_UCHAR)((d) >> 16), \
   (a)[(n)+1] = (PCRE2_UCHAR)((d) >> 8), \
   (a)[(n)+2] = (PCRE2_UCHAR)((d) & 255))
#define GET(a,n) \
  (((unsigned int)(a)[n] << 16) | ((unsigned int)(a)[(n)+1] << 8) | \
   (unsigned int)(a)[(n)+2])
#define MAX_PATTERN_SIZE (1 << 24)
|
||||||
|
|
||||||
|
#elif LINK_SIZE == 4
|
||||||
|
/* LINK_SIZE 4, 8-bit code units: a link is four bytes, most significant
first. PUT stores the 32 bits of d at a[n]..a[n+3]; GET reads them back as an
unsigned int. All macro arguments are parenthesized so that expression
arguments expand correctly. Each byte is converted to unsigned int before the
shift, because shifting an int-promoted byte left by 24 overflows a signed int
when the byte is 0x80 or greater. */

#define PUT(a,n,d)   \
  ((a)[n] = (PCRE2_UCHAR)((d) >> 24), \
   (a)[(n)+1] = (PCRE2_UCHAR)((d) >> 16), \
   (a)[(n)+2] = (PCRE2_UCHAR)((d) >> 8), \
   (a)[(n)+3] = (PCRE2_UCHAR)((d) & 255))
#define GET(a,n) \
  (((unsigned int)(a)[n] << 24) | ((unsigned int)(a)[(n)+1] << 16) | \
   ((unsigned int)(a)[(n)+2] << 8) | (unsigned int)(a)[(n)+3])
#define MAX_PATTERN_SIZE (1 << 30)   /* Keep it positive */
|
||||||
|
|
||||||
|
#else
|
||||||
|
#error LINK_SIZE must be 2, 3, or 4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/* ------------------- 16-bit support ------------------ */
|
||||||
|
|
||||||
|
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||||
|
|
||||||
|
#if LINK_SIZE == 2
|
||||||
|
/* 16-bit code units with a configured LINK_SIZE of 2: one code unit holds the
whole link, so LINK_SIZE is redefined as 1 (it now counts code units). PUT
stores d, converted to PCRE2_UCHAR, at a[n]; GET yields a[n]. The array
argument is parenthesized so that expression arguments expand correctly. */

#undef LINK_SIZE
#define LINK_SIZE 1
#define PUT(a,n,d)   \
  ((a)[n] = (PCRE2_UCHAR)(d))
#define GET(a,n) \
  ((a)[n])
#define MAX_PATTERN_SIZE (1 << 16)
|
||||||
|
|
||||||
|
#elif LINK_SIZE == 3 || LINK_SIZE == 4
|
||||||
|
/* 16-bit code units with a configured LINK_SIZE of 3 or 4: a link occupies
two code units, high half first, so LINK_SIZE is redefined as 2 (it now counts
code units). PUT stores the 32 bits of d at a[n], a[n+1]; GET reads them back
as an unsigned int. All macro arguments are parenthesized, and each code unit
is converted to unsigned int before shifting, because shifting an int-promoted
16-bit value left by 16 overflows a signed int when it is 0x8000 or greater. */

#undef LINK_SIZE
#define LINK_SIZE 2
#define PUT(a,n,d)   \
  ((a)[n] = (PCRE2_UCHAR)((d) >> 16), \
   (a)[(n)+1] = (PCRE2_UCHAR)((d) & 65535))
#define GET(a,n) \
  (((unsigned int)(a)[n] << 16) | (unsigned int)(a)[(n)+1])
#define MAX_PATTERN_SIZE (1 << 30)  /* Keep it positive */
|
||||||
|
|
||||||
|
#else
|
||||||
|
#error LINK_SIZE must be 2, 3, or 4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/* ------------------- 32-bit support ------------------ */
|
||||||
|
|
||||||
|
#elif PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
|
/* 32-bit code units: one code unit holds the whole link, so LINK_SIZE is
always 1 here, whatever value was configured. PUT assigns d to a[n] and GET
yields a[n]. The array argument is parenthesized so that expression arguments
such as "code + 1" expand correctly. */

#undef LINK_SIZE
#define LINK_SIZE 1
#define PUT(a,n,d)   \
  ((a)[n] = (d))
#define GET(a,n) \
  ((a)[n])
#define MAX_PATTERN_SIZE (1 << 30)  /* Keep it positive */
|
||||||
|
|
||||||
|
#else
|
||||||
|
#error Unsupported compiling mode
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/* --------------- Other mode-specific macros ----------------- */
|
||||||
|
|
||||||
|
/* PCRE uses some other (at least) 16-bit quantities that do not change when
|
||||||
|
the size of offsets changes. These are used for repeat counts and for other
|
||||||
|
things such as capturing parenthesis numbers in back references.
|
||||||
|
|
||||||
|
Define the number of code units required to hold a 16-bit count/offset, and
|
||||||
|
macros to load and store such a value. For reasons that I do not understand,
|
||||||
|
the expression in the 8-bit GET2 macro is treated by gcc as a signed
|
||||||
|
expression, even when a is declared as unsigned. It seems that any kind of
|
||||||
|
arithmetic results in a signed value. Hence the cast. */
|
||||||
|
|
||||||
|
/* IMM2_SIZE is the number of code units that hold a 16-bit value. GET2 loads
such a value from a[n...] and PUT2 stores d there. In 8-bit mode the value is
split over two bytes, most significant first; otherwise it fits in one code
unit. The array argument and the whole expansion are parenthesized so that
expression arguments (for example "code + 1") and uses inside larger
expressions expand correctly. */

#if PCRE2_CODE_UNIT_WIDTH == 8
#define IMM2_SIZE 2
#define GET2(a,n) (unsigned int)(((a)[n] << 8) | (a)[(n)+1])
#define PUT2(a,n,d) ((a)[n] = (d) >> 8, (a)[(n)+1] = (d) & 255)

#else  /* Code units are 16 or 32 bits */
#define IMM2_SIZE 1
#define GET2(a,n) ((a)[n])
#define PUT2(a,n,d) ((a)[n] = (d))
#endif
|
||||||
|
|
||||||
|
/* Other macros that are different for 8-bit mode. The MAX_255 macro checks
|
||||||
|
whether its argument, which is assumed to be one code unit, is less than 256.
|
||||||
|
The CHMAX_255 macro does not assume one code unit. The maximum length of a MARK
|
||||||
|
name must fit in one code unit; currently it is set to 255 or 65535. The
|
||||||
|
TABLE_GET macro is used to access elements of tables containing exactly 256
|
||||||
|
items. Its argument is a code unit. When code points can be greater than 255, a
|
||||||
|
check is needed before accessing these tables. */
|
||||||
|
|
||||||
|
/* MAX_255(c) tests a single code unit against 255; in 8-bit mode it is the
constant TRUE. CHMAX_255(c) performs the comparison whenever wide characters
are possible. MAX_MARK is the largest value that fits in one code unit, capped
at 16 bits. TABLE_GET indexes a table directly in 8-bit mode and otherwise
falls back to the supplied default when MAX_255(c) is false. SUPPORT_WIDE_CHARS
is defined for 16/32-bit code units, and for 8-bit ones only with
SUPPORT_UNICODE. */

#if PCRE2_CODE_UNIT_WIDTH != 8   /* Code units are 16 or 32 bits */

#define SUPPORT_WIDE_CHARS
#define MAX_MARK 0xffffu
#define MAX_255(c) ((c) <= 255u)
#define CHMAX_255(c) ((c) <= 255u)
#define TABLE_GET(c, table, dflt) (MAX_255(c)? ((table)[c]):(dflt))

#else  /* 8-bit code units */

#define MAX_MARK 0xffu
#define MAX_255(c) TRUE
#define TABLE_GET(c, table, dflt) ((table)[c])
#ifndef SUPPORT_UNICODE
#define CHMAX_255(c) TRUE
#else
#define SUPPORT_WIDE_CHARS
#define CHMAX_255(c) ((c) <= 255u)
#endif  /* SUPPORT_UNICODE */

#endif
|
||||||
|
|
||||||
|
|
||||||
|
/* ----------------- Character-handling macros ----------------- */
|
||||||
|
|
||||||
|
/* There is a proposed future special "UTF-21" mode, in which only the lowest
|
||||||
|
21 bits of a 32-bit character are interpreted as UTF, with the remaining 11
|
||||||
|
high-order bits available to the application for other uses. In preparation for
|
||||||
|
the future implementation of this mode, there are macros that load a data item
|
||||||
|
and, if in this special mode, mask it to 21 bits. These macros all have names
|
||||||
|
starting with UCHAR21. In all other modes, including the normal 32-bit
|
||||||
|
library, the macros all have the same simple definitions. When the new mode is
|
||||||
|
implemented, it is expected that these definitions will be varied appropriately
|
||||||
|
using #ifdef when compiling the library that supports the special mode. */
|
||||||
|
|
||||||
|
/* Load the data item at eptr; the INC forms also post-increment eptr. The
TEST forms currently have the same expansion as the plain ones, so they are
written in terms of them. */

#define UCHAR21(eptr)        (*(eptr))
#define UCHAR21INC(eptr)     (*(eptr)++)
#define UCHAR21TEST(eptr)    UCHAR21(eptr)
#define UCHAR21INCTEST(eptr) UCHAR21INC(eptr)
|
||||||
|
|
||||||
|
/* When UTF encoding is being used, a character is no longer just a single
|
||||||
|
byte in 8-bit mode or a single short in 16-bit mode. The macros for character
|
||||||
|
handling generate simple sequences when used in the basic mode, and more
|
||||||
|
complicated ones for UTF characters. GETCHARLENTEST and other macros are not
|
||||||
|
used when UTF is not supported. To make sure they can never even appear when
|
||||||
|
UTF support is omitted, we don't even define them. */
|
||||||
|
|
||||||
|
#ifndef SUPPORT_UNICODE
|
||||||
|
|
||||||
|
/* #define MAX_UTF_SINGLE_CU */
|
||||||
|
/* #define HAS_EXTRALEN(c) */
|
||||||
|
/* #define GET_EXTRALEN(c) */
|
||||||
|
/* #define NOT_FIRSTCU(c) */
|
||||||
|
/* Without Unicode support every character is exactly one code unit. The GET
macros copy the code unit at eptr into c (the INC forms also advance eptr) and
PUTCHAR stores c at p and yields 1. The GET macros are complete statements that
already end in a semicolon. Pointer arguments are parenthesized so that
expression arguments such as "*pp" or "q + 1" expand correctly. */

#define GETCHAR(c, eptr) c = *(eptr);
#define GETCHARTEST(c, eptr) c = *(eptr);
#define GETCHARINC(c, eptr) c = *(eptr)++;
#define GETCHARINCTEST(c, eptr) c = *(eptr)++;
#define GETCHARLEN(c, eptr, len) c = *(eptr);
#define PUTCHAR(c, p) (*(p) = (c), 1)
|
||||||
|
/* #define GETCHARLENTEST(c, eptr, len) */
|
||||||
|
/* #define BACKCHAR(eptr) */
|
||||||
|
/* #define FORWARDCHAR(eptr) */
|
||||||
|
/* #define FORWARDCHARTEST(eptr,end) */
|
||||||
|
/* #define ACROSSCHAR(condition, eptr, action) */
|
||||||
|
|
||||||
|
#else /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
/* ------------------- 8-bit support ------------------ */
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
/* UTF-8 character handling. Pointer arguments are parenthesized throughout so
that expression arguments such as "*pp" expand correctly. Note that the GETCHAR
family are multi-statement macros that already end in a semicolon and contain
an unbraced "if", so they must only be used as complete statements inside a
braced block. */

#define MAYBE_UTF_MULTI          /* UTF chars may use multiple code units */

/* The largest UTF code point that can be encoded as a single code unit. */

#define MAX_UTF_SINGLE_CU 127

/* Tests whether the code point needs extra characters to decode. */

#define HAS_EXTRALEN(c) HASUTF8EXTRALEN(c)

/* Returns with the additional number of characters if HAS_EXTRALEN(c) is TRUE.
Otherwise it has an undefined behaviour. */

#define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3fu])

/* Returns TRUE, if the given value is not the first code unit of a UTF
sequence. */

#define NOT_FIRSTCU(c) (((c) & 0xc0u) == 0x80u)

/* Get the next UTF-8 character, not advancing the pointer. This is called when
we know we are in UTF-8 mode. */

#define GETCHAR(c, eptr) \
  c = *(eptr); \
  if ((c) >= 0xc0u) GETUTF8(c, eptr);

/* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the
pointer. */

#define GETCHARTEST(c, eptr) \
  c = *(eptr); \
  if (utf && (c) >= 0xc0u) GETUTF8(c, eptr);

/* Get the next UTF-8 character, advancing the pointer. This is called when we
know we are in UTF-8 mode. */

#define GETCHARINC(c, eptr) \
  c = *(eptr)++; \
  if ((c) >= 0xc0u) GETUTF8INC(c, eptr);

/* Get the next character, testing for UTF-8 mode, and advancing the pointer.
This is called when we don't know if we are in UTF-8 mode. */

#define GETCHARINCTEST(c, eptr) \
  c = *(eptr)++; \
  if (utf && (c) >= 0xc0u) GETUTF8INC(c, eptr);

/* Get the next UTF-8 character, not advancing the pointer, incrementing length
if there are extra bytes. This is called when we know we are in UTF-8 mode. */

#define GETCHARLEN(c, eptr, len) \
  c = *(eptr); \
  if ((c) >= 0xc0u) GETUTF8LEN(c, eptr, len);

/* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the
pointer, incrementing length if there are extra bytes. This is called when we
do not know if we are in UTF-8 mode. */

#define GETCHARLENTEST(c, eptr, len) \
  c = *(eptr); \
  if (utf && (c) >= 0xc0u) GETUTF8LEN(c, eptr, len);

/* If the pointer is not at the start of a character, move it back until
it is. This is called only in UTF-8 mode - we don't put a test within the macro
because almost all calls are already within a block of UTF-8 only code. */

#define BACKCHAR(eptr) while((*(eptr) & 0xc0u) == 0x80u) (eptr)--

/* Same as above, just in the other direction. */

#define FORWARDCHAR(eptr) while((*(eptr) & 0xc0u) == 0x80u) (eptr)++
#define FORWARDCHARTEST(eptr,end) \
  while((eptr) < (end) && (*(eptr) & 0xc0u) == 0x80u) (eptr)++

/* Same as above, but it allows a fully customizable form. */

#define ACROSSCHAR(condition, eptr, action) \
  while((condition) && ((*(eptr)) & 0xc0u) == 0x80u) action

/* Deposit a character into memory, returning the number of code units. */

#define PUTCHAR(c, p) ((utf && (c) > MAX_UTF_SINGLE_CU)? \
  PRIV(ord2utf)((c),(p)) : (*(p) = (c), 1))
|
||||||
|
|
||||||
|
|
||||||
|
/* ------------------- 16-bit support ------------------ */
|
||||||
|
|
||||||
|
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||||
|
/* UTF-16 character handling. Pointer arguments are parenthesized throughout so
that expression arguments such as "*pp" expand correctly. Note that the GETCHAR
family are multi-statement macros that already end in a semicolon and contain
an unbraced "if", so they must only be used as complete statements inside a
braced block. */

#define MAYBE_UTF_MULTI          /* UTF chars may use multiple code units */

/* The largest UTF code point that can be encoded as a single code unit. */

#define MAX_UTF_SINGLE_CU 65535

/* Tests whether the code point needs extra characters to decode. */

#define HAS_EXTRALEN(c) (((c) & 0xfc00u) == 0xd800u)

/* Returns with the additional number of characters if HAS_EXTRALEN(c) is TRUE.
Otherwise it has an undefined behaviour. */

#define GET_EXTRALEN(c) 1

/* Returns TRUE, if the given value is not the first code unit of a UTF
sequence. */

#define NOT_FIRSTCU(c) (((c) & 0xfc00u) == 0xdc00u)

/* Base macro to pick up the low surrogate of a UTF-16 character, not
advancing the pointer. */

#define GETUTF16(c, eptr) \
   { c = ((((c) & 0x3ffu) << 10) | ((eptr)[1] & 0x3ffu)) + 0x10000u; }

/* Get the next UTF-16 character, not advancing the pointer. This is called when
we know we are in UTF-16 mode. */

#define GETCHAR(c, eptr) \
  c = *(eptr); \
  if (((c) & 0xfc00u) == 0xd800u) GETUTF16(c, eptr);

/* Get the next UTF-16 character, testing for UTF-16 mode, and not advancing the
pointer. */

#define GETCHARTEST(c, eptr) \
  c = *(eptr); \
  if (utf && ((c) & 0xfc00u) == 0xd800u) GETUTF16(c, eptr);

/* Base macro to pick up the low surrogate of a UTF-16 character, advancing
the pointer. */

#define GETUTF16INC(c, eptr) \
   { c = ((((c) & 0x3ffu) << 10) | (*(eptr)++ & 0x3ffu)) + 0x10000u; }

/* Get the next UTF-16 character, advancing the pointer. This is called when we
know we are in UTF-16 mode. */

#define GETCHARINC(c, eptr) \
  c = *(eptr)++; \
  if (((c) & 0xfc00u) == 0xd800u) GETUTF16INC(c, eptr);

/* Get the next character, testing for UTF-16 mode, and advancing the pointer.
This is called when we don't know if we are in UTF-16 mode. */

#define GETCHARINCTEST(c, eptr) \
  c = *(eptr)++; \
  if (utf && ((c) & 0xfc00u) == 0xd800u) GETUTF16INC(c, eptr);

/* Base macro to pick up the low surrogate of a UTF-16 character, not
advancing the pointer, incrementing the length. */

#define GETUTF16LEN(c, eptr, len) \
   { c = ((((c) & 0x3ffu) << 10) | ((eptr)[1] & 0x3ffu)) + 0x10000u; (len)++; }

/* Get the next UTF-16 character, not advancing the pointer, incrementing
length if there is a low surrogate. This is called when we know we are in
UTF-16 mode. */

#define GETCHARLEN(c, eptr, len) \
  c = *(eptr); \
  if (((c) & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len);

/* Get the next UTF-16 character, testing for UTF-16 mode, not advancing the
pointer, incrementing length if there is a low surrogate. This is called when
we do not know if we are in UTF-16 mode. */

#define GETCHARLENTEST(c, eptr, len) \
  c = *(eptr); \
  if (utf && ((c) & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len);

/* If the pointer is not at the start of a character, move it back until
it is. This is called only in UTF-16 mode - we don't put a test within the
macro because almost all calls are already within a block of UTF-16 only
code. */

#define BACKCHAR(eptr) if ((*(eptr) & 0xfc00u) == 0xdc00u) (eptr)--

/* Same as above, just in the other direction. */

#define FORWARDCHAR(eptr) if ((*(eptr) & 0xfc00u) == 0xdc00u) (eptr)++
#define FORWARDCHARTEST(eptr,end) \
  if ((eptr) < (end) && (*(eptr) & 0xfc00u) == 0xdc00u) (eptr)++

/* Same as above, but it allows a fully customizable form. */

#define ACROSSCHAR(condition, eptr, action) \
  if ((condition) && ((*(eptr)) & 0xfc00u) == 0xdc00u) action

/* Deposit a character into memory, returning the number of code units. */

#define PUTCHAR(c, p) ((utf && (c) > MAX_UTF_SINGLE_CU)? \
  PRIV(ord2utf)((c),(p)) : (*(p) = (c), 1))
|
||||||
|
|
||||||
|
|
||||||
|
/* ------------------- 32-bit support ------------------ */
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
/* These are trivial for the 32-bit library, since all UTF-32 characters fit
|
||||||
|
into one PCRE2_UCHAR unit. */
|
||||||
|
|
||||||
|
/* UTF-32 character handling: every character is a single code unit, so the
GET macros are plain loads and the pointer-adjusting macros are no-ops. The GET
macros are complete statements that already end in a semicolon. */

#define MAX_UTF_SINGLE_CU (0x10ffffu)
#define HAS_EXTRALEN(c) (0)
#define GET_EXTRALEN(c) (0)
#define NOT_FIRSTCU(c) (0)

/* Get the next UTF-32 character, not advancing the pointer. This is called when
we know we are in UTF-32 mode. */

#define GETCHAR(c, eptr) \
  c = *(eptr);

/* Get the next UTF-32 character, testing for UTF-32 mode, and not advancing the
pointer. */

#define GETCHARTEST(c, eptr) \
  c = *(eptr);

/* Get the next UTF-32 character, advancing the pointer. This is called when we
know we are in UTF-32 mode. */

#define GETCHARINC(c, eptr) \
  c = *((eptr)++);

/* Get the next character, testing for UTF-32 mode, and advancing the pointer.
This is called when we don't know if we are in UTF-32 mode. */

#define GETCHARINCTEST(c, eptr) \
  c = *((eptr)++);

/* Get the next UTF-32 character, not advancing the pointer, not incrementing
length (since all UTF-32 is of length 1). This is called when we know we are in
UTF-32 mode. */

#define GETCHARLEN(c, eptr, len) \
  GETCHAR(c, eptr)

/* Get the next UTF-32 character, testing for UTF-32 mode, not advancing the
pointer, not incrementing the length (since all UTF-32 is of length 1).
This is called when we do not know if we are in UTF-32 mode. */

#define GETCHARLENTEST(c, eptr, len) \
  GETCHARTEST(c, eptr)

/* If the pointer is not at the start of a character, move it back until
it is. This is called only in UTF-32 mode - we don't put a test within the
macro because almost all calls are already within a block of UTF-32 only
code.

These are all no-ops since all UTF-32 characters fit into one PCRE2_UCHAR. */

#define BACKCHAR(eptr) do { } while (0)

/* Same as above, just in the other direction. */

#define FORWARDCHAR(eptr) do { } while (0)
#define FORWARDCHARTEST(eptr,end) do { } while (0)

/* Same as above, but it allows a fully customizable form. */

#define ACROSSCHAR(condition, eptr, action) do { } while (0)

/* Deposit a character into memory, returning the number of code units. The
arguments are parenthesized so that expression arguments such as "q + 1" expand
correctly. */

#define PUTCHAR(c, p) (*(p) = (c), 1)
|
||||||
|
|
||||||
|
#endif /* UTF-32 character handling */
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
|
||||||
|
/* Mode-dependent macros that have the same definition in all modes. */
|
||||||
|
|
||||||
|
/* CU2BYTES converts a count of code units to bytes and BYTES2CU does the
reverse (integer division). PUTINC and PUT2INC perform PUT or PUT2 and then
advance the pointer a by LINK_SIZE or IMM2_SIZE code units; the whole expansion
and the pointer argument are parenthesized so that they behave as a single
expression. Because a is modified, it must be an lvalue. */

#define CU2BYTES(x)     ((x)*((PCRE2_CODE_UNIT_WIDTH/8)))
#define BYTES2CU(x)     ((x)/((PCRE2_CODE_UNIT_WIDTH/8)))
#define PUTINC(a,n,d)   (PUT(a,n,d), (a) += LINK_SIZE)
#define PUT2INC(a,n,d)  (PUT2(a,n,d), (a) += IMM2_SIZE)
|
||||||
|
|
||||||
|
|
||||||
|
/* ----------------------- HIDDEN STRUCTURES ----------------------------- */
|
||||||
|
|
||||||
|
/* NOTE: All these structures *must* start with a pcre2_memctl structure. The
|
||||||
|
code that uses them is simpler because it assumes this. */
|
||||||
|
|
||||||
|
/* The real general context structure. At present it holds only data for custom
|
||||||
|
memory control. */
|
||||||
|
|
||||||
|
/* WARNING: if this is ever changed, code in pcre2_substitute.c will have to be
|
||||||
|
changed because it builds a general context "by hand" in order to avoid the
|
||||||
|
malloc() call in pcre2_general_context_create(). There is also code in
|
||||||
|
pcre2_match.c that makes the same assumption. */
|
||||||
|
|
||||||
|
typedef struct pcre2_real_general_context {
|
||||||
|
pcre2_memctl memctl;
|
||||||
|
} pcre2_real_general_context;
|
||||||
|
|
||||||
|
/* The real compile context structure */
|
||||||
|
|
||||||
|
typedef struct pcre2_real_compile_context {
|
||||||
|
pcre2_memctl memctl;
|
||||||
|
int (*stack_guard)(uint32_t, void *);
|
||||||
|
void *stack_guard_data;
|
||||||
|
const uint8_t *tables;
|
||||||
|
PCRE2_SIZE max_pattern_length;
|
||||||
|
PCRE2_SIZE max_pattern_compiled_length;
|
||||||
|
uint16_t bsr_convention;
|
||||||
|
uint16_t newline_convention;
|
||||||
|
uint32_t parens_nest_limit;
|
||||||
|
uint32_t extra_options;
|
||||||
|
uint32_t max_varlookbehind;
|
||||||
|
uint32_t optimization_flags;
|
||||||
|
} pcre2_real_compile_context;
|
||||||
|
|
||||||
|
/* The real match context structure. */
|
||||||
|
|
||||||
|
typedef struct pcre2_real_match_context {
|
||||||
|
pcre2_memctl memctl;
|
||||||
|
#ifdef SUPPORT_JIT
|
||||||
|
pcre2_jit_callback jit_callback;
|
||||||
|
void *jit_callback_data;
|
||||||
|
#endif
|
||||||
|
int (*callout)(pcre2_callout_block *, void *);
|
||||||
|
void *callout_data;
|
||||||
|
int (*substitute_callout)(pcre2_substitute_callout_block *, void *);
|
||||||
|
void *substitute_callout_data;
|
||||||
|
PCRE2_SIZE (*substitute_case_callout)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *,
|
||||||
|
PCRE2_SIZE, int, void *);
|
||||||
|
void *substitute_case_callout_data;
|
||||||
|
PCRE2_SIZE offset_limit;
|
||||||
|
uint32_t heap_limit;
|
||||||
|
uint32_t match_limit;
|
||||||
|
uint32_t depth_limit;
|
||||||
|
} pcre2_real_match_context;
|
||||||
|
|
||||||
|
/* The real convert context structure. */
|
||||||
|
|
||||||
|
typedef struct pcre2_real_convert_context {
|
||||||
|
pcre2_memctl memctl;
|
||||||
|
uint32_t glob_separator;
|
||||||
|
uint32_t glob_escape;
|
||||||
|
} pcre2_real_convert_context;
|
||||||
|
|
||||||
|
/* The real compiled code structure. The type for the blocksize field is
|
||||||
|
defined specially because it is required in pcre2_serialize_decode() when
|
||||||
|
copying the size from possibly unaligned memory into a variable of the same
|
||||||
|
type. Use a macro rather than a typedef to avoid compiler warnings when this
|
||||||
|
file is included multiple times by pcre2test. LOOKBEHIND_MAX specifies the
|
||||||
|
largest lookbehind that is supported. (OP_REVERSE and OP_VREVERSE in a pattern
|
||||||
|
have 16-bit arguments in 8-bit and 16-bit modes, so we need no more than a
|
||||||
|
16-bit field here.) */
|
||||||
|
|
||||||
|
#undef CODE_BLOCKSIZE_TYPE
|
||||||
|
#define CODE_BLOCKSIZE_TYPE PCRE2_SIZE
|
||||||
|
|
||||||
|
#undef LOOKBEHIND_MAX
|
||||||
|
#define LOOKBEHIND_MAX UINT16_MAX
|
||||||
|
|
||||||
|
typedef struct pcre2_real_code {
|
||||||
|
pcre2_memctl memctl; /* Memory control fields */
|
||||||
|
const uint8_t *tables; /* The character tables */
|
||||||
|
void *executable_jit; /* Pointer to JIT code */
|
||||||
|
uint8_t start_bitmap[32]; /* Bitmap for starting code unit < 256 */
|
||||||
|
CODE_BLOCKSIZE_TYPE blocksize; /* Total (bytes) that was malloc-ed */
|
||||||
|
CODE_BLOCKSIZE_TYPE code_start; /* Byte code start offset */
|
||||||
|
uint32_t magic_number; /* Paranoid and endianness check */
|
||||||
|
uint32_t compile_options; /* Options passed to pcre2_compile() */
|
||||||
|
uint32_t overall_options; /* Options after processing the pattern */
|
||||||
|
uint32_t extra_options; /* Taken from compile_context */
|
||||||
|
uint32_t flags; /* Various state flags */
|
||||||
|
uint32_t limit_heap; /* Limit set in the pattern */
|
||||||
|
uint32_t limit_match; /* Limit set in the pattern */
|
||||||
|
uint32_t limit_depth; /* Limit set in the pattern */
|
||||||
|
uint32_t first_codeunit; /* Starting code unit */
|
||||||
|
uint32_t last_codeunit; /* This codeunit must be seen */
|
||||||
|
uint16_t bsr_convention; /* What \R matches */
|
||||||
|
uint16_t newline_convention; /* What is a newline? */
|
||||||
|
uint16_t max_lookbehind; /* Longest lookbehind (characters) */
|
||||||
|
uint16_t minlength; /* Minimum length of match */
|
||||||
|
uint16_t top_bracket; /* Highest numbered group */
|
||||||
|
uint16_t top_backref; /* Highest numbered back reference */
|
||||||
|
uint16_t name_entry_size; /* Size (code units) of table entries */
|
||||||
|
uint16_t name_count; /* Number of name entries in the table */
|
||||||
|
uint32_t optimization_flags; /* Optimizations enabled at compile time */
|
||||||
|
} pcre2_real_code;
|
||||||
|
|
||||||
|
/* The real match data structure. Define ovector as large as it can ever
|
||||||
|
actually be so that array bound checkers don't grumble. Memory for this
|
||||||
|
structure is obtained by calling pcre2_match_data_create(), which sets the size
|
||||||
|
as the offset of ovector plus a pair of elements for each capturable string, so
|
||||||
|
the size varies from call to call. As the maximum number of capturing
|
||||||
|
subpatterns is 65535 we must allow for 65536 strings to include the overall
|
||||||
|
match. (See also the heapframe structure below.) */
|
||||||
|
|
||||||
|
struct heapframe; /* Forward reference */
|
||||||
|
|
||||||
|
typedef struct pcre2_real_match_data {
|
||||||
|
pcre2_memctl memctl; /* Memory control fields */
|
||||||
|
const pcre2_real_code *code; /* The pattern used for the match */
|
||||||
|
PCRE2_SPTR subject; /* The subject that was matched */
|
||||||
|
PCRE2_SPTR mark; /* Pointer to last mark */
|
||||||
|
struct heapframe *heapframes; /* Backtracking frames heap memory */
|
||||||
|
PCRE2_SIZE heapframes_size; /* Malloc-ed size */
|
||||||
|
PCRE2_SIZE subject_length; /* Subject length */
|
||||||
|
PCRE2_SIZE leftchar; /* Offset to leftmost code unit */
|
||||||
|
PCRE2_SIZE rightchar; /* Offset to rightmost code unit */
|
||||||
|
PCRE2_SIZE startchar; /* Offset to starting code unit */
|
||||||
|
uint8_t matchedby; /* Type of match (normal, JIT, DFA) */
|
||||||
|
uint8_t flags; /* Various flags */
|
||||||
|
uint16_t oveccount; /* Number of pairs */
|
||||||
|
int rc; /* The return code from the match */
|
||||||
|
PCRE2_SIZE ovector[131072]; /* Must be last in the structure */
|
||||||
|
} pcre2_real_match_data;
|
||||||
|
|
||||||
|
|
||||||
|
/* ----------------------- PRIVATE STRUCTURES ----------------------------- */
|
||||||
|
|
||||||
|
/* These structures are not needed for pcre2test. */
|
||||||
|
|
||||||
|
#ifndef PCRE2_PCRE2TEST
|
||||||
|
|
||||||
|
/* Structures for checking for mutual function recursion when scanning compiled
|
||||||
|
or parsed code. */
|
||||||
|
|
||||||
|
typedef struct recurse_check {
|
||||||
|
struct recurse_check *prev;
|
||||||
|
PCRE2_SPTR group;
|
||||||
|
} recurse_check;
|
||||||
|
|
||||||
|
/* Counterpart of recurse_check whose group member is a uint32_t pointer. */
typedef struct parsed_recurse_check {
  struct parsed_recurse_check *prev;   /* Previous link in the chain */
  uint32_t *groupptr;                  /* Group pointer held by this link */
} parsed_recurse_check;
|
||||||
|
|
||||||
|
/* Structure for building a cache when filling in pattern recursion offsets. */
|
||||||
|
|
||||||
|
typedef struct recurse_cache {
|
||||||
|
PCRE2_SPTR group;
|
||||||
|
int groupnumber;
|
||||||
|
} recurse_cache;
|
||||||
|
|
||||||
|
/* Structure for maintaining a chain of pointers to the currently incomplete
|
||||||
|
branches, for testing for left recursion while compiling. */
|
||||||
|
|
||||||
|
typedef struct branch_chain {
|
||||||
|
struct branch_chain *outer;
|
||||||
|
PCRE2_UCHAR *current_branch;
|
||||||
|
} branch_chain;
|
||||||
|
|
||||||
|
/* Structure for building a list of named groups during the first pass of
|
||||||
|
compiling. */
|
||||||
|
|
||||||
|
typedef struct named_group {
|
||||||
|
PCRE2_SPTR name; /* Points to the name in the pattern */
|
||||||
|
uint32_t number; /* Group number */
|
||||||
|
uint16_t length; /* Length of the name */
|
||||||
|
uint16_t isdup; /* TRUE if a duplicate */
|
||||||
|
} named_group;
|
||||||
|
|
||||||
|
/* Structure for caching sorted ranges. This improves the performance
|
||||||
|
of translating META code to byte code. */
|
||||||
|
|
||||||
|
/* Header of a cached, sorted range list. */
typedef struct class_ranges {
  struct class_ranges *next;    /* Following class ranges entry */
  size_t char_lists_size;       /* Total size of the encoded char lists */
  size_t char_lists_start;      /* Offset where the encoded char lists start */
  uint16_t range_list_size;     /* Size of the ranges array */
  uint16_t char_lists_types;    /* XCL_LIST header of the char lists */
  /* The list of ranges (start/end pairs) follows this header. */
} class_ranges;
|
||||||
|
|
||||||
|
/* The same 32 bytes of class bits, viewable as bytes or as 32-bit words. */
typedef union class_bits_storage {
  uint8_t classbits[32];     /* Byte view */
  uint32_t classwords[8];    /* Word view */
} class_bits_storage;
|
||||||
|
|
||||||
|
/* Structure for passing "static" information around between the functions
|
||||||
|
doing the compiling, so that they are thread-safe. */
|
||||||
|
|
||||||
|
typedef struct compile_block {
|
||||||
|
pcre2_real_compile_context *cx; /* Points to the compile context */
|
||||||
|
const uint8_t *lcc; /* Points to lower casing table */
|
||||||
|
const uint8_t *fcc; /* Points to case-flipping table */
|
||||||
|
const uint8_t *cbits; /* Points to character type table */
|
||||||
|
const uint8_t *ctypes; /* Points to table of type maps */
|
||||||
|
PCRE2_UCHAR *start_workspace; /* The start of working space */
|
||||||
|
PCRE2_UCHAR *start_code; /* The start of the compiled code */
|
||||||
|
PCRE2_SPTR start_pattern; /* The start of the pattern */
|
||||||
|
PCRE2_SPTR end_pattern; /* The end of the pattern */
|
||||||
|
PCRE2_UCHAR *name_table; /* The name/number table */
|
||||||
|
PCRE2_SIZE workspace_size; /* Size of workspace */
|
||||||
|
PCRE2_SIZE small_ref_offset[10]; /* Offsets for \1 to \9 */
|
||||||
|
PCRE2_SIZE erroroffset; /* Offset of error in pattern */
|
||||||
|
class_bits_storage classbits; /* Temporary store for classbits */
|
||||||
|
uint16_t names_found; /* Number of entries so far */
|
||||||
|
uint16_t name_entry_size; /* Size of each entry */
|
||||||
|
uint16_t parens_depth; /* Depth of nested parentheses */
|
||||||
|
uint16_t assert_depth; /* Depth of nested assertions */
|
||||||
|
named_group *named_groups; /* Points to vector in pre-compile */
|
||||||
|
uint32_t named_group_list_size; /* Number of entries in the list */
|
||||||
|
uint32_t external_options; /* External (initial) options */
|
||||||
|
uint32_t external_flags; /* External flag bits to be set */
|
||||||
|
uint32_t bracount; /* Count of capturing parentheses */
|
||||||
|
uint32_t lastcapture; /* Last capture encountered */
|
||||||
|
uint32_t *parsed_pattern; /* Parsed pattern buffer */
|
||||||
|
uint32_t *parsed_pattern_end; /* Parsed pattern should not get here */
|
||||||
|
uint32_t *groupinfo; /* Group info vector */
|
||||||
|
uint32_t top_backref; /* Maximum back reference */
|
||||||
|
uint32_t backref_map; /* Bitmap of low back refs */
|
||||||
|
uint32_t nltype; /* Newline type */
|
||||||
|
uint32_t nllen; /* Newline string length */
|
||||||
|
PCRE2_UCHAR nl[4]; /* Newline string when fixed length */
|
||||||
|
uint8_t class_op_used[ECLASS_NEST_LIMIT]; /* Operation used for
|
||||||
|
extended classes */
|
||||||
|
uint32_t req_varyopt; /* "After variable item" flag for reqbyte */
|
||||||
|
uint32_t max_varlookbehind; /* Limit for variable lookbehinds */
|
||||||
|
int max_lookbehind; /* Maximum lookbehind encountered (characters) */
|
||||||
|
BOOL had_accept; /* (*ACCEPT) encountered */
|
||||||
|
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
|
||||||
|
BOOL had_recurse; /* Had a pattern recursion or subroutine call */
|
||||||
|
BOOL dupnames; /* Duplicate names exist */
|
||||||
|
#ifdef SUPPORT_WIDE_CHARS
|
||||||
|
class_ranges *cranges; /* First class range. */
|
||||||
|
class_ranges *next_cranges; /* Next class range. */
|
||||||
|
size_t char_lists_size; /* Current size of character lists */
|
||||||
|
#endif
|
||||||
|
} compile_block;
|
||||||
|
|
||||||
|
/* Structure for keeping the properties of the in-memory stack used
|
||||||
|
by the JIT matcher. */
|
||||||
|
|
||||||
|
typedef struct pcre2_real_jit_stack {
|
||||||
|
pcre2_memctl memctl;
|
||||||
|
void* stack;
|
||||||
|
} pcre2_real_jit_stack;
|
||||||
|
|
||||||
|
/* Structure for items in a linked list that represents an explicit recursive
|
||||||
|
call within the pattern when running pcre2_dfa_match(). */
|
||||||
|
|
||||||
|
typedef struct dfa_recursion_info {
|
||||||
|
struct dfa_recursion_info *prevrec;
|
||||||
|
PCRE2_SPTR subject_position;
|
||||||
|
PCRE2_SPTR last_used_ptr;
|
||||||
|
uint32_t group_num;
|
||||||
|
} dfa_recursion_info;
|
||||||
|
|
||||||
|
/* Structure for "stack" frames that are used for remembering backtracking
|
||||||
|
positions during matching. As these are used in a vector, with the ovector item
|
||||||
|
being extended, the size of the structure must be a multiple of PCRE2_SIZE. The
|
||||||
|
only way to check this at compile time is to force an error by generating an
|
||||||
|
array with a negative size. By putting this in a typedef (which is never used),
|
||||||
|
we don't generate any code when all is well. */
|
||||||
|
|
||||||
|
typedef struct heapframe {
|
||||||
|
|
||||||
|
/* The first set of fields are variables that have to be preserved over calls
|
||||||
|
to RRMATCH(), but which do not need to be copied to new frames. */
|
||||||
|
|
||||||
|
PCRE2_SPTR ecode; /* The current position in the pattern */
|
||||||
|
PCRE2_SPTR temp_sptr[2]; /* Used for short-term PCRE2_SPTR values */
|
||||||
|
PCRE2_SIZE length; /* Used for character, string, or code lengths */
|
||||||
|
PCRE2_SIZE back_frame; /* Amount to subtract on RRETURN */
|
||||||
|
PCRE2_SIZE temp_size; /* Used for short-term PCRE2_SIZE values */
|
||||||
|
uint32_t rdepth; /* Function "recursion" depth within pcre2_match() */
|
||||||
|
uint32_t group_frame_type; /* Type information for group frames */
|
||||||
|
uint32_t temp_32[4]; /* Used for short-term 32-bit or BOOL values */
|
||||||
|
uint8_t return_id; /* Where to go on in internal "return" */
|
||||||
|
uint8_t op; /* Processing opcode */
|
||||||
|
|
||||||
|
/* At this point, the structure is 16-bit aligned. On most architectures
|
||||||
|
the alignment requirement for a pointer will ensure that the eptr field below
|
||||||
|
is 32-bit or 64-bit aligned. However, on m68k it is fine to have a pointer
|
||||||
|
that is 16-bit aligned. We must therefore ensure that what comes between here
|
||||||
|
and eptr is an odd multiple of 16 bits so as to get back into 32-bit
|
||||||
|
alignment. This happens naturally when PCRE2_UCHAR is 8 bits wide, but needs
|
||||||
|
fudges in the other cases. In the 32-bit case the padding comes first so that
|
||||||
|
the occu field itself is 32-bit aligned. Without the padding, this structure
|
||||||
|
is no longer a multiple of PCRE2_SIZE on m68k, and the check below fails. */
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
PCRE2_UCHAR occu[6]; /* Used for other case code units */
|
||||||
|
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||||
|
PCRE2_UCHAR occu[2]; /* Used for other case code units */
|
||||||
|
uint8_t unused[2]; /* Ensure 32-bit alignment (see above) */
|
||||||
|
#else
|
||||||
|
uint8_t unused[2]; /* Ensure 32-bit alignment (see above) */
|
||||||
|
PCRE2_UCHAR occu[1]; /* Used for other case code units */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* The rest have to be copied from the previous frame whenever a new frame
|
||||||
|
becomes current. The final field is specified as a large vector so that
|
||||||
|
runtime array bound checks don't catch references to it. However, for any
|
||||||
|
specific call to pcre2_match() the memory allocated for each frame structure
|
||||||
|
allows for exactly the right size ovector for the number of capturing
|
||||||
|
parentheses. (See also the comment for pcre2_real_match_data above.) */
|
||||||
|
|
||||||
|
PCRE2_SPTR eptr; /* MUST BE FIRST */
|
||||||
|
PCRE2_SPTR start_match; /* Can be adjusted by \K */
|
||||||
|
PCRE2_SPTR mark; /* Most recent mark on the success path */
|
||||||
|
PCRE2_SPTR recurse_last_used; /* Last character used at time of pattern recursion */
|
||||||
|
uint32_t current_recurse; /* Group number of current (deepest) pattern recursion */
|
||||||
|
uint32_t capture_last; /* Most recent capture */
|
||||||
|
PCRE2_SIZE last_group_offset; /* Saved offset to most recent group frame */
|
||||||
|
PCRE2_SIZE offset_top; /* Offset after highest capture */
|
||||||
|
PCRE2_SIZE ovector[131072]; /* Must be last in the structure */
|
||||||
|
} heapframe;
|
||||||
|
|
||||||
|
/* Assert that the size of the heapframe structure is a multiple of PCRE2_SIZE.
|
||||||
|
See various comments above. */
|
||||||
|
|
||||||
|
STATIC_ASSERT((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0, heapframe_size);
|
||||||
|
|
||||||
|
/* Structure for computing the alignment of heapframe. */
|
||||||
|
|
||||||
|
typedef struct heapframe_align {
|
||||||
|
char unalign; /* Completely unalign the current offset */
|
||||||
|
heapframe frame; /* Offset is its alignment */
|
||||||
|
} heapframe_align;
|
||||||
|
|
||||||
|
/* This define is the minimum alignment required for a heapframe, in bytes. */
|
||||||
|
|
||||||
|
#define HEAPFRAME_ALIGNMENT offsetof(heapframe_align, frame)
|
||||||
|
|
||||||
|
/* Structure for passing "static" information around between the functions
|
||||||
|
doing traditional NFA matching (pcre2_match() and friends). */
|
||||||
|
|
||||||
|
typedef struct match_block {
|
||||||
|
pcre2_memctl memctl; /* For general use */
|
||||||
|
uint32_t heap_limit; /* As it says */
|
||||||
|
uint32_t match_limit; /* As it says */
|
||||||
|
uint32_t match_limit_depth; /* As it says */
|
||||||
|
uint32_t match_call_count; /* Number of times a new frame is created */
|
||||||
|
BOOL hitend; /* Hit the end of the subject at some point */
|
||||||
|
BOOL hasthen; /* Pattern contains (*THEN) */
|
||||||
|
BOOL allowemptypartial; /* Allow empty hard partial */
|
||||||
|
const uint8_t *lcc; /* Points to lower casing table */
|
||||||
|
const uint8_t *fcc; /* Points to case-flipping table */
|
||||||
|
const uint8_t *ctypes; /* Points to table of type maps */
|
||||||
|
PCRE2_SIZE start_offset; /* The start offset value */
|
||||||
|
PCRE2_SIZE end_offset_top; /* Highwater mark at end of match */
|
||||||
|
uint16_t partial; /* PARTIAL options */
|
||||||
|
uint16_t bsr_convention; /* \R interpretation */
|
||||||
|
uint16_t name_count; /* Number of names in name table */
|
||||||
|
uint16_t name_entry_size; /* Size of entry in names table */
|
||||||
|
PCRE2_SPTR name_table; /* Table of group names */
|
||||||
|
PCRE2_SPTR start_code; /* For use in pattern recursion */
|
||||||
|
PCRE2_SPTR start_subject; /* Start of the subject string */
|
||||||
|
PCRE2_SPTR check_subject; /* Where UTF-checked from */
|
||||||
|
PCRE2_SPTR end_subject; /* Usable end of the subject string */
|
||||||
|
PCRE2_SPTR true_end_subject; /* Actual end of the subject string */
|
||||||
|
PCRE2_SPTR end_match_ptr; /* Subject position at end match */
|
||||||
|
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
|
||||||
|
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
|
||||||
|
PCRE2_SPTR mark; /* Mark pointer to pass back on success */
|
||||||
|
PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */
|
||||||
|
PCRE2_SPTR verb_ecode_ptr; /* For passing back info */
|
||||||
|
PCRE2_SPTR verb_skip_ptr; /* For passing back a (*SKIP) name */
|
||||||
|
uint32_t verb_current_recurse; /* Current recursion group when (*VERB) happens */
|
||||||
|
uint32_t moptions; /* Match options */
|
||||||
|
uint32_t poptions; /* Pattern options */
|
||||||
|
uint32_t skip_arg_count; /* For counting SKIP_ARGs */
|
||||||
|
uint32_t ignore_skip_arg; /* For re-run when SKIP arg name not found */
|
||||||
|
uint32_t nltype; /* Newline type */
|
||||||
|
uint32_t nllen; /* Newline string length */
|
||||||
|
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
|
||||||
|
pcre2_callout_block *cb; /* Points to a callout block */
|
||||||
|
void *callout_data; /* To pass back to callouts */
|
||||||
|
int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */
|
||||||
|
} match_block;
|
||||||
|
|
||||||
|
/* A similar structure is used for the same purpose by the DFA matching
|
||||||
|
functions. */
|
||||||
|
|
||||||
|
typedef struct dfa_match_block {
|
||||||
|
pcre2_memctl memctl; /* For general use */
|
||||||
|
PCRE2_SPTR start_code; /* Start of the compiled pattern */
|
||||||
|
PCRE2_SPTR start_subject ; /* Start of the subject string */
|
||||||
|
PCRE2_SPTR end_subject; /* End of subject string */
|
||||||
|
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
|
||||||
|
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
|
||||||
|
const uint8_t *tables; /* Character tables */
|
||||||
|
PCRE2_SIZE start_offset; /* The start offset value */
|
||||||
|
uint32_t heap_limit; /* As it says */
|
||||||
|
PCRE2_SIZE heap_used; /* As it says */
|
||||||
|
uint32_t match_limit; /* As it says */
|
||||||
|
uint32_t match_limit_depth; /* As it says */
|
||||||
|
uint32_t match_call_count; /* Number of calls of internal function */
|
||||||
|
uint32_t moptions; /* Match options */
|
||||||
|
uint32_t poptions; /* Pattern options */
|
||||||
|
uint32_t nltype; /* Newline type */
|
||||||
|
uint32_t nllen; /* Newline string length */
|
||||||
|
BOOL allowemptypartial; /* Allow empty hard partial */
|
||||||
|
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
|
||||||
|
uint16_t bsr_convention; /* \R interpretation */
|
||||||
|
pcre2_callout_block *cb; /* Points to a callout block */
|
||||||
|
void *callout_data; /* To pass back to callouts */
|
||||||
|
int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */
|
||||||
|
dfa_recursion_info *recursive; /* Linked list of pattern recursion data */
|
||||||
|
} dfa_match_block;
|
||||||
|
|
||||||
|
#endif /* PCRE2_PCRE2TEST */
|
||||||
|
|
||||||
|
/* End of pcre2_intmodedep.h */
|
||||||
2280
3rd/pcre2/src/pcre2_jit_char_inc.h
Normal file
2280
3rd/pcre2/src/pcre2_jit_char_inc.h
Normal file
@@ -0,0 +1,2280 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
This module by Zoltan Herczeg
|
||||||
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* XClass matching code. */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_WIDE_CHARS
|
||||||
|
|
||||||
|
#define ECLASS_CHAR_DATA STACK_TOP
|
||||||
|
#define ECLASS_STACK_DATA STACK_LIMIT
|
||||||
|
|
||||||
|
/* Moves the offset tracked in the local variable charoffset to (value). When
the two differ, one OP2 is issued on TMP1: an SLJIT_ADD of the difference if the
new offset is smaller, an SLJIT_SUB of the difference if it is larger. The
macro expands to two statements (the conditional and the assignment) and
evaluates its argument more than once. */
#define SET_CHAR_OFFSET(value) \
  if ((value) < charoffset) \
    { \
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
    } \
  else if ((value) > charoffset) \
    { \
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
    } \
  charoffset = (value);
|
||||||
|
|
||||||
|
/* Fetches the next item from the character list at next_char into
destination and advances next_char past it. Items are 16 bits wide while the
local list_ind is 0 or 1, and 32 bits wide otherwise. Relies on the locals
list_ind and next_char being in scope at the point of use. */
#define READ_FROM_CHAR_LIST(destination) \
  if (list_ind > 1) \
    { \
    destination = *(const uint32_t*)next_char; \
    next_char += 4; \
    } \
  else \
    { \
    destination = *(const uint16_t*)next_char; \
    next_char += 2; \
    }
|
||||||
|
|
||||||
|
#define XCLASS_LOCAL_RANGES_SIZE 32
|
||||||
|
#define XCLASS_LOCAL_RANGES_LOG2_SIZE 5
|
||||||
|
|
||||||
|
typedef struct xclass_stack_item {
|
||||||
|
sljit_u32 first_item;
|
||||||
|
sljit_u32 last_item;
|
||||||
|
struct sljit_jump *jump;
|
||||||
|
} xclass_stack_item;
|
||||||
|
|
||||||
|
typedef struct xclass_ranges {
|
||||||
|
size_t range_count;
|
||||||
|
/* Pointer to ranges. A stack area is provided when a small buffer is enough. */
|
||||||
|
uint32_t *ranges;
|
||||||
|
uint32_t local_ranges[XCLASS_LOCAL_RANGES_SIZE * 2];
|
||||||
|
/* Stack size must be log2(ranges / 2). */
|
||||||
|
xclass_stack_item *stack;
|
||||||
|
xclass_stack_item local_stack[XCLASS_LOCAL_RANGES_LOG2_SIZE];
|
||||||
|
} xclass_ranges;
|
||||||
|
|
||||||
|
/* Collects the character ranges of an extended class into ranges->ranges as
consecutive (start, end) values, and stores the number of uint32_t values
written in ranges->range_count.

If the class data at cc starts with XCL_SINGLE or XCL_RANGE items, they are
decoded straight from cc up to XCL_END into the buffer ranges->ranges already
points to. Otherwise the data refers to character lists located before
common->start. Their size is estimated first; when the estimate exceeds
XCLASS_LOCAL_RANGES_SIZE, one heap block that holds both the stack and the
range buffer is allocated. If that allocation fails, the compiler memory error
is set, ranges->ranges is set to NULL and the function returns.

The locals compiler, utf, list_ind and next_char are referenced by macros used
below (DEFINE_COMPILER, GETCHARINCTEST, READ_FROM_CHAR_LIST), so their names
must not change. */

static void xclass_compute_ranges(compiler_common *common, PCRE2_SPTR cc, xclass_ranges *ranges)
{
DEFINE_COMPILER;
const uint32_t no_start = ~(uint32_t)0;
size_t range_count = 0;
size_t max_ranges = 0;
size_t stack_depth, bits;
size_t stack_bytes, range_bytes;
uint32_t type, scan_type, list_ind;
uint32_t char_list_add, range_start, range_end;
const uint8_t *next_char;
const uint8_t *scan_ptr;
BOOL begins_with_range;
#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
BOOL utf = common->utf;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */

if (*cc == XCL_SINGLE || *cc == XCL_RANGE)
  {
  /* Short form: just a handful of ranges, listed inline. */
  do
    {
    type = *cc++;
    SLJIT_ASSERT(type == XCL_SINGLE || type == XCL_RANGE);

    /* A single character is stored as a range whose two ends coincide. */
    GETCHARINCTEST(range_end, cc);
    ranges->ranges[range_count++] = range_end;

    if (type == XCL_RANGE)
      {
      GETCHARINCTEST(range_end, cc);
      }

    ranges->ranges[range_count++] = range_end;
    }
  while (*cc != XCL_END);

  SLJIT_ASSERT(range_count <= XCLASS_LOCAL_RANGES_SIZE);
  ranges->range_count = range_count;
  return;
  }

SLJIT_ASSERT(cc[0] >= XCL_LIST);
#if PCRE2_CODE_UNIT_WIDTH == 8
type = (uint32_t)(cc[0] << 8) | cc[1];
cc += 2;
#else
type = cc[0];
cc++;
#endif /* CODE_UNIT_WIDTH */

/* Align characters. */
next_char = (const uint8_t*)common->start - (GET(cc, 0) << 1);
type &= XCL_TYPE_MASK;

/* Pass 1: walk over the lists without decoding them to estimate how many
ranges may be produced. Lists 0 and 1 hold 16-bit items, later lists hold
32-bit items. */
scan_ptr = next_char;

for (scan_type = type, list_ind = 0; scan_type > 0;
     scan_type >>= XCL_TYPE_BIT_LEN, list_ind++)
  {
  uint32_t item_count = scan_type & XCL_ITEM_COUNT_MASK;

  if (item_count == XCL_ITEM_COUNT_MASK)
    {
    /* The real count is stored as the first item of the list. */
    if (list_ind > 1)
      {
      item_count = *(const uint32_t*)scan_ptr;
      scan_ptr += 4;
      }
    else
      {
      item_count = *(const uint16_t*)scan_ptr;
      scan_ptr += 2;
      }
    }

  scan_ptr += (size_t)item_count << (list_ind > 1 ? 2 : 1);
  max_ranges += item_count + 1;
  }

if (max_ranges > XCLASS_LOCAL_RANGES_SIZE)
  {
  /* Compute log2(max_ranges) */
  stack_depth = 0;
  for (bits = max_ranges - 1; bits > 0; bits >>= 1)
    stack_depth++;

  /* The stack items come first in the block, the range values follow. */
  stack_bytes = sizeof(xclass_stack_item) * stack_depth;
  range_bytes = (sizeof(uint32_t) << 1) * (size_t)max_ranges;

  ranges->stack = (xclass_stack_item*)SLJIT_MALLOC(stack_bytes + range_bytes,
    compiler->allocator_data);

  if (ranges->stack == NULL)
    {
    sljit_set_compiler_memory_error(compiler);
    ranges->ranges = NULL;
    return;
    }

  ranges->ranges = (uint32_t*)(ranges->stack + stack_depth);
  }

/* Pass 2: decode the lists. range_start == no_start means that no range is
currently open. */
char_list_add = XCL_CHAR_LIST_LOW_16_ADD;
list_ind = 0;
range_start = no_start;

if ((type & XCL_BEGIN_WITH_RANGE) != 0)
  range_start = XCL_CHAR_LIST_LOW_16_START;

while (type != 0)
  {
  uint32_t item_count = type & XCL_ITEM_COUNT_MASK;

  if (item_count == XCL_ITEM_COUNT_MASK)
    {
    READ_FROM_CHAR_LIST(item_count);
    SLJIT_ASSERT(item_count >= XCL_ITEM_COUNT_MASK);
    }

  for (; item_count > 0; item_count--)
    {
    uint32_t raw, value;

    READ_FROM_CHAR_LIST(raw);
    value = char_list_add + (raw >> XCL_CHAR_SHIFT);

    if ((raw & XCL_CHAR_END) == 0)
      {
      /* Not an end item: it opens a range. */
      range_start = value;
      continue;
      }

    /* End item: closes the open range, or stands for a single character. */
    if (range_start == no_start)
      range_start = value;

    ranges->ranges[range_count++] = range_start;
    ranges->ranges[range_count++] = value;
    range_start = no_start;
    }

  list_ind++;
  type >>= XCL_TYPE_BIT_LEN;
  begins_with_range = (type & XCL_BEGIN_WITH_RANGE) != 0;

  if (range_start == no_start)
    {
    if (begins_with_range)
      {
      /* The next list starts inside a range. */
      switch (list_ind)
        {
        case 1:
        range_start = XCL_CHAR_LIST_HIGH_16_START;
        break;

#if PCRE2_CODE_UNIT_WIDTH == 32
        case 2:
        range_start = XCL_CHAR_LIST_LOW_32_START;
        break;

        default:
        range_start = XCL_CHAR_LIST_HIGH_32_START;
        break;
#else
        default:
        range_start = XCL_CHAR_LIST_LOW_32_START;
        break;
#endif
        }
      }
    }
  else if (!begins_with_range)
    {
    /* A range is still open and the next list does not continue it, so it is
    closed at the end value of the list that was just processed. */
    switch (list_ind)
      {
      case 1:
      range_end = XCL_CHAR_LIST_LOW_16_END;
      break;

      case 2:
      range_end = XCL_CHAR_LIST_HIGH_16_END;
      break;

#if PCRE2_CODE_UNIT_WIDTH == 32
      case 3:
      range_end = XCL_CHAR_LIST_LOW_32_END;
      break;

      default:
      range_end = XCL_CHAR_LIST_HIGH_32_END;
      break;
#else
      default:
      range_end = XCL_CHAR_LIST_LOW_32_END;
      break;
#endif
      }

    ranges->ranges[range_count++] = range_start;
    ranges->ranges[range_count++] = range_end;
    range_start = no_start;
    }

  /* Select the base value added to the items of the next list. */
  switch (list_ind)
    {
    case 1:
    char_list_add = XCL_CHAR_LIST_HIGH_16_ADD;
    break;

#if PCRE2_CODE_UNIT_WIDTH == 32
    case 2:
    char_list_add = XCL_CHAR_LIST_LOW_32_ADD;
    break;

    default:
    char_list_add = XCL_CHAR_LIST_HIGH_32_ADD;
    break;
#else
    default:
    char_list_add = XCL_CHAR_LIST_LOW_32_ADD;
    break;
#endif
    }
  }

SLJIT_ASSERT(range_count > 0 && range_count <= (max_ranges << 1));
SLJIT_ASSERT(next_char <= (const uint8_t*)common->start);
ranges->range_count = range_count;
}
|
||||||
|
|
||||||
|
/* Emits the bitset test of an extended class for the value held in TMP1.

A comparison against 255 is emitted first; its jump is bound to the end of the
code generated here, so larger values bypass the bitset test. For the remaining
values optimize_class() is tried. If it returns FALSE, an explicit bit lookup
is emitted instead: TMP2 receives the low three bits of TMP1, TMP1 is shifted
right by three and used to load one byte relative to the bitset address, and
the bit selected by TMP2 is tested, with a non-zero result jumping to the found
list. An unconditional jump is then added to the backtracks list. */

static void xclass_check_bitset(compiler_common *common, const sljit_u8 *bitset, jump_list **found, jump_list **backtracks)
{
DEFINE_COMPILER;
BOOL last_bit_set = (bitset[31] & 0x80) != 0;
struct sljit_jump *above_255 = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
BOOL optimized = optimize_class(common, bitset, last_bit_set, TRUE, found);

if (!optimized)
  {
  /* Bit index goes to TMP2, byte index stays in TMP1. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)bitset);

  /* TMP2 = 1 << TMP2, then test that bit in the loaded byte. */
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
  add_jump(compiler, found, JUMP(SLJIT_NOT_ZERO));
  }

add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
JUMPHERE(above_255);
}
|
||||||
|
|
||||||
|
#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
|
||||||
|
|
||||||
|
/* Updates *min_ptr and *max_ptr so that they cover the smallest and the
   largest character described by the xclass items starting at cc.
   The values are only widened: *min_ptr is lowered and *max_ptr is raised
   when an item falls outside of the incoming interval.

   Two item encodings are handled:
    - a sequence of XCL_SINGLE / XCL_RANGE items terminated by XCL_END
    - a character list (first code unit >= XCL_LIST), whose type word
      describes consecutive sub-lists selected by list_ind
      (0: low 16 bit, 1: high 16 bit, 2: low 32 bit constants are used)

   NOTE(review): 'utf' is never referenced by name in this function;
   presumably it is read by the GETCHARINCTEST macro - confirm. */
static void xclass_update_min_max(compiler_common *common, PCRE2_SPTR cc, sljit_u32 *min_ptr, sljit_u32 *max_ptr)
|
||||||
|
{
|
||||||
|
uint32_t type, list_ind, c;
|
||||||
|
sljit_u32 min = *min_ptr;
|
||||||
|
sljit_u32 max = *max_ptr;
|
||||||
|
uint32_t char_list_add;
|
||||||
|
const uint8_t *next_char;
|
||||||
|
BOOL utf = TRUE;
|
||||||
|
|
||||||
|
/* This function is pointless without utf 8/16. */
|
||||||
|
SLJIT_ASSERT(common->utf);
|
||||||
|
if (*cc == XCL_SINGLE || *cc == XCL_RANGE)
|
||||||
|
{
|
||||||
|
/* Only a few ranges are present. */
|
||||||
|
do
|
||||||
|
{
|
||||||
|
type = *cc++;
|
||||||
|
SLJIT_ASSERT(type == XCL_SINGLE || type == XCL_RANGE);
|
||||||
|
GETCHARINCTEST(c, cc);
|
||||||
|
|
||||||
|
if (c < min)
|
||||||
|
min = c;
|
||||||
|
|
||||||
|
/* For a range, the second character is the upper bound. */
if (type == XCL_RANGE)
|
||||||
|
{
|
||||||
|
GETCHARINCTEST(c, cc);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (c > max)
|
||||||
|
max = c;
|
||||||
|
}
|
||||||
|
while (*cc != XCL_END);
|
||||||
|
|
||||||
|
SLJIT_ASSERT(min <= MAX_UTF_CODE_POINT && max <= MAX_UTF_CODE_POINT && min <= max);
|
||||||
|
*min_ptr = min;
|
||||||
|
*max_ptr = max;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
SLJIT_ASSERT(cc[0] >= XCL_LIST);
|
||||||
|
/* The type word occupies two code units in 8 bit mode, one otherwise. */
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
type = (uint32_t)(cc[0] << 8) | cc[1];
|
||||||
|
cc += 2;
|
||||||
|
#else
|
||||||
|
type = cc[0];
|
||||||
|
cc++;
|
||||||
|
#endif /* CODE_UNIT_WIDTH */
|
||||||
|
|
||||||
|
/* Align characters. */
|
||||||
|
/* The list data is addressed backwards from common->start; the stored
   offset is multiplied by two. */
next_char = (const uint8_t*)common->start - (GET(cc, 0) << 1);
|
||||||
|
type &= XCL_TYPE_MASK;
|
||||||
|
|
||||||
|
SLJIT_ASSERT(type != 0);
|
||||||
|
|
||||||
|
/* Detect minimum. */
|
||||||
|
|
||||||
|
/* Skip unused ranges. */
|
||||||
|
list_ind = 0;
|
||||||
|
while ((type & (XCL_BEGIN_WITH_RANGE | XCL_ITEM_COUNT_MASK)) == 0)
|
||||||
|
{
|
||||||
|
type >>= XCL_TYPE_BIT_LEN;
|
||||||
|
list_ind++;
|
||||||
|
}
|
||||||
|
|
||||||
|
SLJIT_ASSERT(list_ind <= 2);
|
||||||
|
/* Select the base value and the first character of the sub-list. */
switch (list_ind)
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
char_list_add = XCL_CHAR_LIST_LOW_16_ADD;
|
||||||
|
c = XCL_CHAR_LIST_LOW_16_START;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 1:
|
||||||
|
char_list_add = XCL_CHAR_LIST_HIGH_16_ADD;
|
||||||
|
c = XCL_CHAR_LIST_HIGH_16_START;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
char_list_add = XCL_CHAR_LIST_LOW_32_ADD;
|
||||||
|
c = XCL_CHAR_LIST_LOW_32_START;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* A sub-list which begins with a range starts at its first character. */
if ((type & XCL_BEGIN_WITH_RANGE) != 0)
|
||||||
|
{
|
||||||
|
if (c < min)
|
||||||
|
min = c;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* When the item count field is saturated, the first slot of the
   sub-list is not a character (it is read as a count below), so
   the first character is stored in the second slot. */
if ((type & XCL_ITEM_COUNT_MASK) == XCL_ITEM_COUNT_MASK)
|
||||||
|
{
|
||||||
|
if (list_ind <= 1)
|
||||||
|
c = *(const uint16_t*)(next_char + 2);
|
||||||
|
else
|
||||||
|
c = *(const uint32_t*)(next_char + 4);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (list_ind <= 1)
|
||||||
|
c = *(const uint16_t*)next_char;
|
||||||
|
else
|
||||||
|
c = *(const uint32_t*)next_char;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Drop the flag bits of the item and add the base of the sub-list. */
c = char_list_add + (c >> XCL_CHAR_SHIFT);
|
||||||
|
if (c < min)
|
||||||
|
min = c;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Detect maximum. */
|
||||||
|
|
||||||
|
/* Skip intermediate ranges. */
|
||||||
|
/* After the loop next_char points after the items of the last
   sub-list which has a non-zero type. */
while (TRUE)
|
||||||
|
{
|
||||||
|
if ((type & XCL_ITEM_COUNT_MASK) == XCL_ITEM_COUNT_MASK)
|
||||||
|
{
|
||||||
|
/* The first slot contains the item count, and it is skipped as well. */
if (list_ind <= 1)
|
||||||
|
{
|
||||||
|
c = *(const uint16_t*)next_char;
|
||||||
|
next_char += (c + 1) << 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
c = *(const uint32_t*)next_char;
|
||||||
|
next_char += (c + 1) << 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
next_char += (type & XCL_ITEM_COUNT_MASK) << (list_ind <= 1 ? 1 : 2);
|
||||||
|
|
||||||
|
if ((type >> XCL_TYPE_BIT_LEN) == 0)
|
||||||
|
break;
|
||||||
|
|
||||||
|
list_ind++;
|
||||||
|
type >>= XCL_TYPE_BIT_LEN;
|
||||||
|
}
|
||||||
|
|
||||||
|
SLJIT_ASSERT(list_ind <= 2 && type != 0);
|
||||||
|
/* Select the base value and the last character of the sub-list. */
switch (list_ind)
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
char_list_add = XCL_CHAR_LIST_LOW_16_ADD;
|
||||||
|
c = XCL_CHAR_LIST_LOW_16_END;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 1:
|
||||||
|
char_list_add = XCL_CHAR_LIST_HIGH_16_ADD;
|
||||||
|
c = XCL_CHAR_LIST_HIGH_16_END;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
char_list_add = XCL_CHAR_LIST_LOW_32_ADD;
|
||||||
|
c = XCL_CHAR_LIST_LOW_32_END;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* When the last stored item has XCL_CHAR_END set, it is the maximum,
   otherwise the last character of the sub-list selected above is kept. */
if ((type & XCL_ITEM_COUNT_MASK) != 0)
|
||||||
|
{
|
||||||
|
/* Type is reused as temporary. */
|
||||||
|
if (list_ind <= 1)
|
||||||
|
type = *(const uint16_t*)(next_char - 2);
|
||||||
|
else
|
||||||
|
type = *(const uint32_t*)(next_char - 4);
|
||||||
|
|
||||||
|
if (type & XCL_CHAR_END)
|
||||||
|
c = char_list_add + (type >> XCL_CHAR_SHIFT);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (c > max)
|
||||||
|
max = c;
|
||||||
|
|
||||||
|
SLJIT_ASSERT(min <= MAX_UTF_CODE_POINT && max <= MAX_UTF_CODE_POINT && min <= max);
|
||||||
|
*min_ptr = min;
|
||||||
|
*max_ptr = max;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
|
||||||
|
|
||||||
|
/* Bits of the 'status' argument / variable of compile_xclass_matchingpath. */

/* The class is compiled as part of an extended class: no character is read,
   the character is restored from ECLASS_CHAR_DATA when needed, and a match
   is recorded by setting the lowest bit of ECLASS_STACK_DATA. */
#define XCLASS_IS_ECLASS 0x001
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
/* The character value is needed again after the UCD record lookup. */
#define XCLASS_SAVE_CHAR 0x002
|
||||||
|
/* The chartype field of the UCD record is needed. */
#define XCLASS_HAS_TYPE 0x004
|
||||||
|
/* The script field of the UCD record is needed. */
#define XCLASS_HAS_SCRIPT 0x008
|
||||||
|
/* The script extension set of the UCD record is needed. */
#define XCLASS_HAS_SCRIPT_EXTENSION 0x010
|
||||||
|
/* The boolean property set of the UCD record is needed. */
#define XCLASS_HAS_BOOL 0x020
|
||||||
|
/* The bidi class of the UCD record is needed. */
#define XCLASS_HAS_BIDICL 0x040
|
||||||
|
/* Any of these bits requires the UCD record lookup to be emitted. */
#define XCLASS_NEEDS_UCD (XCLASS_HAS_TYPE | XCLASS_HAS_SCRIPT | XCLASS_HAS_SCRIPT_EXTENSION | XCLASS_HAS_BOOL | XCLASS_HAS_BIDICL)
|
||||||
|
/* A negated (XCL_NOTPROP) script extension item is present. */
#define XCLASS_SCRIPT_EXTENSION_NOTPROP 0x080
|
||||||
|
/* TMP2 was saved in RETURN_ADDR during the script extension checks. */
#define XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR 0x100
|
||||||
|
/* TMP2 was saved in LOCAL0 during the script extension checks. */
#define XCLASS_SCRIPT_EXTENSION_RESTORE_LOCAL0 0x200
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
/* Forward declaration: compile_xclass_matchingpath calls this function
   (with OP_ALLANY) when the class accepts every general category. */
static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
|
||||||
|
|
||||||
|
/* Emits the matching code of an extended class (xclass) starting at cc.

   common     - compiler state
   cc         - points to the flag code unit of the xclass
   backtracks - receives the jumps which are taken when the character
                is rejected by the class
   status     - XCLASS_* bits; only XCLASS_IS_ECLASS is tested before
                this function sets further bits itself

   Unless XCLASS_IS_ECLASS is set, the character is read by read_char().
   The items are processed in this order: bitmap (XCL_MAP), Unicode
   properties which need the UCD record, remaining Unicode properties,
   and finally the character ranges computed by xclass_compute_ranges().

   The 'compares' counter contains the number of checks which are still
   to be emitted. Until the last one, a successful check jumps to 'list'
   ('found' for normal, 'backtracks' for negated classes). The last check
   is inverted for normal classes, so its failure jumps to 'backtracks'.

   TMP3 must be preserved because it is used by compile_iterator_matchingpath. */
|
||||||
|
static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, sljit_u32 status)
|
||||||
|
{
|
||||||
|
DEFINE_COMPILER;
|
||||||
|
jump_list *found = NULL;
|
||||||
|
jump_list *check_result = NULL;
|
||||||
|
jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
|
||||||
|
sljit_uw c, charoffset;
|
||||||
|
sljit_u32 max = READ_CHAR_MAX, min = 0;
|
||||||
|
struct sljit_jump *jump = NULL;
|
||||||
|
PCRE2_UCHAR flags;
|
||||||
|
PCRE2_SPTR ccbegin;
|
||||||
|
sljit_u32 compares, invertcmp, depth;
|
||||||
|
sljit_u32 first_item, last_item, mid_item;
|
||||||
|
sljit_u32 range_start, range_end;
|
||||||
|
xclass_ranges ranges;
|
||||||
|
BOOL has_cmov, last_range_set;
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
sljit_u32 category_list = 0;
|
||||||
|
sljit_u32 items;
|
||||||
|
int typereg = TMP1;
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
SLJIT_ASSERT(common->locals_size >= SSIZE_OF(sw));
|
||||||
|
/* Scanning the necessary info. */
|
||||||
|
flags = *cc++;
|
||||||
|
ccbegin = cc;
|
||||||
|
compares = 0;
|
||||||
|
|
||||||
|
if (flags & XCL_MAP)
|
||||||
|
cc += 32 / sizeof(PCRE2_UCHAR);
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
/* First pass over the property items: general category checks are merged
   into the category_list bit set (and do not increase compares), the other
   items set the status bits of the UCD fields they need. */
while (*cc == XCL_PROP || *cc == XCL_NOTPROP)
|
||||||
|
{
|
||||||
|
compares++;
|
||||||
|
cc++;
|
||||||
|
|
||||||
|
items = 0;
|
||||||
|
|
||||||
|
switch(*cc)
|
||||||
|
{
|
||||||
|
case PT_LAMP:
|
||||||
|
items = UCPCAT3(ucp_Lu, ucp_Ll, ucp_Lt);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_GC:
|
||||||
|
items = UCPCAT_RANGE(PRIV(ucp_typerange)[(int)cc[1] * 2], PRIV(ucp_typerange)[(int)cc[1] * 2 + 1]);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_PC:
|
||||||
|
items = UCPCAT(cc[1]);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_WORD:
|
||||||
|
items = UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_ALNUM:
|
||||||
|
items = UCPCAT_L | UCPCAT_N;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_SCX:
|
||||||
|
status |= XCLASS_HAS_SCRIPT_EXTENSION;
|
||||||
|
if (cc[-1] == XCL_NOTPROP)
|
||||||
|
{
|
||||||
|
status |= XCLASS_SCRIPT_EXTENSION_NOTPROP;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* A non-negated script extension is checked twice: once as a
   script, and once as a script extension. */
compares++;
|
||||||
|
/* Fall through */
|
||||||
|
|
||||||
|
case PT_SC:
|
||||||
|
status |= XCLASS_HAS_SCRIPT;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_SPACE:
|
||||||
|
case PT_PXSPACE:
|
||||||
|
case PT_PXGRAPH:
|
||||||
|
case PT_PXPRINT:
|
||||||
|
case PT_PXPUNCT:
|
||||||
|
status |= XCLASS_SAVE_CHAR | XCLASS_HAS_TYPE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_UCNC:
|
||||||
|
case PT_PXXDIGIT:
|
||||||
|
status |= XCLASS_SAVE_CHAR;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_BOOL:
|
||||||
|
status |= XCLASS_HAS_BOOL;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_BIDICL:
|
||||||
|
status |= XCLASS_HAS_BIDICL;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
SLJIT_UNREACHABLE();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (items > 0)
|
||||||
|
{
|
||||||
|
if (cc[-1] == XCL_NOTPROP)
|
||||||
|
items ^= UCPCAT_ALL;
|
||||||
|
category_list |= items;
|
||||||
|
status |= XCLASS_HAS_TYPE;
|
||||||
|
/* Undo the increase above: all categories share a single compare. */
compares--;
|
||||||
|
}
|
||||||
|
|
||||||
|
cc += 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (category_list == UCPCAT_ALL)
|
||||||
|
{
|
||||||
|
/* All or no characters are accepted, same as dotall. */
|
||||||
|
if (status & XCLASS_IS_ECLASS)
|
||||||
|
{
|
||||||
|
if (list != backtracks)
|
||||||
|
OP2(SLJIT_OR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
|
||||||
|
if (list == backtracks)
|
||||||
|
add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The merged general category check counts as one compare. */
if (category_list != 0)
|
||||||
|
compares++;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Characters / ranges follow the property items. They are handled
   together by the range code at the end, and count as one compare. */
if (*cc != XCL_END)
|
||||||
|
{
|
||||||
|
#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
|
||||||
|
/* Only characters are checked: the interval passed to read_char
   can be limited to the interval covered by them (and the bitmap). */
if (common->utf && compares == 0 && !(status & XCLASS_IS_ECLASS))
|
||||||
|
{
|
||||||
|
SLJIT_ASSERT(category_list == 0);
|
||||||
|
max = 0;
|
||||||
|
min = (flags & XCL_MAP) != 0 ? 0 : READ_CHAR_MAX;
|
||||||
|
xclass_update_min_max(common, cc, &min, &max);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
compares++;
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
status |= XCLASS_SAVE_CHAR;
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
SLJIT_ASSERT(compares > 0 || category_list != 0);
|
||||||
|
#else /* !SUPPORT_UNICODE */
|
||||||
|
SLJIT_ASSERT(compares > 0);
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
/* We are not necessarily in UTF mode, even in 8 bit mode. */
|
||||||
|
cc = ccbegin;
|
||||||
|
if (!(status & XCLASS_IS_ECLASS))
|
||||||
|
{
|
||||||
|
if ((flags & XCL_NOT) != 0)
|
||||||
|
read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
read_char(common, min, max, (status & XCLASS_NEEDS_UCD) ? backtracks : NULL, 0);
|
||||||
|
#else /* !SUPPORT_UNICODE */
|
||||||
|
read_char(common, min, max, NULL, 0);
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((flags & XCL_MAP) != 0)
|
||||||
|
{
|
||||||
|
SLJIT_ASSERT(!(status & XCLASS_IS_ECLASS));
|
||||||
|
xclass_check_bitset(common, (const sljit_u8 *)cc, &found, backtracks);
|
||||||
|
cc += 32 / sizeof(PCRE2_UCHAR);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (status & XCLASS_NEEDS_UCD)
|
||||||
|
{
|
||||||
|
/* The lookup below overwrites TMP1, so the character is saved. */
if ((status & (XCLASS_SAVE_CHAR | XCLASS_IS_ECLASS)) == XCLASS_SAVE_CHAR)
|
||||||
|
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
|
/* Values above MAX_UTF_CODE_POINT are replaced by UNASSIGNED_UTF_CHAR. */
if (!common->utf)
|
||||||
|
{
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
|
||||||
|
SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, UNASSIGNED_UTF_CHAR, TMP1);
|
||||||
|
}
|
||||||
|
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
|
||||||
|
|
||||||
|
/* Two stage table lookup (ucd_stage1, ucd_stage2). At the end TMP2
   contains the stage2 value multiplied by 12 (value * 8 + value * 4),
   which is used as a byte offset relative to ucd_records below.
   NOTE(review): 12 is presumably sizeof(ucd_record) - confirm. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
|
||||||
|
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
|
||||||
|
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
|
||||||
|
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
|
||||||
|
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
|
||||||
|
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||||
|
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
|
||||||
|
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
|
||||||
|
OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
|
||||||
|
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
|
||||||
|
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
|
||||||
|
|
||||||
|
/* Each property kind below rescans the property items from ccbegin. */
ccbegin = cc;
|
||||||
|
|
||||||
|
if (status & XCLASS_HAS_BIDICL)
|
||||||
|
{
|
||||||
|
OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
|
||||||
|
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_SHIFT);
|
||||||
|
|
||||||
|
while (*cc == XCL_PROP || *cc == XCL_NOTPROP)
|
||||||
|
{
|
||||||
|
cc++;
|
||||||
|
|
||||||
|
if (*cc == PT_BIDICL)
|
||||||
|
{
|
||||||
|
compares--;
|
||||||
|
invertcmp = (compares == 0 && list != backtracks);
|
||||||
|
if (cc[-1] == XCL_NOTPROP)
|
||||||
|
invertcmp ^= 0x1;
|
||||||
|
jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
|
||||||
|
add_jump(compiler, compares > 0 ? list : backtracks, jump);
|
||||||
|
}
|
||||||
|
cc += 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
cc = ccbegin;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (status & XCLASS_HAS_BOOL)
|
||||||
|
{
|
||||||
|
/* TMP1 = byte offset of the boolean property set of the record. */
OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops));
|
||||||
|
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BPROPS_MASK);
|
||||||
|
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
|
||||||
|
|
||||||
|
while (*cc == XCL_PROP || *cc == XCL_NOTPROP)
|
||||||
|
{
|
||||||
|
cc++;
|
||||||
|
if (*cc == PT_BOOL)
|
||||||
|
{
|
||||||
|
compares--;
|
||||||
|
invertcmp = (compares == 0 && list != backtracks);
|
||||||
|
if (cc[-1] == XCL_NOTPROP)
|
||||||
|
invertcmp ^= 0x1;
|
||||||
|
|
||||||
|
/* Test bit (cc[1] & 0x1f) of the 32 bit word with index (cc[1] >> 5). */
OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_boolprop_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)(1u << (cc[1] & 0x1f)));
|
||||||
|
add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
|
||||||
|
}
|
||||||
|
cc += 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
cc = ccbegin;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (status & XCLASS_HAS_SCRIPT)
|
||||||
|
{
|
||||||
|
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
|
||||||
|
|
||||||
|
while (*cc == XCL_PROP || *cc == XCL_NOTPROP)
|
||||||
|
{
|
||||||
|
cc++;
|
||||||
|
|
||||||
|
switch (*cc)
|
||||||
|
{
|
||||||
|
case PT_SCX:
|
||||||
|
/* Negated script extensions are fully handled by the next section. */
if (cc[-1] == XCL_NOTPROP)
|
||||||
|
break;
|
||||||
|
/* Fall through */
|
||||||
|
|
||||||
|
case PT_SC:
|
||||||
|
compares--;
|
||||||
|
invertcmp = (compares == 0 && list != backtracks);
|
||||||
|
if (cc[-1] == XCL_NOTPROP)
|
||||||
|
invertcmp ^= 0x1;
|
||||||
|
|
||||||
|
add_jump(compiler, compares > 0 ? list : backtracks, CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]));
|
||||||
|
}
|
||||||
|
cc += 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
cc = ccbegin;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (status & XCLASS_HAS_SCRIPT_EXTENSION)
|
||||||
|
{
|
||||||
|
/* TMP1 = byte offset of the script extension set of the record. */
OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
|
||||||
|
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_SCRIPTX_MASK);
|
||||||
|
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
|
||||||
|
|
||||||
|
if (status & XCLASS_SCRIPT_EXTENSION_NOTPROP)
|
||||||
|
{
|
||||||
|
/* TMP2 is overwritten by the script value below. When the record
   offset is still needed for the type check, it is saved in LOCAL0
   if RETURN_ADDR holds the character, and in RETURN_ADDR otherwise. */
if (status & XCLASS_HAS_TYPE)
|
||||||
|
{
|
||||||
|
if ((status & (XCLASS_SAVE_CHAR | XCLASS_IS_ECLASS)) == XCLASS_SAVE_CHAR)
|
||||||
|
{
|
||||||
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, TMP2, 0);
|
||||||
|
status |= XCLASS_SCRIPT_EXTENSION_RESTORE_LOCAL0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
|
||||||
|
status |= XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
|
||||||
|
}
|
||||||
|
|
||||||
|
while (*cc == XCL_PROP || *cc == XCL_NOTPROP)
|
||||||
|
{
|
||||||
|
cc++;
|
||||||
|
|
||||||
|
if (*cc == PT_SCX)
|
||||||
|
{
|
||||||
|
compares--;
|
||||||
|
invertcmp = (compares == 0 && list != backtracks);
|
||||||
|
|
||||||
|
jump = NULL;
|
||||||
|
/* For a negated item the script itself (in TMP2) is compared first:
   when it is equal, the bit set test below is skipped. */
if (cc[-1] == XCL_NOTPROP)
|
||||||
|
{
|
||||||
|
jump = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, (int)cc[1]);
|
||||||
|
if (invertcmp)
|
||||||
|
{
|
||||||
|
add_jump(compiler, backtracks, jump);
|
||||||
|
jump = NULL;
|
||||||
|
}
|
||||||
|
invertcmp ^= 0x1;
|
||||||
|
}
|
||||||
|
|
||||||
|
OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_script_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)(1u << (cc[1] & 0x1f)));
|
||||||
|
add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
|
||||||
|
|
||||||
|
if (jump != NULL)
|
||||||
|
JUMPHERE(jump);
|
||||||
|
}
|
||||||
|
cc += 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (status & XCLASS_SCRIPT_EXTENSION_RESTORE_LOCAL0)
|
||||||
|
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
|
||||||
|
else if (status & XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR)
|
||||||
|
OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
|
||||||
|
cc = ccbegin;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Restore the character into TMP1. */
if (status & XCLASS_SAVE_CHAR)
|
||||||
|
OP1(SLJIT_MOV, TMP1, 0, (status & XCLASS_IS_ECLASS) ? ECLASS_CHAR_DATA : RETURN_ADDR, 0);
|
||||||
|
|
||||||
|
if (status & XCLASS_HAS_TYPE)
|
||||||
|
{
|
||||||
|
/* TMP1 holds the character again, so the type goes to RETURN_ADDR. */
if (status & XCLASS_SAVE_CHAR)
|
||||||
|
typereg = RETURN_ADDR;
|
||||||
|
|
||||||
|
/* typereg = 1 << chartype, which can be tested against category sets. */
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
|
||||||
|
OP2(SLJIT_SHL, typereg, 0, SLJIT_IMM, 1, TMP2, 0);
|
||||||
|
|
||||||
|
if (category_list > 0)
|
||||||
|
{
|
||||||
|
compares--;
|
||||||
|
invertcmp = (compares == 0 && list != backtracks);
|
||||||
|
OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, category_list);
|
||||||
|
add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
/* Generating code. */
|
||||||
|
/* charoffset is the value currently subtracted from the character
   in TMP1. NOTE(review): it is presumably maintained by the
   SET_CHAR_OFFSET macro - confirm against the macro definition. */
charoffset = 0;
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
/* Second pass over the property items: only the properties which
   need the character value (and optionally its type) are emitted. */
while (*cc == XCL_PROP || *cc == XCL_NOTPROP)
|
||||||
|
{
|
||||||
|
compares--;
|
||||||
|
invertcmp = (compares == 0 && list != backtracks);
|
||||||
|
jump = NULL;
|
||||||
|
|
||||||
|
if (*cc == XCL_NOTPROP)
|
||||||
|
invertcmp ^= 0x1;
|
||||||
|
cc++;
|
||||||
|
switch(*cc)
|
||||||
|
{
|
||||||
|
case PT_LAMP:
|
||||||
|
case PT_GC:
|
||||||
|
case PT_PC:
|
||||||
|
case PT_SC:
|
||||||
|
case PT_SCX:
|
||||||
|
case PT_BOOL:
|
||||||
|
case PT_BIDICL:
|
||||||
|
case PT_WORD:
|
||||||
|
case PT_ALNUM:
|
||||||
|
/* Undo the decrease above, the item was counted when it was emitted. */
compares++;
|
||||||
|
/* Already handled. */
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_SPACE:
|
||||||
|
case PT_PXSPACE:
|
||||||
|
/* Characters 0x9-0xd, 0x85, 0x180e, or a type in the Zl..Zs range. */
SET_CHAR_OFFSET(9);
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
|
||||||
|
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||||
|
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
|
||||||
|
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
|
||||||
|
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
|
||||||
|
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
|
||||||
|
|
||||||
|
OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Zl, ucp_Zs));
|
||||||
|
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO);
|
||||||
|
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_UCNC:
|
||||||
|
/* '$', '@', '`', 0xa0-0xd7ff, or any character from 0xe000. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
|
||||||
|
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
|
||||||
|
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
|
||||||
|
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
|
||||||
|
|
||||||
|
SET_CHAR_OFFSET(0xa0);
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
|
||||||
|
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||||
|
SET_CHAR_OFFSET(0);
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
|
||||||
|
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
|
||||||
|
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_PXGRAPH:
|
||||||
|
/* TMP2 is set when the type excludes the character. The property
   matches when TMP2 is zero at the end. */
OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT_RANGE(ucp_Zl, ucp_Zs));
|
||||||
|
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
|
||||||
|
|
||||||
|
OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf));
|
||||||
|
jump = JUMP(SLJIT_ZERO);
|
||||||
|
|
||||||
|
c = charoffset;
|
||||||
|
/* In case of ucp_Cf, we overwrite the result. */
|
||||||
|
SET_CHAR_OFFSET(0x2066);
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
|
||||||
|
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||||
|
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
|
||||||
|
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
|
||||||
|
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
|
||||||
|
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
|
||||||
|
|
||||||
|
/* Restore charoffset. */
|
||||||
|
SET_CHAR_OFFSET(c);
|
||||||
|
|
||||||
|
JUMPHERE(jump);
|
||||||
|
jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_PXPRINT:
|
||||||
|
/* Same structure as PT_PXGRAPH, with a different type set (Zl and Zp
   instead of the Zl..Zs range), and without the 0x180e check. */
OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT2(ucp_Zl, ucp_Zp));
|
||||||
|
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
|
||||||
|
|
||||||
|
OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf));
|
||||||
|
jump = JUMP(SLJIT_ZERO);
|
||||||
|
|
||||||
|
c = charoffset;
|
||||||
|
/* In case of ucp_Cf, we overwrite the result. */
|
||||||
|
SET_CHAR_OFFSET(0x2066);
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
|
||||||
|
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||||
|
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
|
||||||
|
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
|
||||||
|
|
||||||
|
/* Restore charoffset. */
|
||||||
|
SET_CHAR_OFFSET(c);
|
||||||
|
|
||||||
|
JUMPHERE(jump);
|
||||||
|
jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_PXPUNCT:
|
||||||
|
/* A type in the Sc..So range is accepted only for characters up to
   0x7f, a type in the Pc..Ps range is always accepted. */
OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Sc, ucp_So));
|
||||||
|
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
|
||||||
|
|
||||||
|
SET_CHAR_OFFSET(0);
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x7f);
|
||||||
|
OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||||
|
|
||||||
|
OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Pc, ucp_Ps));
|
||||||
|
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO);
|
||||||
|
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_PXXDIGIT:
|
||||||
|
/* A-F and a-f (bit 0x20 is cleared before the compare), 0-9, and
   the 0xff10-0xff19, 0xff21-0xff26, 0xff41-0xff46 ranges. */
SET_CHAR_OFFSET(CHAR_A);
|
||||||
|
OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, ~0x20);
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP2, 0, SLJIT_IMM, CHAR_F - CHAR_A);
|
||||||
|
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||||
|
|
||||||
|
SET_CHAR_OFFSET(CHAR_0);
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_9 - CHAR_0);
|
||||||
|
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||||
|
|
||||||
|
SET_CHAR_OFFSET(0xff10);
|
||||||
|
/* Skip the three range checks below when none of them can match. */
jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff10);
|
||||||
|
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff19 - 0xff10);
|
||||||
|
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||||
|
|
||||||
|
SET_CHAR_OFFSET(0xff21);
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff26 - 0xff21);
|
||||||
|
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||||
|
|
||||||
|
SET_CHAR_OFFSET(0xff41);
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff41);
|
||||||
|
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||||
|
|
||||||
|
/* The offset must be the same on both paths which reach the label. */
SET_CHAR_OFFSET(0xff10);
|
||||||
|
|
||||||
|
JUMPHERE(jump);
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0);
|
||||||
|
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
SLJIT_UNREACHABLE();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
cc += 2;
|
||||||
|
|
||||||
|
if (jump != NULL)
|
||||||
|
add_jump(compiler, compares > 0 ? list : backtracks, jump);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* No characters / ranges are present: the code is complete. */
if (compares == 0)
|
||||||
|
{
|
||||||
|
if (found != NULL)
|
||||||
|
set_jumps(found, LABEL());
|
||||||
|
|
||||||
|
if (status & XCLASS_IS_ECLASS)
|
||||||
|
OP2(SLJIT_OR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
/* Only the character / range check remains. The ranges are collected
   into ranges.ranges as (start, end) pairs: range_count is the number
   of stored values, so two values describe a single item. */
SLJIT_ASSERT(compares == 1);
|
||||||
|
ranges.range_count = 0;
|
||||||
|
ranges.ranges = ranges.local_ranges;
|
||||||
|
ranges.stack = ranges.local_stack;
|
||||||
|
|
||||||
|
xclass_compute_ranges(common, cc, &ranges);
|
||||||
|
|
||||||
|
/* Memory error is set for the compiler. */
|
||||||
|
if (ranges.stack == NULL)
|
||||||
|
return;
|
||||||
|
|
||||||
|
#if (defined SLJIT_DEBUG && SLJIT_DEBUG) && \
|
||||||
|
defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
|
||||||
|
/* Debug only: xclass_update_min_max must agree with the computed ranges. */
if (common->utf)
|
||||||
|
{
|
||||||
|
min = READ_CHAR_MAX;
|
||||||
|
max = 0;
|
||||||
|
xclass_update_min_max(common, cc, &min, &max);
|
||||||
|
SLJIT_ASSERT(ranges.ranges[0] == min && ranges.ranges[ranges.range_count - 1] == max);
|
||||||
|
}
|
||||||
|
#endif /* SLJIT_DEBUG && SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
|
||||||
|
|
||||||
|
/* This is the last compare, see the computation of invertcmp above. */
invertcmp = (list != backtracks);
|
||||||
|
|
||||||
|
/* A single item: one compare is enough. */
if (ranges.range_count == 2)
|
||||||
|
{
|
||||||
|
range_start = ranges.ranges[0];
|
||||||
|
range_end = ranges.ranges[1];
|
||||||
|
|
||||||
|
if (range_start < range_end)
|
||||||
|
{
|
||||||
|
SET_CHAR_OFFSET(range_start);
|
||||||
|
jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_start - charoffset));
|
||||||
|
|
||||||
|
add_jump(compiler, backtracks, jump);
|
||||||
|
|
||||||
|
SLJIT_ASSERT(ranges.stack == ranges.local_stack);
|
||||||
|
if (found != NULL)
|
||||||
|
set_jumps(found, LABEL());
|
||||||
|
|
||||||
|
if (status & XCLASS_IS_ECLASS)
|
||||||
|
OP2(SLJIT_OR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
range_start = ranges.ranges[0];
|
||||||
|
SET_CHAR_OFFSET(range_start);
|
||||||
|
if (ranges.range_count >= 6)
|
||||||
|
{
|
||||||
|
/* Early fail. */
|
||||||
|
range_end = ranges.ranges[ranges.range_count - 1];
|
||||||
|
add_jump(compiler, (flags & XCL_NOT) == 0 ? backtracks : &found,
|
||||||
|
CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The items are checked by a binary search: a long item list is split
   at mid_item, the upper part is pushed onto ranges.stack, and its code
   is emitted after the code of the lower part. Short lists are checked
   one by one, and the result is collected in TMP2. */
depth = 0;
|
||||||
|
first_item = 0;
|
||||||
|
last_item = ranges.range_count - 2;
|
||||||
|
has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV) != 0;
|
||||||
|
|
||||||
|
while (TRUE)
|
||||||
|
{
|
||||||
|
/* At least two items are present. */
|
||||||
|
SLJIT_ASSERT(first_item < last_item && charoffset == ranges.ranges[0]);
|
||||||
|
last_range_set = FALSE;
|
||||||
|
|
||||||
|
if (first_item + 6 <= last_item)
|
||||||
|
{
|
||||||
|
/* Item indices are always even: each item has a start and an end. */
mid_item = ((first_item + last_item) >> 1) & ~(sljit_u32)1;
|
||||||
|
SLJIT_ASSERT(last_item >= mid_item + 4);
|
||||||
|
|
||||||
|
range_end = ranges.ranges[mid_item + 1];
|
||||||
|
/* When the lower part is not split again, and the middle item is a
   single character, the compare which selects the upper part also
   checks the middle item: TMP2 is set from the equal flag. */
if (first_item + 6 > mid_item && ranges.ranges[mid_item] == range_end)
|
||||||
|
{
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_GREATER | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - charoffset));
|
||||||
|
ranges.stack[depth].jump = JUMP(SLJIT_GREATER);
|
||||||
|
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
|
||||||
|
last_range_set = TRUE;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
ranges.stack[depth].jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - charoffset));
|
||||||
|
|
||||||
|
ranges.stack[depth].first_item = (sljit_u32)(mid_item + 2);
|
||||||
|
ranges.stack[depth].last_item = (sljit_u32)last_item;
|
||||||
|
|
||||||
|
depth++;
|
||||||
|
SLJIT_ASSERT(ranges.stack == ranges.local_stack ?
|
||||||
|
depth <= XCLASS_LOCAL_RANGES_LOG2_SIZE : (ranges.stack + depth) <= (xclass_stack_item*)ranges.ranges);
|
||||||
|
|
||||||
|
last_item = mid_item;
|
||||||
|
if (!last_range_set)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* The middle item is already checked. */
last_item -= 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The first checked item initializes TMP2 (move instead of or). */
if (!last_range_set)
|
||||||
|
{
|
||||||
|
range_start = ranges.ranges[first_item];
|
||||||
|
range_end = ranges.ranges[first_item + 1];
|
||||||
|
|
||||||
|
if (range_start < range_end)
|
||||||
|
{
|
||||||
|
SET_CHAR_OFFSET(range_start);
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start));
|
||||||
|
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_start - charoffset));
|
||||||
|
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
|
||||||
|
}
|
||||||
|
first_item += 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
SLJIT_ASSERT(first_item <= last_item);
|
||||||
|
|
||||||
|
/* The remaining items are merged into TMP2. With conditional move
   support TMP2 is replaced by STR_END on match, otherwise the flag is
   or-ed into TMP2, and the zero flag is set by the last item. */
do
|
||||||
|
{
|
||||||
|
range_start = ranges.ranges[first_item];
|
||||||
|
range_end = ranges.ranges[first_item + 1];
|
||||||
|
|
||||||
|
if (range_start < range_end)
|
||||||
|
{
|
||||||
|
SET_CHAR_OFFSET(range_start);
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start));
|
||||||
|
|
||||||
|
if (has_cmov)
|
||||||
|
SELECT(SLJIT_LESS_EQUAL, TMP2, STR_END, 0, TMP2);
|
||||||
|
else
|
||||||
|
OP_FLAGS(SLJIT_OR | ((first_item == last_item) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_LESS_EQUAL);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_start - charoffset));
|
||||||
|
|
||||||
|
if (has_cmov)
|
||||||
|
SELECT(SLJIT_EQUAL, TMP2, STR_END, 0, TMP2);
|
||||||
|
else
|
||||||
|
OP_FLAGS(SLJIT_OR | ((first_item == last_item) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
|
||||||
|
}
|
||||||
|
|
||||||
|
first_item += 2;
|
||||||
|
}
|
||||||
|
while (first_item <= last_item);
|
||||||
|
|
||||||
|
if (depth == 0) break;
|
||||||
|
|
||||||
|
/* The result is evaluated at a common place after all branches. */
add_jump(compiler, &check_result, JUMP(SLJIT_JUMP));
|
||||||
|
|
||||||
|
/* The charoffset resets after the end of a branch is reached. */
|
||||||
|
charoffset = ranges.ranges[0];
|
||||||
|
depth--;
|
||||||
|
first_item = ranges.stack[depth].first_item;
|
||||||
|
last_item = ranges.stack[depth].last_item;
|
||||||
|
JUMPHERE(ranges.stack[depth].jump);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (check_result != NULL)
|
||||||
|
set_jumps(check_result, LABEL());
|
||||||
|
|
||||||
|
if (has_cmov)
|
||||||
|
jump = CMP(SLJIT_NOT_EQUAL ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* The zero flag was set by the last OP_FLAGS of each branch. */
sljit_set_current_flags(compiler, SLJIT_SET_Z);
|
||||||
|
jump = JUMP(SLJIT_NOT_EQUAL ^ invertcmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
add_jump(compiler, backtracks, jump);
|
||||||
|
|
||||||
|
if (found != NULL)
|
||||||
|
set_jumps(found, LABEL());
|
||||||
|
|
||||||
|
if (status & XCLASS_IS_ECLASS)
|
||||||
|
OP2(SLJIT_OR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
|
||||||
|
|
||||||
|
/* The stack is freed here when it is not the local (built-in) one. */
if (ranges.stack != ranges.local_stack)
|
||||||
|
SLJIT_FREE(ranges.stack, compiler->allocator_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Emits the matching path for an extended character class (eclass).

The eclass body is a sequence of ECL_XCLASS operands and ECL_AND / ECL_OR /
ECL_XOR / ECL_NOT operators. While the generated code runs,
ECLASS_STACK_DATA is used as a stack of one-bit results: each xclass operand
ORs its outcome into bit 0 (see the XCLASS_IS_ECLASS handling in
compile_xclass_matchingpath), a new operand first shifts the register left by
one, and each binary operator folds the two lowest bits into one.

Arguments:
  common      the compiler state
  cc          points to the link field at the start of the eclass data
  backtracks  receives the jumps taken when the character does not match

Returns:      the pointer computed as cc + GET(cc, 0) - 1 on entry
*/

static PCRE2_SPTR compile_eclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
PCRE2_SPTR end = cc + GET(cc, 0) - 1;
PCRE2_SPTR begin;
jump_list *not_found;
jump_list *found = NULL;

cc += LINK_SIZE;

/* Should be optimized later. */
read_char(common, 0, READ_CHAR_MAX, backtracks, 0);

/* An optional 32 byte bitmap follows the flag code unit. Jumps for
characters accepted by the bitmap are collected in 'found'. */
if (((*cc++) & ECL_MAP) != 0)
  {
  xclass_check_bitset(common, (const sljit_u8 *)cc, &found, backtracks);
  cc += 32 / sizeof(PCRE2_UCHAR);
  }

begin = cc;

/* Save the two working registers in LOCAL0 / LOCAL1, clear the result
stack, and keep the character that was read (TMP1) in ECLASS_CHAR_DATA. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, ECLASS_CHAR_DATA, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, ECLASS_STACK_DATA, 0);
OP1(SLJIT_MOV, ECLASS_STACK_DATA, 0, SLJIT_IMM, 0);
OP1(SLJIT_MOV, ECLASS_CHAR_DATA, 0, TMP1, 0);

/* All eclass must start with an xclass. */
SLJIT_ASSERT(*cc == ECL_XCLASS);

while (cc < end)
  {
  switch (*cc)
    {
    case ECL_AND:
    ++cc;
    /* TMP2 has every bit set except bit 0, which keeps the top result.
    After the shift, bit 0 becomes (top AND next); higher bits survive. */
    OP2(SLJIT_OR, TMP2, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, ~(sljit_sw)1);
    OP2(SLJIT_LSHR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
    OP2(SLJIT_AND, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, TMP2, 0);
    break;

    case ECL_OR:
    ++cc;
    /* TMP2 holds only the top result; it is ORed into the next one. */
    OP2(SLJIT_AND, TMP2, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
    OP2(SLJIT_LSHR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
    OP2(SLJIT_OR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, TMP2, 0);
    break;

    case ECL_XOR:
    ++cc;
    /* Same scheme as ECL_OR, combining with exclusive or. */
    OP2(SLJIT_AND, TMP2, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
    OP2(SLJIT_LSHR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
    OP2(SLJIT_XOR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, TMP2, 0);
    break;

    case ECL_NOT:
    ++cc;
    /* Invert the top result. */
    OP2(SLJIT_XOR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
    break;

    default:
    SLJIT_ASSERT(*cc == ECL_XCLASS);
    if (cc != begin)
      {
      /* Every operand after the first reloads the character and opens a
      new result slot. */
      OP1(SLJIT_MOV, TMP1, 0, ECLASS_CHAR_DATA, 0);
      OP2(SLJIT_SHL, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
      }

    /* A failing operand must not backtrack: its failure jumps land right
    after the operand, leaving bit 0 of the new slot clear. */
    not_found = NULL;
    compile_xclass_matchingpath(common, cc + 1 + LINK_SIZE, &not_found, XCLASS_IS_ECLASS);
    set_jumps(not_found, LABEL());

    cc += GET(cc, 1);
    break;
    }
  }

/* The zero test is emitted before the working registers are restored; the
conditional jump below is emitted after the two moves and uses that flag. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, ECLASS_STACK_DATA, 0, SLJIT_IMM, 0);
OP1(SLJIT_MOV, ECLASS_CHAR_DATA, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
OP1(SLJIT_MOV, ECLASS_STACK_DATA, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);
add_jump(compiler, backtracks, JUMP(SLJIT_EQUAL));
set_jumps(found, LABEL());
return end;
}
|
||||||
|
|
||||||
|
/* Generic character matching code. */
|
||||||
|
|
||||||
|
#undef SET_CHAR_OFFSET
|
||||||
|
#undef READ_FROM_CHAR_LIST
|
||||||
|
#undef XCLASS_LOCAL_RANGES_SIZE
|
||||||
|
#undef XCLASS_LOCAL_RANGES_LOG2_SIZE
|
||||||
|
|
||||||
|
#endif /* SUPPORT_WIDE_CHARS */
|
||||||
|
|
||||||
|
static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
|
||||||
|
compare_context *context, jump_list **backtracks)
|
||||||
|
{
|
||||||
|
DEFINE_COMPILER;
|
||||||
|
unsigned int othercasebit = 0;
|
||||||
|
PCRE2_SPTR othercasechar = NULL;
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
int utflength;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (caseless && char_has_othercase(common, cc))
|
||||||
|
{
|
||||||
|
othercasebit = char_get_othercase_bit(common, cc);
|
||||||
|
SLJIT_ASSERT(othercasebit);
|
||||||
|
/* Extracting bit difference info. */
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
othercasechar = cc + (othercasebit >> 8);
|
||||||
|
othercasebit &= 0xff;
|
||||||
|
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
|
/* Note that this code only handles characters in the BMP. If there
|
||||||
|
ever are characters outside the BMP whose othercase differs in only one
|
||||||
|
bit from itself (there currently are none), this code will need to be
|
||||||
|
revised for PCRE2_CODE_UNIT_WIDTH == 32. */
|
||||||
|
othercasechar = cc + (othercasebit >> 9);
|
||||||
|
if ((othercasebit & 0x100) != 0)
|
||||||
|
othercasebit = (othercasebit & 0xff) << 8;
|
||||||
|
else
|
||||||
|
othercasebit &= 0xff;
|
||||||
|
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
|
||||||
|
}
|
||||||
|
|
||||||
|
if (context->sourcereg == -1)
|
||||||
|
{
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
|
||||||
|
if (context->length >= 4)
|
||||||
|
OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||||
|
else if (context->length >= 2)
|
||||||
|
OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||||
|
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||||
|
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
|
||||||
|
if (context->length >= 4)
|
||||||
|
OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||||
|
#elif PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
|
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||||
|
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
|
||||||
|
context->sourcereg = TMP2;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
utflength = 1;
|
||||||
|
if (common->utf && HAS_EXTRALEN(*cc))
|
||||||
|
utflength += GET_EXTRALEN(*cc);
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
#endif
|
||||||
|
|
||||||
|
context->length -= IN_UCHARS(1);
|
||||||
|
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
|
||||||
|
|
||||||
|
/* Unaligned read is supported. */
|
||||||
|
if (othercasebit != 0 && othercasechar == cc)
|
||||||
|
{
|
||||||
|
context->c.asuchars[context->ucharptr] = *cc | othercasebit;
|
||||||
|
context->oc.asuchars[context->ucharptr] = othercasebit;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
context->c.asuchars[context->ucharptr] = *cc;
|
||||||
|
context->oc.asuchars[context->ucharptr] = 0;
|
||||||
|
}
|
||||||
|
context->ucharptr++;
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
|
||||||
|
#else
|
||||||
|
if (context->ucharptr >= 2 || context->length == 0)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
if (context->length >= 4)
|
||||||
|
OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||||
|
else if (context->length >= 2)
|
||||||
|
OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
else if (context->length >= 1)
|
||||||
|
OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||||
|
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||||
|
context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
|
||||||
|
|
||||||
|
switch(context->ucharptr)
|
||||||
|
{
|
||||||
|
case 4 / sizeof(PCRE2_UCHAR):
|
||||||
|
if (context->oc.asint != 0)
|
||||||
|
OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
|
||||||
|
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 2 / sizeof(PCRE2_UCHAR):
|
||||||
|
if (context->oc.asushort != 0)
|
||||||
|
OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
|
||||||
|
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
|
||||||
|
break;
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
case 1:
|
||||||
|
if (context->oc.asbyte != 0)
|
||||||
|
OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
|
||||||
|
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
default:
|
||||||
|
SLJIT_UNREACHABLE();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
context->ucharptr = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
/* Unaligned read is unsupported or in 32 bit mode. */
|
||||||
|
if (context->length >= 1)
|
||||||
|
OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
|
||||||
|
|
||||||
|
context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
|
||||||
|
|
||||||
|
if (othercasebit != 0 && othercasechar == cc)
|
||||||
|
{
|
||||||
|
OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
|
||||||
|
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
cc++;
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
utflength--;
|
||||||
|
}
|
||||||
|
while (utflength > 0);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return cc;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
|
||||||
|
/* The code in this function copies the logic of the interpreter function that
|
||||||
|
is defined in the pcre2_extuni.c source. If that code is updated, this
|
||||||
|
function, and those below it, must be kept in step (note by PH, June 2024). */
|
||||||
|
|
||||||
|
static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
|
||||||
|
{
|
||||||
|
PCRE2_SPTR start_subject = args->begin;
|
||||||
|
PCRE2_SPTR end_subject = args->end;
|
||||||
|
int lgb, rgb, ricount;
|
||||||
|
PCRE2_SPTR prevcc, endcc, bptr;
|
||||||
|
BOOL first = TRUE;
|
||||||
|
BOOL was_ep_ZWJ = FALSE;
|
||||||
|
uint32_t c;
|
||||||
|
|
||||||
|
prevcc = cc;
|
||||||
|
endcc = NULL;
|
||||||
|
do
|
||||||
|
{
|
||||||
|
GETCHARINC(c, cc);
|
||||||
|
rgb = UCD_GRAPHBREAK(c);
|
||||||
|
|
||||||
|
if (first)
|
||||||
|
{
|
||||||
|
lgb = rgb;
|
||||||
|
endcc = cc;
|
||||||
|
first = FALSE;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
|
||||||
|
preceded by Extended Pictographic. */
|
||||||
|
|
||||||
|
if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ)
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Not breaking between Regional Indicators is allowed only if there
|
||||||
|
are an even number of preceding RIs. */
|
||||||
|
|
||||||
|
if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
|
||||||
|
{
|
||||||
|
ricount = 0;
|
||||||
|
bptr = prevcc;
|
||||||
|
|
||||||
|
/* bptr is pointing to the left-hand character */
|
||||||
|
while (bptr > start_subject)
|
||||||
|
{
|
||||||
|
bptr--;
|
||||||
|
BACKCHAR(bptr);
|
||||||
|
GETCHAR(c, bptr);
|
||||||
|
|
||||||
|
if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
|
||||||
|
break;
|
||||||
|
|
||||||
|
ricount++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((ricount & 1) != 0) break; /* Grapheme break required */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in
|
||||||
|
between; see next statement). */
|
||||||
|
|
||||||
|
was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ);
|
||||||
|
|
||||||
|
/* If Extend follows Extended_Pictographic, do not update lgb; this allows
|
||||||
|
any number of them before a following ZWJ. */
|
||||||
|
|
||||||
|
if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic)
|
||||||
|
lgb = rgb;
|
||||||
|
|
||||||
|
prevcc = endcc;
|
||||||
|
endcc = cc;
|
||||||
|
}
|
||||||
|
while (cc < end_subject);
|
||||||
|
|
||||||
|
return endcc;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||||
|
|
||||||
|
/* The code in this function copies the logic of the interpreter function that
|
||||||
|
is defined in the pcre2_extuni.c source. If that code is updated, this
|
||||||
|
function, and the one below it, must be kept in step (note by PH, June 2024). */
|
||||||
|
|
||||||
|
static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
|
||||||
|
{
|
||||||
|
PCRE2_SPTR start_subject = args->begin;
|
||||||
|
PCRE2_SPTR end_subject = args->end;
|
||||||
|
int lgb, rgb, ricount;
|
||||||
|
PCRE2_SPTR prevcc, endcc, bptr;
|
||||||
|
BOOL first = TRUE;
|
||||||
|
BOOL was_ep_ZWJ = FALSE;
|
||||||
|
uint32_t c;
|
||||||
|
|
||||||
|
prevcc = cc;
|
||||||
|
endcc = NULL;
|
||||||
|
do
|
||||||
|
{
|
||||||
|
GETCHARINC_INVALID(c, cc, end_subject, break);
|
||||||
|
rgb = UCD_GRAPHBREAK(c);
|
||||||
|
|
||||||
|
if (first)
|
||||||
|
{
|
||||||
|
lgb = rgb;
|
||||||
|
endcc = cc;
|
||||||
|
first = FALSE;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
|
||||||
|
preceded by Extended Pictographic. */
|
||||||
|
|
||||||
|
if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ)
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Not breaking between Regional Indicators is allowed only if there
|
||||||
|
are an even number of preceding RIs. */
|
||||||
|
|
||||||
|
if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
|
||||||
|
{
|
||||||
|
ricount = 0;
|
||||||
|
bptr = prevcc;
|
||||||
|
|
||||||
|
/* bptr is pointing to the left-hand character */
|
||||||
|
while (bptr > start_subject)
|
||||||
|
{
|
||||||
|
GETCHARBACK_INVALID(c, bptr, start_subject, break);
|
||||||
|
|
||||||
|
if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
|
||||||
|
break;
|
||||||
|
|
||||||
|
ricount++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((ricount & 1) != 0)
|
||||||
|
break; /* Grapheme break required */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in
|
||||||
|
between; see next statement). */
|
||||||
|
|
||||||
|
was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ);
|
||||||
|
|
||||||
|
/* If Extend follows Extended_Pictographic, do not update lgb; this allows
|
||||||
|
any number of them before a following ZWJ. */
|
||||||
|
|
||||||
|
if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic)
|
||||||
|
lgb = rgb;
|
||||||
|
|
||||||
|
prevcc = endcc;
|
||||||
|
endcc = cc;
|
||||||
|
}
|
||||||
|
while (cc < end_subject);
|
||||||
|
|
||||||
|
return endcc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The code in this function copies the logic of the interpreter function that
|
||||||
|
is defined in the pcre2_extuni.c source. If that code is updated, this
|
||||||
|
function must be kept in step (note by PH, June 2024). */
|
||||||
|
|
||||||
|
static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
|
||||||
|
{
|
||||||
|
PCRE2_SPTR start_subject = args->begin;
|
||||||
|
PCRE2_SPTR end_subject = args->end;
|
||||||
|
int lgb, rgb, ricount;
|
||||||
|
PCRE2_SPTR bptr;
|
||||||
|
uint32_t c;
|
||||||
|
BOOL was_ep_ZWJ = FALSE;
|
||||||
|
|
||||||
|
/* Patch by PH */
|
||||||
|
/* GETCHARINC(c, cc); */
|
||||||
|
c = *cc++;
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
|
if (c >= 0x110000)
|
||||||
|
return cc;
|
||||||
|
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
|
||||||
|
lgb = UCD_GRAPHBREAK(c);
|
||||||
|
|
||||||
|
while (cc < end_subject)
|
||||||
|
{
|
||||||
|
c = *cc;
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
|
if (c >= 0x110000)
|
||||||
|
break;
|
||||||
|
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
|
||||||
|
rgb = UCD_GRAPHBREAK(c);
|
||||||
|
|
||||||
|
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
|
||||||
|
preceded by Extended Pictographic. */
|
||||||
|
|
||||||
|
if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ)
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Not breaking between Regional Indicators is allowed only if there
|
||||||
|
are an even number of preceding RIs. */
|
||||||
|
|
||||||
|
if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
|
||||||
|
{
|
||||||
|
ricount = 0;
|
||||||
|
bptr = cc - 1;
|
||||||
|
|
||||||
|
/* bptr is pointing to the left-hand character */
|
||||||
|
while (bptr > start_subject)
|
||||||
|
{
|
||||||
|
bptr--;
|
||||||
|
c = *bptr;
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
|
if (c >= 0x110000)
|
||||||
|
break;
|
||||||
|
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
|
||||||
|
|
||||||
|
if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break;
|
||||||
|
|
||||||
|
ricount++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((ricount & 1) != 0)
|
||||||
|
break; /* Grapheme break required */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in
|
||||||
|
between; see next statement). */
|
||||||
|
|
||||||
|
was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ);
|
||||||
|
|
||||||
|
/* If Extend follows Extended_Pictographic, do not update lgb; this allows
|
||||||
|
any number of them before a following ZWJ. */
|
||||||
|
|
||||||
|
if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic)
|
||||||
|
lgb = rgb;
|
||||||
|
|
||||||
|
cc++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return cc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Emits the check of the current character against a caseless character
list, i.e. the NOTACHAR terminated entry of ucd_caseless_sets selected by
cc[2].

Arguments:
  common      the compiler state
  cc          points to the opcode; cc[1] must be PT_CLIST and cc[2] is the
                offset into the caseless sets table
  backtracks  receives the jump taken when the check fails: for OP_PROP
                when the character is not in the list, otherwise when it is
*/

static void compile_clist(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
const sljit_u32 *list;
const sljit_u32 *item;
struct sljit_jump *jump;
sljit_u32 min = 0, max = READ_CHAR_MAX;
BOOL has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV) != 0;
BOOL positive = (cc[0] == OP_PROP);

SLJIT_ASSERT(cc[1] == PT_CLIST);

list = PRIV(ucd_caseless_sets) + cc[2];

/* For a positive match only the range spanned by the list has to be
decoded precisely. */
if (positive)
  {
  min = list[0];
  max = list[0];

  for (item = list + 1; *item != NOTACHAR; item++)
    {
    if (*item > max) max = *item;
    if (*item < min) min = *item;
    }
  }

SLJIT_ASSERT(list[0] != NOTACHAR && list[1] != NOTACHAR);
/* The entries are in ascending order; NOTACHAR, being the unsigned maximum,
also satisfies the second comparison when it is the third entry. */
SLJIT_ASSERT(list[0] < list[1] && list[1] < list[2]);

read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);

/* The first test initializes TMP2; 'item' is left on the first entry that
has not been tested yet. A pair of entries differing in a single bit is
tested with one OR and one compare. */
if (is_powerof2(list[1] ^ list[0]))
  {
  OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(list[1] ^ list[0]));
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, list[1]);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
  item = list + 2;
  }
else if (is_powerof2(list[2] ^ list[1]))
  {
  SLJIT_ASSERT(list[2] != NOTACHAR);

  OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(list[2] ^ list[1]));
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, list[2]);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);

  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)list[0]);

  if (has_cmov)
    SELECT(SLJIT_EQUAL, TMP2, STR_END, 0, TMP2);
  else
    OP_FLAGS(SLJIT_OR | ((list[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);

  item = list + 3;
  }
else
  {
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)list[0]);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
  item = list + 1;
  }

/* Accumulate the remaining entries into TMP2. Without cmov, the zero flag
is requested only on the last OR, because the final jump depends on it. */
while (*item != NOTACHAR)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*item));
  item++;

  if (has_cmov)
    SELECT(SLJIT_EQUAL, TMP2, STR_END, 0, TMP2);
  else
    OP_FLAGS(SLJIT_OR | ((*item == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
  }

/* TMP2 is non-zero when the character was found in the list. */
if (has_cmov)
  jump = CMP(positive ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
else
  jump = JUMP(positive ? SLJIT_ZERO : SLJIT_NOT_ZERO);

add_jump(compiler, backtracks, jump);
}
|
||||||
|
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
|
||||||
|
{
|
||||||
|
DEFINE_COMPILER;
|
||||||
|
int length;
|
||||||
|
unsigned int c, oc, bit;
|
||||||
|
compare_context context;
|
||||||
|
struct sljit_jump *jump[3];
|
||||||
|
jump_list *end_list;
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
PCRE2_UCHAR propdata[5];
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
switch(type)
|
||||||
|
{
|
||||||
|
case OP_NOT_DIGIT:
|
||||||
|
case OP_DIGIT:
|
||||||
|
/* Digits are usually 0-9, so it is worth to optimize them. */
|
||||||
|
if (check_str_ptr)
|
||||||
|
detect_partial_match(common, backtracks);
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
|
||||||
|
read_char7_type(common, backtracks, type == OP_NOT_DIGIT);
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
read_char8_type(common, backtracks, type == OP_NOT_DIGIT);
|
||||||
|
/* Flip the starting bit in the negative case. */
|
||||||
|
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_digit);
|
||||||
|
add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
|
||||||
|
return cc;
|
||||||
|
|
||||||
|
case OP_NOT_WHITESPACE:
|
||||||
|
case OP_WHITESPACE:
|
||||||
|
if (check_str_ptr)
|
||||||
|
detect_partial_match(common, backtracks);
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
|
||||||
|
read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE);
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE);
|
||||||
|
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_space);
|
||||||
|
add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
|
||||||
|
return cc;
|
||||||
|
|
||||||
|
case OP_NOT_WORDCHAR:
|
||||||
|
case OP_WORDCHAR:
|
||||||
|
if (check_str_ptr)
|
||||||
|
detect_partial_match(common, backtracks);
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
|
||||||
|
read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR);
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR);
|
||||||
|
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_word);
|
||||||
|
add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
|
||||||
|
return cc;
|
||||||
|
|
||||||
|
case OP_ANY:
|
||||||
|
if (check_str_ptr)
|
||||||
|
detect_partial_match(common, backtracks);
|
||||||
|
read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||||
|
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
|
||||||
|
{
|
||||||
|
jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
|
||||||
|
end_list = NULL;
|
||||||
|
if (common->mode != PCRE2_JIT_PARTIAL_HARD)
|
||||||
|
add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||||
|
else
|
||||||
|
check_str_end(common, &end_list);
|
||||||
|
|
||||||
|
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
|
||||||
|
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
|
||||||
|
set_jumps(end_list, LABEL());
|
||||||
|
JUMPHERE(jump[0]);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
check_newlinechar(common, common->nltype, backtracks, TRUE);
|
||||||
|
return cc;
|
||||||
|
|
||||||
|
case OP_ALLANY:
|
||||||
|
if (check_str_ptr)
|
||||||
|
detect_partial_match(common, backtracks);
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (common->utf && common->invalid_utf)
|
||||||
|
{
|
||||||
|
read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||||
|
return cc;
|
||||||
|
}
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
skip_valid_char(common);
|
||||||
|
return cc;
|
||||||
|
|
||||||
|
case OP_ANYBYTE:
|
||||||
|
if (check_str_ptr)
|
||||||
|
detect_partial_match(common, backtracks);
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||||
|
return cc;
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
case OP_NOTPROP:
|
||||||
|
case OP_PROP:
|
||||||
|
if (check_str_ptr)
|
||||||
|
detect_partial_match(common, backtracks);
|
||||||
|
if (cc[0] == PT_CLIST)
|
||||||
|
{
|
||||||
|
compile_clist(common, cc - 1, backtracks);
|
||||||
|
return cc + 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
propdata[0] = 0;
|
||||||
|
propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
|
||||||
|
propdata[2] = cc[0];
|
||||||
|
propdata[3] = cc[1];
|
||||||
|
propdata[4] = XCL_END;
|
||||||
|
compile_xclass_matchingpath(common, propdata, backtracks, 0);
|
||||||
|
return cc + 2;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
case OP_ANYNL:
|
||||||
|
if (check_str_ptr)
|
||||||
|
detect_partial_match(common, backtracks);
|
||||||
|
read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
|
||||||
|
jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
|
||||||
|
/* We don't need to handle soft partial matching case. */
|
||||||
|
end_list = NULL;
|
||||||
|
if (common->mode != PCRE2_JIT_PARTIAL_HARD)
|
||||||
|
add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||||
|
else
|
||||||
|
check_str_end(common, &end_list);
|
||||||
|
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NL);
|
||||||
|
OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
|
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
|
||||||
|
#endif
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
|
||||||
|
jump[1] = JUMP(SLJIT_JUMP);
|
||||||
|
JUMPHERE(jump[0]);
|
||||||
|
check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
|
||||||
|
set_jumps(end_list, LABEL());
|
||||||
|
JUMPHERE(jump[1]);
|
||||||
|
return cc;
|
||||||
|
|
||||||
|
case OP_NOT_HSPACE:
|
||||||
|
case OP_HSPACE:
|
||||||
|
if (check_str_ptr)
|
||||||
|
detect_partial_match(common, backtracks);
|
||||||
|
|
||||||
|
if (type == OP_NOT_HSPACE)
|
||||||
|
read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||||
|
else
|
||||||
|
read_char(common, 0x9, 0x3000, NULL, 0);
|
||||||
|
|
||||||
|
add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
|
||||||
|
sljit_set_current_flags(compiler, SLJIT_SET_Z);
|
||||||
|
add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
|
||||||
|
return cc;
|
||||||
|
|
||||||
|
case OP_NOT_VSPACE:
|
||||||
|
case OP_VSPACE:
|
||||||
|
if (check_str_ptr)
|
||||||
|
detect_partial_match(common, backtracks);
|
||||||
|
|
||||||
|
if (type == OP_NOT_VSPACE)
|
||||||
|
read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||||
|
else
|
||||||
|
read_char(common, 0xa, 0x2029, NULL, 0);
|
||||||
|
|
||||||
|
add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
|
||||||
|
sljit_set_current_flags(compiler, SLJIT_SET_Z);
|
||||||
|
add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
|
||||||
|
return cc;
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
case OP_EXTUNI:
|
||||||
|
if (check_str_ptr)
|
||||||
|
detect_partial_match(common, backtracks);
|
||||||
|
|
||||||
|
SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
|
||||||
|
OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
|
||||||
|
common->utf ? (common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_utf)) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
|
||||||
|
if (common->invalid_utf)
|
||||||
|
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
|
||||||
|
#else
|
||||||
|
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
|
||||||
|
common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
|
||||||
|
if (common->invalid_utf)
|
||||||
|
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
|
||||||
|
|
||||||
|
if (common->mode == PCRE2_JIT_PARTIAL_HARD)
|
||||||
|
{
|
||||||
|
jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
|
||||||
|
/* Since we successfully read a char above, partial matching must occur. */
|
||||||
|
check_partial(common, TRUE);
|
||||||
|
JUMPHERE(jump[0]);
|
||||||
|
}
|
||||||
|
return cc;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
case OP_CHAR:
|
||||||
|
case OP_CHARI:
|
||||||
|
length = 1;
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
|
||||||
|
detect_partial_match(common, backtracks);
|
||||||
|
|
||||||
|
if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
|
||||||
|
{
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
|
||||||
|
if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
|
||||||
|
add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
|
||||||
|
|
||||||
|
context.length = IN_UCHARS(length);
|
||||||
|
context.sourcereg = -1;
|
||||||
|
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
|
||||||
|
context.ucharptr = 0;
|
||||||
|
#endif
|
||||||
|
return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (common->utf)
|
||||||
|
{
|
||||||
|
GETCHAR(c, cc);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
c = *cc;
|
||||||
|
|
||||||
|
SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));
|
||||||
|
|
||||||
|
if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
|
||||||
|
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||||
|
|
||||||
|
oc = char_othercase(common, c);
|
||||||
|
read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0);
|
||||||
|
|
||||||
|
SLJIT_ASSERT(!is_powerof2(c ^ oc));
|
||||||
|
|
||||||
|
if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
|
||||||
|
{
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, oc);
|
||||||
|
SELECT(SLJIT_EQUAL, TMP1, SLJIT_IMM, c, TMP1);
|
||||||
|
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
|
||||||
|
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
|
||||||
|
JUMPHERE(jump[0]);
|
||||||
|
}
|
||||||
|
return cc + length;
|
||||||
|
|
||||||
|
case OP_NOT:
|
||||||
|
case OP_NOTI:
|
||||||
|
if (check_str_ptr)
|
||||||
|
detect_partial_match(common, backtracks);
|
||||||
|
|
||||||
|
length = 1;
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (common->utf)
|
||||||
|
{
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
c = *cc;
|
||||||
|
if (c < 128 && !common->invalid_utf)
|
||||||
|
{
|
||||||
|
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
|
||||||
|
if (type == OP_NOT || !char_has_othercase(common, cc))
|
||||||
|
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
|
||||||
|
OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
|
||||||
|
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
|
||||||
|
}
|
||||||
|
/* Skip the variable-length character. */
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||||
|
jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
|
||||||
|
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
|
||||||
|
JUMPHERE(jump[0]);
|
||||||
|
return cc + 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||||
|
{
|
||||||
|
GETCHARLEN(c, cc, length);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
c = *cc;
|
||||||
|
|
||||||
|
if (type == OP_NOT || !char_has_othercase(common, cc))
|
||||||
|
{
|
||||||
|
read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||||
|
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
oc = char_othercase(common, c);
|
||||||
|
read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||||
|
bit = c ^ oc;
|
||||||
|
if (is_powerof2(bit))
|
||||||
|
{
|
||||||
|
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
|
||||||
|
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
|
||||||
|
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return cc + length;
|
||||||
|
|
||||||
|
case OP_CLASS:
|
||||||
|
case OP_NCLASS:
|
||||||
|
if (check_str_ptr)
|
||||||
|
detect_partial_match(common, backtracks);
|
||||||
|
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
|
||||||
|
if (type == OP_NCLASS)
|
||||||
|
read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||||
|
else
|
||||||
|
read_char(common, 0, bit, NULL, 0);
|
||||||
|
#else
|
||||||
|
if (type == OP_NCLASS)
|
||||||
|
read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||||
|
else
|
||||||
|
read_char(common, 0, 255, NULL, 0);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
|
||||||
|
return cc + 32 / sizeof(PCRE2_UCHAR);
|
||||||
|
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
jump[0] = NULL;
|
||||||
|
if (common->utf)
|
||||||
|
{
|
||||||
|
jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
|
||||||
|
if (type == OP_CLASS)
|
||||||
|
{
|
||||||
|
add_jump(compiler, backtracks, jump[0]);
|
||||||
|
jump[0] = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#elif PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
|
jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
|
||||||
|
if (type == OP_CLASS)
|
||||||
|
{
|
||||||
|
add_jump(compiler, backtracks, jump[0]);
|
||||||
|
jump[0] = NULL;
|
||||||
|
}
|
||||||
|
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||||
|
|
||||||
|
OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
|
||||||
|
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
|
||||||
|
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
|
||||||
|
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
|
||||||
|
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
|
||||||
|
add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
|
||||||
|
|
||||||
|
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
|
if (jump[0] != NULL)
|
||||||
|
JUMPHERE(jump[0]);
|
||||||
|
#endif
|
||||||
|
return cc + 32 / sizeof(PCRE2_UCHAR);
|
||||||
|
|
||||||
|
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
|
case OP_XCLASS:
|
||||||
|
if (check_str_ptr)
|
||||||
|
detect_partial_match(common, backtracks);
|
||||||
|
compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks, 0);
|
||||||
|
return cc + GET(cc, 0) - 1;
|
||||||
|
|
||||||
|
case OP_ECLASS:
|
||||||
|
if (check_str_ptr)
|
||||||
|
detect_partial_match(common, backtracks);
|
||||||
|
return compile_eclass_matchingpath(common, cc, backtracks);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
SLJIT_UNREACHABLE();
|
||||||
|
return cc;
|
||||||
|
}
|
||||||
|
|
||||||
|
static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
{
/* Emits the matching path for the item at cc. At least one input character is
consumed. Consecutive OP_CHAR / OP_CHARI items of fixed length are measured
first, so that one end-of-subject check can cover the whole run. */
DEFINE_COMPILER;
PCRE2_SPTR scan = cc;
compare_context context;
int units;
int caseless;

/* Pass 1: add up the code units of the fixed-length run that starts at cc.
Only scan moves; cc keeps pointing at the first item. */
context.length = 0;
for (;;)
  {
  if (scan >= ccend)
    break;

  if (*scan == OP_CHAR)
    {
    units = 1;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(scan[1]))
      units += GET_EXTRALEN(scan[1]);
#endif
    }
  else if (*scan == OP_CHARI)
    {
    /* A caseless character that has an other case but no usable other-case
    bit cannot be part of the run. */
    if (char_has_othercase(common, scan + 1) && char_get_othercase_bit(common, scan + 1) == 0)
      units = 0;
    else
      {
      units = 1;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(scan[1]))
        units += GET_EXTRALEN(scan[1]);
#endif
      }
    }
  else
    units = 0;

  scan += 1 + units;
  context.length += IN_UCHARS(units);

  /* Stop at the first item that does not extend the run, or once the run
  has grown past 128. */
  if (units <= 0 || context.length > 128)
    break;
  }

if (context.length <= 0)
  {
  /* A non-fixed length character will be checked if length == 0. */
  return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
  }

/* Pass 2: we have a fixed-length byte sequence. Advance STR_PTR over it once,
check against STR_END once, then compare the items one by one. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

context.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
context.ucharptr = 0;
#endif

do
  {
  caseless = (*cc == OP_CHARI);
  cc = byte_sequence_compare(common, caseless, cc + 1, &context, backtracks);
  }
while (context.length > 0);

return cc;
}
|
||||||
|
|
||||||
|
|
||||||
14105
3rd/pcre2/src/pcre2_jit_compile.c
Normal file
14105
3rd/pcre2/src/pcre2_jit_compile.c
Normal file
File diff suppressed because it is too large
Load Diff
200
3rd/pcre2/src/pcre2_jit_match.c
Normal file
200
3rd/pcre2/src/pcre2_jit_match.c
Normal file
@@ -0,0 +1,200 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
New API code Copyright (c) 2016-2023 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef INCLUDED_FROM_PCRE2_JIT_COMPILE
|
||||||
|
#error This file must be included from pcre2_jit_compile.c.
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__has_feature)
|
||||||
|
#if __has_feature(memory_sanitizer)
|
||||||
|
#include <sanitizer/msan_interface.h>
|
||||||
|
#endif /* __has_feature(memory_sanitizer) */
|
||||||
|
#endif /* defined(__has_feature) */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_JIT
|
||||||
|
|
||||||
|
static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, jit_function executable_func)
{
/* Calls executable_func with a stack descriptor that covers a
MACHINE_STACK_SIZE byte buffer local to this frame. */
sljit_u8 buffer[MACHINE_STACK_SIZE];
sljit_u8 *buffer_end = buffer + MACHINE_STACK_SIZE;
struct sljit_stack machine_stack;

/* top and end both point one past the buffer; start and min_start point at
its first byte. */
machine_stack.top = buffer_end;
machine_stack.end = buffer_end;
machine_stack.start = buffer;
machine_stack.min_start = buffer;

arguments->stack = &machine_stack;
return executable_func(arguments);
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Do a JIT pattern match *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function runs a JIT pattern match.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
code points to the compiled expression
|
||||||
|
subject points to the subject string
|
||||||
|
length length of subject string (may contain binary zeros)
|
||||||
|
start_offset where to start in the subject string
|
||||||
|
options option bits
|
||||||
|
match_data points to a match_data block
|
||||||
|
mcontext points to a match context
|
||||||
|
|
||||||
|
Returns: > 0 => success; value is the number of ovector pairs filled
|
||||||
|
= 0 => success, but ovector is not big enough
|
||||||
|
-1 => failed to match (PCRE2_ERROR_NOMATCH)
|
||||||
|
< -1 => some kind of unexpected problem
|
||||||
|
*/
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
|
||||||
|
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
|
||||||
|
pcre2_match_context *mcontext)
|
||||||
|
{
|
||||||
|
#ifndef SUPPORT_JIT
|
||||||
|
|
||||||
|
(void)code;
|
||||||
|
(void)subject;
|
||||||
|
(void)length;
|
||||||
|
(void)start_offset;
|
||||||
|
(void)options;
|
||||||
|
(void)match_data;
|
||||||
|
(void)mcontext;
|
||||||
|
return PCRE2_ERROR_JIT_BADOPTION;
|
||||||
|
|
||||||
|
#else /* SUPPORT_JIT */
|
||||||
|
|
||||||
|
pcre2_real_code *re = (pcre2_real_code *)code;
|
||||||
|
executable_functions *functions = (executable_functions *)re->executable_jit;
|
||||||
|
pcre2_jit_stack *jit_stack;
|
||||||
|
uint32_t oveccount = match_data->oveccount;
|
||||||
|
uint32_t max_oveccount;
|
||||||
|
union {
|
||||||
|
void *executable_func;
|
||||||
|
jit_function call_executable_func;
|
||||||
|
} convert_executable_func;
|
||||||
|
jit_arguments arguments;
|
||||||
|
int rc;
|
||||||
|
int index = 0;
|
||||||
|
|
||||||
|
if ((options & PCRE2_PARTIAL_HARD) != 0)
|
||||||
|
index = 2;
|
||||||
|
else if ((options & PCRE2_PARTIAL_SOFT) != 0)
|
||||||
|
index = 1;
|
||||||
|
|
||||||
|
if (functions == NULL || functions->executable_funcs[index] == NULL)
|
||||||
|
return PCRE2_ERROR_JIT_BADOPTION;
|
||||||
|
|
||||||
|
/* Sanity checks should be handled by pcre2_match. */
|
||||||
|
arguments.str = subject + start_offset;
|
||||||
|
arguments.begin = subject;
|
||||||
|
arguments.end = subject + length;
|
||||||
|
arguments.match_data = match_data;
|
||||||
|
arguments.startchar_ptr = subject;
|
||||||
|
arguments.mark_ptr = NULL;
|
||||||
|
arguments.options = options;
|
||||||
|
|
||||||
|
if (mcontext != NULL)
|
||||||
|
{
|
||||||
|
arguments.callout = mcontext->callout;
|
||||||
|
arguments.callout_data = mcontext->callout_data;
|
||||||
|
arguments.offset_limit = mcontext->offset_limit;
|
||||||
|
arguments.limit_match = (mcontext->match_limit < re->limit_match)?
|
||||||
|
mcontext->match_limit : re->limit_match;
|
||||||
|
if (mcontext->jit_callback != NULL)
|
||||||
|
jit_stack = mcontext->jit_callback(mcontext->jit_callback_data);
|
||||||
|
else
|
||||||
|
jit_stack = (pcre2_jit_stack *)mcontext->jit_callback_data;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
arguments.callout = NULL;
|
||||||
|
arguments.callout_data = NULL;
|
||||||
|
arguments.offset_limit = PCRE2_UNSET;
|
||||||
|
arguments.limit_match = (MATCH_LIMIT < re->limit_match)?
|
||||||
|
MATCH_LIMIT : re->limit_match;
|
||||||
|
jit_stack = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
max_oveccount = functions->top_bracket;
|
||||||
|
if (oveccount > max_oveccount)
|
||||||
|
oveccount = max_oveccount;
|
||||||
|
arguments.oveccount = oveccount << 1;
|
||||||
|
|
||||||
|
|
||||||
|
convert_executable_func.executable_func = functions->executable_funcs[index];
|
||||||
|
if (jit_stack != NULL)
|
||||||
|
{
|
||||||
|
arguments.stack = (struct sljit_stack *)(jit_stack->stack);
|
||||||
|
rc = convert_executable_func.call_executable_func(&arguments);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
rc = jit_machine_stack_exec(&arguments, convert_executable_func.call_executable_func);
|
||||||
|
|
||||||
|
if (rc > (int)oveccount)
|
||||||
|
rc = 0;
|
||||||
|
match_data->code = re;
|
||||||
|
match_data->subject = (rc >= 0 || rc == PCRE2_ERROR_PARTIAL)? subject : NULL;
|
||||||
|
match_data->subject_length = length;
|
||||||
|
match_data->rc = rc;
|
||||||
|
match_data->startchar = arguments.startchar_ptr - subject;
|
||||||
|
match_data->leftchar = 0;
|
||||||
|
match_data->rightchar = 0;
|
||||||
|
match_data->mark = arguments.mark_ptr;
|
||||||
|
match_data->matchedby = PCRE2_MATCHEDBY_JIT;
|
||||||
|
|
||||||
|
#if defined(__has_feature)
|
||||||
|
#if __has_feature(memory_sanitizer)
|
||||||
|
if (rc > 0)
|
||||||
|
__msan_unpoison(match_data->ovector, 2 * rc * sizeof(match_data->ovector[0]));
|
||||||
|
#endif /* __has_feature(memory_sanitizer) */
|
||||||
|
#endif /* defined(__has_feature) */
|
||||||
|
|
||||||
|
return match_data->rc;
|
||||||
|
|
||||||
|
#endif /* SUPPORT_JIT */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre2_jit_match.c */
|
||||||
234
3rd/pcre2/src/pcre2_jit_misc.c
Normal file
234
3rd/pcre2/src/pcre2_jit_misc.c
Normal file
@@ -0,0 +1,234 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
New API code Copyright (c) 2016 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef INCLUDED_FROM_PCRE2_JIT_COMPILE
|
||||||
|
#error This file must be included from pcre2_jit_compile.c.
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Free JIT read-only data *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
void
|
||||||
|
PRIV(jit_free_rodata)(void *current, void *allocator_data)
|
||||||
|
{
|
||||||
|
#ifndef SUPPORT_JIT
|
||||||
|
(void)current;
|
||||||
|
(void)allocator_data;
|
||||||
|
#else /* SUPPORT_JIT */
|
||||||
|
void *next;
|
||||||
|
|
||||||
|
SLJIT_UNUSED_ARG(allocator_data);
|
||||||
|
|
||||||
|
while (current != NULL)
|
||||||
|
{
|
||||||
|
next = *(void**)current;
|
||||||
|
SLJIT_FREE(current, allocator_data);
|
||||||
|
current = next;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* SUPPORT_JIT */
|
||||||
|
}
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Free JIT compiled code *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
void
|
||||||
|
PRIV(jit_free)(void *executable_jit, pcre2_memctl *memctl)
|
||||||
|
{
|
||||||
|
#ifndef SUPPORT_JIT
|
||||||
|
(void)executable_jit;
|
||||||
|
(void)memctl;
|
||||||
|
#else /* SUPPORT_JIT */
|
||||||
|
|
||||||
|
executable_functions *functions = (executable_functions *)executable_jit;
|
||||||
|
void *allocator_data = memctl;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
|
||||||
|
{
|
||||||
|
if (functions->executable_funcs[i] != NULL)
|
||||||
|
sljit_free_code(functions->executable_funcs[i], NULL);
|
||||||
|
PRIV(jit_free_rodata)(functions->read_only_data_heads[i], allocator_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
SLJIT_FREE(functions, allocator_data);
|
||||||
|
|
||||||
|
#endif /* SUPPORT_JIT */
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Free unused JIT memory *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_jit_free_unused_memory(pcre2_general_context *gcontext)
|
||||||
|
{
|
||||||
|
#ifndef SUPPORT_JIT
|
||||||
|
(void)gcontext; /* Suppress warning */
|
||||||
|
#else /* SUPPORT_JIT */
|
||||||
|
SLJIT_UNUSED_ARG(gcontext);
|
||||||
|
#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
|
||||||
|
sljit_free_unused_memory_exec();
|
||||||
|
#endif /* SLJIT_EXECUTABLE_ALLOCATOR */
|
||||||
|
#endif /* SUPPORT_JIT */
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Allocate a JIT stack *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN pcre2_jit_stack * PCRE2_CALL_CONVENTION
pcre2_jit_stack_create(size_t startsize, size_t maxsize,
  pcre2_general_context *gcontext)
{
/* Creates a JIT stack. Both sizes are rounded up to a multiple of
STACK_GROWTH_RATE, and startsize is capped at maxsize. Returns NULL when a
size is zero, when maxsize is too large to be rounded up, or when an
allocation fails (and always when JIT support is not compiled in). */
#ifdef SUPPORT_JIT

pcre2_jit_stack *jit_stack;
const size_t round = STACK_GROWTH_RATE - 1;

if (startsize == 0)
  return NULL;
if (maxsize == 0)
  return NULL;
/* Rounding maxsize up must not wrap around. */
if (maxsize > SIZE_MAX - STACK_GROWTH_RATE)
  return NULL;

if (startsize > maxsize)
  startsize = maxsize;

startsize = (startsize + round) & ~round;
maxsize = (maxsize + round) & ~round;

jit_stack = PRIV(memctl_malloc)(sizeof(pcre2_real_jit_stack), (pcre2_memctl *)gcontext);
if (jit_stack == NULL)
  return NULL;

jit_stack->stack = sljit_allocate_stack(startsize, maxsize, &jit_stack->memctl);
if (jit_stack->stack != NULL)
  return jit_stack;

/* The stack itself could not be allocated: release the wrapper again. */
jit_stack->memctl.free(jit_stack, jit_stack->memctl.memory_data);
return NULL;

#else /* SUPPORT_JIT */

(void)gcontext;
(void)startsize;
(void)maxsize;
return NULL;

#endif /* SUPPORT_JIT */
}
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Assign a JIT stack to a pattern *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_jit_stack_assign(pcre2_match_context *mcontext, pcre2_jit_callback callback,
|
||||||
|
void *callback_data)
|
||||||
|
{
|
||||||
|
#ifndef SUPPORT_JIT
|
||||||
|
(void)mcontext;
|
||||||
|
(void)callback;
|
||||||
|
(void)callback_data;
|
||||||
|
#else /* SUPPORT_JIT */
|
||||||
|
|
||||||
|
if (mcontext == NULL) return;
|
||||||
|
mcontext->jit_callback = callback;
|
||||||
|
mcontext->jit_callback_data = callback_data;
|
||||||
|
|
||||||
|
#endif /* SUPPORT_JIT */
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Free a JIT stack *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_jit_stack_free(pcre2_jit_stack *jit_stack)
|
||||||
|
{
|
||||||
|
#ifndef SUPPORT_JIT
|
||||||
|
(void)jit_stack;
|
||||||
|
#else /* SUPPORT_JIT */
|
||||||
|
if (jit_stack != NULL)
|
||||||
|
{
|
||||||
|
sljit_free_stack((struct sljit_stack *)(jit_stack->stack), &jit_stack->memctl);
|
||||||
|
jit_stack->memctl.free(jit_stack, jit_stack->memctl.memory_data);
|
||||||
|
}
|
||||||
|
#endif /* SUPPORT_JIT */
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Get target CPU type *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
const char*
|
||||||
|
PRIV(jit_get_target)(void)
|
||||||
|
{
|
||||||
|
#ifndef SUPPORT_JIT
|
||||||
|
return "JIT is not supported";
|
||||||
|
#else /* SUPPORT_JIT */
|
||||||
|
return sljit_get_platform_name();
|
||||||
|
#endif /* SUPPORT_JIT */
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Get size of JIT code *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
size_t
|
||||||
|
PRIV(jit_get_size)(void *executable_jit)
|
||||||
|
{
|
||||||
|
#ifndef SUPPORT_JIT
|
||||||
|
(void)executable_jit;
|
||||||
|
return 0;
|
||||||
|
#else /* SUPPORT_JIT */
|
||||||
|
sljit_uw *executable_sizes = ((executable_functions *)executable_jit)->executable_sizes;
|
||||||
|
SLJIT_COMPILE_ASSERT(JIT_NUMBER_OF_COMPILE_MODES == 3, number_of_compile_modes_changed);
|
||||||
|
return executable_sizes[0] + executable_sizes[1] + executable_sizes[2];
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre2_jit_misc.c */
|
||||||
354
3rd/pcre2/src/pcre2_jit_neon_inc.h
Normal file
354
3rd/pcre2/src/pcre2_jit_neon_inc.h
Normal file
@@ -0,0 +1,354 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
This module by Zoltan Herczeg and Sebastian Pop
|
||||||
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
New API code Copyright (c) 2016-2019 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Choose the value of FF_FUN: the FFCS / FFCS_2 / FFCS_MASK / FFCPS_0 /
FFCPS_1 / FFCPS_DEFAULT macro selects the base name, and FF_UTF selects the
"_utf" variant. If none of the selector macros is defined, FF_FUN is left
undefined. */
#if defined(FFCS)
#  ifdef FF_UTF
#    define FF_FUN ffcs_utf
#  else
#    define FF_FUN ffcs
#  endif

#elif defined(FFCS_2)
#  ifdef FF_UTF
#    define FF_FUN ffcs_2_utf
#  else
#    define FF_FUN ffcs_2
#  endif

#elif defined(FFCS_MASK)
#  ifdef FF_UTF
#    define FF_FUN ffcs_mask_utf
#  else
#    define FF_FUN ffcs_mask
#  endif

#elif defined(FFCPS_0)
#  ifdef FF_UTF
#    define FF_FUN ffcps_0_utf
#  else
#    define FF_FUN ffcps_0
#  endif

#elif defined(FFCPS_1)
#  ifdef FF_UTF
#    define FF_FUN ffcps_1_utf
#  else
#    define FF_FUN ffcps_1
#  endif

#elif defined(FFCPS_DEFAULT)
#  ifdef FF_UTF
#    define FF_FUN ffcps_default_utf
#  else
#    define FF_FUN ffcps_default
#  endif
#endif
|
||||||
|
|
||||||
|
#if (defined(__GNUC__) && defined(__SANITIZE_ADDRESS__) && __SANITIZE_ADDRESS__ ) \
|
||||||
|
|| (defined(__clang__) \
|
||||||
|
&& ((__clang_major__ == 3 && __clang_minor__ >= 3) || (__clang_major__ > 3)))
|
||||||
|
__attribute__((no_sanitize_address))
|
||||||
|
#endif
|
||||||
|
static sljit_u8* SLJIT_FUNC FF_FUN(sljit_u8 *str_end, sljit_u8 **str_ptr, sljit_uw offs1, sljit_uw offs2, sljit_uw chars)
|
||||||
|
#undef FF_FUN
|
||||||
|
{
|
||||||
|
quad_word qw;
|
||||||
|
int_char ic;
|
||||||
|
|
||||||
|
SLJIT_UNUSED_ARG(offs1);
|
||||||
|
SLJIT_UNUSED_ARG(offs2);
|
||||||
|
|
||||||
|
ic.x = chars;
|
||||||
|
|
||||||
|
#if defined(FFCS)
|
||||||
|
sljit_u8 c1 = ic.c.c1;
|
||||||
|
vect_t vc1 = VDUPQ(c1);
|
||||||
|
|
||||||
|
#elif defined(FFCS_2)
|
||||||
|
sljit_u8 c1 = ic.c.c1;
|
||||||
|
vect_t vc1 = VDUPQ(c1);
|
||||||
|
sljit_u8 c2 = ic.c.c2;
|
||||||
|
vect_t vc2 = VDUPQ(c2);
|
||||||
|
|
||||||
|
#elif defined(FFCS_MASK)
|
||||||
|
sljit_u8 c1 = ic.c.c1;
|
||||||
|
vect_t vc1 = VDUPQ(c1);
|
||||||
|
sljit_u8 mask = ic.c.c2;
|
||||||
|
vect_t vmask = VDUPQ(mask);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(FFCPS)
|
||||||
|
compare_type compare1_type = compare_match1;
|
||||||
|
compare_type compare2_type = compare_match1;
|
||||||
|
vect_t cmp1a, cmp1b, cmp2a, cmp2b;
|
||||||
|
const sljit_u32 diff = IN_UCHARS(offs1 - offs2);
|
||||||
|
PCRE2_UCHAR char1a = ic.c.c1;
|
||||||
|
PCRE2_UCHAR char2a = ic.c.c3;
|
||||||
|
|
||||||
|
# ifdef FFCPS_CHAR1A2A
|
||||||
|
cmp1a = VDUPQ(char1a);
|
||||||
|
cmp2a = VDUPQ(char2a);
|
||||||
|
cmp1b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
|
||||||
|
cmp2b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
|
||||||
|
# else
|
||||||
|
PCRE2_UCHAR char1b = ic.c.c2;
|
||||||
|
PCRE2_UCHAR char2b = ic.c.c4;
|
||||||
|
if (char1a == char1b)
|
||||||
|
{
|
||||||
|
cmp1a = VDUPQ(char1a);
|
||||||
|
cmp1b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sljit_u32 bit1 = char1a ^ char1b;
|
||||||
|
if (is_powerof2(bit1))
|
||||||
|
{
|
||||||
|
compare1_type = compare_match1i;
|
||||||
|
cmp1a = VDUPQ(char1a | bit1);
|
||||||
|
cmp1b = VDUPQ(bit1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
compare1_type = compare_match2;
|
||||||
|
cmp1a = VDUPQ(char1a);
|
||||||
|
cmp1b = VDUPQ(char1b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (char2a == char2b)
|
||||||
|
{
|
||||||
|
cmp2a = VDUPQ(char2a);
|
||||||
|
cmp2b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sljit_u32 bit2 = char2a ^ char2b;
|
||||||
|
if (is_powerof2(bit2))
|
||||||
|
{
|
||||||
|
compare2_type = compare_match1i;
|
||||||
|
cmp2a = VDUPQ(char2a | bit2);
|
||||||
|
cmp2b = VDUPQ(bit2);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
compare2_type = compare_match2;
|
||||||
|
cmp2a = VDUPQ(char2a);
|
||||||
|
cmp2b = VDUPQ(char2b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
# endif
|
||||||
|
|
||||||
|
*str_ptr += IN_UCHARS(offs1);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
|
vect_t char_mask = VDUPQ(0xff);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(FF_UTF)
|
||||||
|
restart:;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(FFCPS)
|
||||||
|
if (*str_ptr >= str_end)
|
||||||
|
return NULL;
|
||||||
|
sljit_u8 *p1 = *str_ptr - diff;
|
||||||
|
#endif
|
||||||
|
sljit_s32 align_offset = ((uint64_t)*str_ptr & 0xf);
|
||||||
|
*str_ptr = (sljit_u8 *) ((uint64_t)*str_ptr & ~0xf);
|
||||||
|
vect_t data = VLD1Q(*str_ptr);
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
|
data = VANDQ(data, char_mask);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(FFCS)
|
||||||
|
vect_t eq = VCEQQ(data, vc1);
|
||||||
|
|
||||||
|
#elif defined(FFCS_2)
|
||||||
|
vect_t eq1 = VCEQQ(data, vc1);
|
||||||
|
vect_t eq2 = VCEQQ(data, vc2);
|
||||||
|
vect_t eq = VORRQ(eq1, eq2);
|
||||||
|
|
||||||
|
#elif defined(FFCS_MASK)
|
||||||
|
vect_t eq = VORRQ(data, vmask);
|
||||||
|
eq = VCEQQ(eq, vc1);
|
||||||
|
|
||||||
|
#elif defined(FFCPS)
|
||||||
|
# if defined(FFCPS_DIFF1)
|
||||||
|
vect_t prev_data = data;
|
||||||
|
# endif
|
||||||
|
|
||||||
|
vect_t data2;
|
||||||
|
if (p1 < *str_ptr)
|
||||||
|
{
|
||||||
|
data2 = VLD1Q(*str_ptr - diff);
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
|
data2 = VANDQ(data2, char_mask);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
else
|
||||||
|
data2 = shift_left_n_lanes(data, offs1 - offs2);
|
||||||
|
|
||||||
|
if (compare1_type == compare_match1)
|
||||||
|
data = VCEQQ(data, cmp1a);
|
||||||
|
else
|
||||||
|
data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b);
|
||||||
|
|
||||||
|
if (compare2_type == compare_match1)
|
||||||
|
data2 = VCEQQ(data2, cmp2a);
|
||||||
|
else
|
||||||
|
data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b);
|
||||||
|
|
||||||
|
vect_t eq = VANDQ(data, data2);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
VST1Q(qw.mem, eq);
|
||||||
|
/* Ignore matches before the first STR_PTR. */
|
||||||
|
if (align_offset < 8)
|
||||||
|
{
|
||||||
|
qw.dw[0] >>= align_offset * 8;
|
||||||
|
if (qw.dw[0])
|
||||||
|
{
|
||||||
|
*str_ptr += align_offset + __builtin_ctzll(qw.dw[0]) / 8;
|
||||||
|
goto match;
|
||||||
|
}
|
||||||
|
if (qw.dw[1])
|
||||||
|
{
|
||||||
|
*str_ptr += 8 + __builtin_ctzll(qw.dw[1]) / 8;
|
||||||
|
goto match;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
qw.dw[1] >>= (align_offset - 8) * 8;
|
||||||
|
if (qw.dw[1])
|
||||||
|
{
|
||||||
|
*str_ptr += align_offset + __builtin_ctzll(qw.dw[1]) / 8;
|
||||||
|
goto match;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*str_ptr += 16;
|
||||||
|
|
||||||
|
while (*str_ptr < str_end)
|
||||||
|
{
|
||||||
|
vect_t orig_data = VLD1Q(*str_ptr);
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
|
orig_data = VANDQ(orig_data, char_mask);
|
||||||
|
#endif
|
||||||
|
data = orig_data;
|
||||||
|
|
||||||
|
#if defined(FFCS)
|
||||||
|
eq = VCEQQ(data, vc1);
|
||||||
|
|
||||||
|
#elif defined(FFCS_2)
|
||||||
|
eq1 = VCEQQ(data, vc1);
|
||||||
|
eq2 = VCEQQ(data, vc2);
|
||||||
|
eq = VORRQ(eq1, eq2);
|
||||||
|
|
||||||
|
#elif defined(FFCS_MASK)
|
||||||
|
eq = VORRQ(data, vmask);
|
||||||
|
eq = VCEQQ(eq, vc1);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(FFCPS)
|
||||||
|
# if defined (FFCPS_DIFF1)
|
||||||
|
data2 = VEXTQ(prev_data, data, VECTOR_FACTOR - 1);
|
||||||
|
# else
|
||||||
|
data2 = VLD1Q(*str_ptr - diff);
|
||||||
|
# if PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
|
data2 = VANDQ(data2, char_mask);
|
||||||
|
# endif
|
||||||
|
# endif
|
||||||
|
|
||||||
|
# ifdef FFCPS_CHAR1A2A
|
||||||
|
data = VCEQQ(data, cmp1a);
|
||||||
|
data2 = VCEQQ(data2, cmp2a);
|
||||||
|
# else
|
||||||
|
if (compare1_type == compare_match1)
|
||||||
|
data = VCEQQ(data, cmp1a);
|
||||||
|
else
|
||||||
|
data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b);
|
||||||
|
if (compare2_type == compare_match1)
|
||||||
|
data2 = VCEQQ(data2, cmp2a);
|
||||||
|
else
|
||||||
|
data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b);
|
||||||
|
# endif
|
||||||
|
|
||||||
|
eq = VANDQ(data, data2);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
VST1Q(qw.mem, eq);
|
||||||
|
if (qw.dw[0])
|
||||||
|
*str_ptr += __builtin_ctzll(qw.dw[0]) / 8;
|
||||||
|
else if (qw.dw[1])
|
||||||
|
*str_ptr += 8 + __builtin_ctzll(qw.dw[1]) / 8;
|
||||||
|
else {
|
||||||
|
*str_ptr += 16;
|
||||||
|
#if defined (FFCPS_DIFF1)
|
||||||
|
prev_data = orig_data;
|
||||||
|
#endif
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
match:;
|
||||||
|
if (*str_ptr >= str_end)
|
||||||
|
/* Failed match. */
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
#if defined(FF_UTF)
|
||||||
|
if (utf_continue((PCRE2_SPTR)*str_ptr - offs1))
|
||||||
|
{
|
||||||
|
/* Not a match. */
|
||||||
|
*str_ptr += IN_UCHARS(1);
|
||||||
|
goto restart;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Match. */
|
||||||
|
#if defined (FFCPS)
|
||||||
|
*str_ptr -= IN_UCHARS(offs1);
|
||||||
|
#endif
|
||||||
|
return *str_ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Failed match. */
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
2356
3rd/pcre2/src/pcre2_jit_simd_inc.h
Normal file
2356
3rd/pcre2/src/pcre2_jit_simd_inc.h
Normal file
@@ -0,0 +1,2356 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
This module by Zoltan Herczeg
|
||||||
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
New API code Copyright (c) 2016-2019 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if !(defined SUPPORT_VALGRIND)
|
||||||
|
|
||||||
|
#if ((defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
|
||||||
|
|| (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \
|
||||||
|
|| (defined SLJIT_CONFIG_LOONGARCH_64 && SLJIT_CONFIG_LOONGARCH_64))
|
||||||
|
|
||||||
|
/* How one pattern position is tested against a vector of subject data by
   the SIMD fast-forward emitters. */
typedef enum {
  /* A single equality compare against one replicated character. */
  vector_compare_match1 = 0,
  /* Two characters differing in exactly one bit: that bit is ORed into the
     data first, then a single equality compare is done. */
  vector_compare_match1i = 1,
  /* Two unrelated characters: two equality compares ORed together. */
  vector_compare_match2 = 2
} vector_compare_type;
|
||||||
|
|
||||||
|
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
|
||||||
|
static SLJIT_INLINE sljit_s32 max_fast_forward_char_pair_offset(void)
|
||||||
|
{
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
/* The AVX2 code path is currently disabled. */
|
||||||
|
/* return sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? 31 : 15; */
|
||||||
|
return 15;
|
||||||
|
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||||
|
/* The AVX2 code path is currently disabled. */
|
||||||
|
/* return sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? 15 : 7; */
|
||||||
|
return 7;
|
||||||
|
#elif PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
|
/* The AVX2 code path is currently disabled. */
|
||||||
|
/* return sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? 7 : 3; */
|
||||||
|
return 3;
|
||||||
|
#else
|
||||||
|
#error "Unsupported unit width"
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
#else /* !SLJIT_CONFIG_X86 */
|
||||||
|
static SLJIT_INLINE sljit_s32 max_fast_forward_char_pair_offset(void)
|
||||||
|
{
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
return 15;
|
||||||
|
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||||
|
return 7;
|
||||||
|
#elif PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
|
return 3;
|
||||||
|
#else
|
||||||
|
#error "Unsupported unit width"
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
#endif /* SLJIT_CONFIG_X86 */
|
||||||
|
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
/* Emits a test of the code unit held in reg and returns a jump that is
   taken when the unit is NOT a trailing unit of a multi-unit character
   (a UTF-8 continuation byte 10xxxxxx, or a UTF-16 unit in the
   0xdc00..0xdfff range), i.e. when it can start a character.

   reg is overwritten with the masked value. */
static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
const sljit_sw kind_mask = 0xc0;
const sljit_sw trailing_kind = 0x80;
#elif PCRE2_CODE_UNIT_WIDTH == 16
const sljit_sw kind_mask = 0xfc00;
const sljit_sw trailing_kind = 0xdc00;
#else
#error "Unknown code width"
#endif

OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, kind_mask);
return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, trailing_kind);
}
#endif
|
||||||
|
|
||||||
|
#endif /* SLJIT_CONFIG_X86 || SLJIT_CONFIG_S390X || SLJIT_CONFIG_LOONGARCH_64 */
|
||||||
|
|
||||||
|
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
|
||||||
|
|
||||||
|
/* SIMD_COMPARE_TYPE_INDEX is added to the PCMPEQB opcode (0x74) by the
   emitters below to select the byte, word or dword compare. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define SIMD_COMPARE_TYPE_INDEX 0
#elif PCRE2_CODE_UNIT_WIDTH == 16
#define SIMD_COMPARE_TYPE_INDEX 1
#elif PCRE2_CODE_UNIT_WIDTH == 32
#define SIMD_COMPARE_TYPE_INDEX 2
#else
#error "Unsupported unit width"
#endif

/* Fills a 32 bit value with copies of chr: four copies for 8 bit code
   units, two for 16 bit, one for 32 bit. */
static sljit_s32 character_to_int32(PCRE2_UCHAR chr)
{
sljit_u32 filled = chr;

#if PCRE2_CODE_UNIT_WIDTH == 8
filled |= filled << 8;
filled |= filled << 16;
#elif PCRE2_CODE_UNIT_WIDTH == 16
filled |= filled << 16;
#endif

return (sljit_s32)filled;
}
|
||||||
|
|
||||||
|
/* Emits one step of the vector comparison selected by compare_type.
   Callers invoke it for step = 0, 1, 2, 3 in order; steps that have no work
   for the given type emit nothing.

     match1           step 2: PCMPEQ dst, cmp1
     match1i          step 0: POR dst, cmp2      step 2: PCMPEQ dst, cmp1
     match2, 128 bit  step 0: MOVDQA tmp, dst    step 1: PCMPEQ dst, cmp1
                      step 2: PCMPEQ tmp, cmp2   step 3: POR dst, tmp
     match2, 256 bit  step 0: PCMPEQ tmp, cmp2 (dst as the extra source)
                      step 1: PCMPEQ dst, cmp1   step 3: POR dst, tmp

   reg_type            SLJIT_SIMD_REG_128 or SLJIT_SIMD_REG_256.
   dst_ind, cmp1_ind,
   cmp2_ind, tmp_ind   machine indexes of the vector registers. */
static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, vector_compare_type compare_type,
  sljit_s32 reg_type, int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
{
const int is_128 = (reg_type == SLJIT_SIMD_REG_128);
const int is_256 = (reg_type == SLJIT_SIMD_REG_256);
sljit_u8 instruction[4];
sljit_u8 opcode;
sljit_s32 modrm_reg;
sljit_s32 modrm_rm;

SLJIT_ASSERT(step >= 0 && step <= 3);

/* Decide which instruction (if any) belongs to this step. */
if (compare_type != vector_compare_match2)
  {
  if (step == 0 && compare_type == vector_compare_match1i)
    {
    /* POR xmm1, xmm2/m128 */
    opcode = 0xeb;
    modrm_reg = dst_ind;
    modrm_rm = cmp2_ind;
    }
  else if (step == 2)
    {
    /* PCMPEQB/W/D xmm1, xmm2/m128 */
    opcode = 0x74 + SIMD_COMPARE_TYPE_INDEX;
    modrm_reg = dst_ind;
    modrm_rm = cmp1_ind;
    }
  else
    return;
  }
else
  {
  if (is_256)
    {
    /* The three operand form needs no copy of dst, so the second compare
       takes the place of the MOVDQA in step 0 and step 2 is idle. */
    if (step == 2)
      return;
    if (step == 0)
      step = 2;
    }

  switch (step)
    {
    case 0:
    SLJIT_ASSERT(reg_type == SLJIT_SIMD_REG_128);
    /* MOVDQA xmm1, xmm2/m128 */
    opcode = 0x6f;
    modrm_reg = tmp_ind;
    modrm_rm = dst_ind;
    break;

    case 1:
    /* PCMPEQB/W/D xmm1, xmm2/m128 */
    opcode = 0x74 + SIMD_COMPARE_TYPE_INDEX;
    modrm_reg = dst_ind;
    modrm_rm = cmp1_ind;
    break;

    case 2:
    /* PCMPEQB/W/D xmm1, xmm2/m128 */
    opcode = 0x74 + SIMD_COMPARE_TYPE_INDEX;
    modrm_reg = tmp_ind;
    modrm_rm = cmp2_ind;
    break;

    case 3:
    /* POR xmm1, xmm2/m128 */
    opcode = 0xeb;
    modrm_reg = dst_ind;
    modrm_rm = tmp_ind;
    break;

    default:
    return;
    }
  }

/* Encode: prefix, opcode, register-direct ModRM. */
if (is_128)
  {
  instruction[0] = 0x66;
  instruction[1] = 0x0f;
  }
else
  {
  /* Two byte VEX prefix. */
  instruction[0] = 0xc5;
  instruction[1] = 0xfd;
  }

/* In the 256 bit form dst_ind is additionally folded into the prefix. */
if (is_256)
  instruction[1] ^= (dst_ind << 3);

instruction[2] = opcode;
instruction[3] = 0xc0 | (modrm_reg << 3) | modrm_rm;
sljit_emit_op_custom(compiler, instruction, 4);
}
|
||||||
|
|
||||||
|
#define JIT_HAS_FAST_FORWARD_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SIMD))
|
||||||
|
|
||||||
|
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
|
||||||
|
{
|
||||||
|
DEFINE_COMPILER;
|
||||||
|
sljit_u8 instruction[8];
|
||||||
|
/* The AVX2 code path is currently disabled. */
|
||||||
|
/* sljit_s32 reg_type = sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? SLJIT_SIMD_REG_256 : SLJIT_SIMD_REG_128; */
|
||||||
|
sljit_s32 reg_type = SLJIT_SIMD_REG_128;
|
||||||
|
sljit_s32 value;
|
||||||
|
struct sljit_label *start;
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
struct sljit_label *restart;
|
||||||
|
#endif
|
||||||
|
struct sljit_jump *quit;
|
||||||
|
struct sljit_jump *partial_quit[2];
|
||||||
|
vector_compare_type compare_type = vector_compare_match1;
|
||||||
|
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
|
||||||
|
sljit_s32 data_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR0);
|
||||||
|
sljit_s32 cmp1_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR1);
|
||||||
|
sljit_s32 cmp2_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR2);
|
||||||
|
sljit_s32 tmp_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR3);
|
||||||
|
sljit_u32 bit = 0;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
SLJIT_UNUSED_ARG(offset);
|
||||||
|
|
||||||
|
if (char1 != char2)
|
||||||
|
{
|
||||||
|
bit = char1 ^ char2;
|
||||||
|
compare_type = vector_compare_match1i;
|
||||||
|
|
||||||
|
if (!is_powerof2(bit))
|
||||||
|
{
|
||||||
|
bit = 0;
|
||||||
|
compare_type = vector_compare_match2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
|
||||||
|
if (common->mode == PCRE2_JIT_COMPLETE)
|
||||||
|
add_jump(compiler, &common->failed_match, partial_quit[0]);
|
||||||
|
|
||||||
|
/* First part (unaligned start) */
|
||||||
|
value = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_LANE_ZERO;
|
||||||
|
sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
|
||||||
|
|
||||||
|
if (char1 != char2)
|
||||||
|
sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR2, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
|
||||||
|
|
||||||
|
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
|
||||||
|
|
||||||
|
sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR1, SLJIT_VR1, 0);
|
||||||
|
|
||||||
|
if (char1 != char2)
|
||||||
|
sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR2, SLJIT_VR2, 0);
|
||||||
|
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
restart = LABEL();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
value = (reg_type == SLJIT_SIMD_REG_256) ? 0x1f : 0xf;
|
||||||
|
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~value);
|
||||||
|
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, value);
|
||||||
|
|
||||||
|
value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
|
||||||
|
sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);
|
||||||
|
|
||||||
|
for (i = 0; i < 4; i++)
|
||||||
|
fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
|
||||||
|
|
||||||
|
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0);
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
|
||||||
|
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||||
|
|
||||||
|
quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
|
||||||
|
|
||||||
|
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
|
||||||
|
|
||||||
|
/* Second part (aligned) */
|
||||||
|
start = LABEL();
|
||||||
|
|
||||||
|
value = (reg_type == SLJIT_SIMD_REG_256) ? 32 : 16;
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, value);
|
||||||
|
|
||||||
|
partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
|
||||||
|
if (common->mode == PCRE2_JIT_COMPLETE)
|
||||||
|
add_jump(compiler, &common->failed_match, partial_quit[1]);
|
||||||
|
|
||||||
|
value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
|
||||||
|
sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);
|
||||||
|
for (i = 0; i < 4; i++)
|
||||||
|
fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
|
||||||
|
|
||||||
|
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0);
|
||||||
|
CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
|
||||||
|
|
||||||
|
JUMPHERE(quit);
|
||||||
|
|
||||||
|
SLJIT_ASSERT(tmp1_reg_ind < 8);
|
||||||
|
/* BSF r32, r/m32 */
|
||||||
|
instruction[0] = 0x0f;
|
||||||
|
instruction[1] = 0xbc;
|
||||||
|
instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 3);
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
|
||||||
|
|
||||||
|
if (common->mode != PCRE2_JIT_COMPLETE)
|
||||||
|
{
|
||||||
|
JUMPHERE(partial_quit[0]);
|
||||||
|
JUMPHERE(partial_quit[1]);
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
|
||||||
|
SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||||
|
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
if (common->utf && offset > 0)
|
||||||
|
{
|
||||||
|
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
|
||||||
|
|
||||||
|
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
|
||||||
|
|
||||||
|
quit = jump_if_utf_char_start(compiler, TMP1);
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||||
|
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||||
|
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
|
||||||
|
JUMPTO(SLJIT_JUMP, restart);
|
||||||
|
|
||||||
|
JUMPHERE(quit);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SIMD))
|
||||||
|
|
||||||
|
static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
|
||||||
|
{
|
||||||
|
DEFINE_COMPILER;
|
||||||
|
sljit_u8 instruction[8];
|
||||||
|
/* The AVX2 code path is currently disabled. */
|
||||||
|
/* sljit_s32 reg_type = sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? SLJIT_SIMD_REG_256 : SLJIT_SIMD_REG_128; */
|
||||||
|
sljit_s32 reg_type = SLJIT_SIMD_REG_128;
|
||||||
|
sljit_s32 value;
|
||||||
|
struct sljit_label *start;
|
||||||
|
struct sljit_jump *quit;
|
||||||
|
jump_list *not_found = NULL;
|
||||||
|
vector_compare_type compare_type = vector_compare_match1;
|
||||||
|
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
|
||||||
|
sljit_s32 data_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR0);
|
||||||
|
sljit_s32 cmp1_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR1);
|
||||||
|
sljit_s32 cmp2_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR2);
|
||||||
|
sljit_s32 tmp_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR3);
|
||||||
|
sljit_u32 bit = 0;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if (char1 != char2)
|
||||||
|
{
|
||||||
|
bit = char1 ^ char2;
|
||||||
|
compare_type = vector_compare_match1i;
|
||||||
|
|
||||||
|
if (!is_powerof2(bit))
|
||||||
|
{
|
||||||
|
bit = 0;
|
||||||
|
compare_type = vector_compare_match2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
|
||||||
|
OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
|
||||||
|
OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
|
||||||
|
|
||||||
|
/* First part (unaligned start) */
|
||||||
|
|
||||||
|
value = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_LANE_ZERO;
|
||||||
|
sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
|
||||||
|
|
||||||
|
if (char1 != char2)
|
||||||
|
sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR2, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
|
||||||
|
|
||||||
|
OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
|
||||||
|
|
||||||
|
sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR1, SLJIT_VR1, 0);
|
||||||
|
|
||||||
|
if (char1 != char2)
|
||||||
|
sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR2, SLJIT_VR2, 0);
|
||||||
|
|
||||||
|
value = (reg_type == SLJIT_SIMD_REG_256) ? 0x1f : 0xf;
|
||||||
|
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~value);
|
||||||
|
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, value);
|
||||||
|
|
||||||
|
value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
|
||||||
|
sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);
|
||||||
|
|
||||||
|
for (i = 0; i < 4; i++)
|
||||||
|
fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
|
||||||
|
|
||||||
|
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0);
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
|
||||||
|
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||||
|
|
||||||
|
quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
|
||||||
|
|
||||||
|
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
|
||||||
|
|
||||||
|
/* Second part (aligned) */
|
||||||
|
start = LABEL();
|
||||||
|
|
||||||
|
value = (reg_type == SLJIT_SIMD_REG_256) ? 32 : 16;
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, value);
|
||||||
|
|
||||||
|
add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||||
|
|
||||||
|
value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
|
||||||
|
sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);
|
||||||
|
|
||||||
|
for (i = 0; i < 4; i++)
|
||||||
|
fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
|
||||||
|
|
||||||
|
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0);
|
||||||
|
CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
|
||||||
|
|
||||||
|
JUMPHERE(quit);
|
||||||
|
|
||||||
|
SLJIT_ASSERT(tmp1_reg_ind < 8);
|
||||||
|
/* BSF r32, r/m32 */
|
||||||
|
instruction[0] = 0x0f;
|
||||||
|
instruction[1] = 0xbc;
|
||||||
|
instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 3);
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, STR_PTR, 0);
|
||||||
|
add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
|
||||||
|
|
||||||
|
OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
|
||||||
|
return not_found;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifndef _WIN64
|
||||||
|
|
||||||
|
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SIMD))
|
||||||
|
|
||||||
|
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
|
||||||
|
PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
|
||||||
|
{
|
||||||
|
DEFINE_COMPILER;
|
||||||
|
sljit_u8 instruction[8];
|
||||||
|
/* The AVX2 code path is currently disabled. */
|
||||||
|
/* sljit_s32 reg_type = sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? SLJIT_SIMD_REG_256 : SLJIT_SIMD_REG_128; */
|
||||||
|
sljit_s32 reg_type = SLJIT_SIMD_REG_128;
|
||||||
|
sljit_s32 value;
|
||||||
|
vector_compare_type compare1_type = vector_compare_match1;
|
||||||
|
vector_compare_type compare2_type = vector_compare_match1;
|
||||||
|
sljit_u32 bit1 = 0;
|
||||||
|
sljit_u32 bit2 = 0;
|
||||||
|
sljit_u32 diff = IN_UCHARS(offs1 - offs2);
|
||||||
|
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
|
||||||
|
sljit_s32 data1_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR0);
|
||||||
|
sljit_s32 data2_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR1);
|
||||||
|
sljit_s32 cmp1a_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR2);
|
||||||
|
sljit_s32 cmp2a_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR3);
|
||||||
|
sljit_s32 cmp1b_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR4);
|
||||||
|
sljit_s32 cmp2b_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR5);
|
||||||
|
sljit_s32 tmp1_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR6);
|
||||||
|
sljit_s32 tmp2_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_TMP_DEST_VREG);
|
||||||
|
struct sljit_label *start;
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
struct sljit_label *restart;
|
||||||
|
#endif
|
||||||
|
struct sljit_jump *jump[2];
|
||||||
|
int i;
|
||||||
|
|
||||||
|
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2 && offs2 >= 0);
|
||||||
|
SLJIT_ASSERT(diff <= (unsigned)IN_UCHARS(max_fast_forward_char_pair_offset()));
|
||||||
|
|
||||||
|
/* Initialize. */
|
||||||
|
if (common->match_end_ptr != 0)
|
||||||
|
{
|
||||||
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
|
||||||
|
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
|
||||||
|
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
|
||||||
|
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0);
|
||||||
|
SELECT(SLJIT_LESS, STR_END, TMP1, 0, STR_END);
|
||||||
|
}
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
|
||||||
|
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||||
|
|
||||||
|
if (char1a == char1b)
|
||||||
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
|
||||||
|
else
|
||||||
|
{
|
||||||
|
bit1 = char1a ^ char1b;
|
||||||
|
if (is_powerof2(bit1))
|
||||||
|
{
|
||||||
|
compare1_type = vector_compare_match1i;
|
||||||
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1));
|
||||||
|
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
compare1_type = vector_compare_match2;
|
||||||
|
bit1 = 0;
|
||||||
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
|
||||||
|
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char1b));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
value = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_LANE_ZERO;
|
||||||
|
sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR2, 0, TMP1, 0);
|
||||||
|
|
||||||
|
if (char1a != char1b)
|
||||||
|
sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR4, 0, TMP2, 0);
|
||||||
|
|
||||||
|
if (char2a == char2b)
|
||||||
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
|
||||||
|
else
|
||||||
|
{
|
||||||
|
bit2 = char2a ^ char2b;
|
||||||
|
if (is_powerof2(bit2))
|
||||||
|
{
|
||||||
|
compare2_type = vector_compare_match1i;
|
||||||
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2));
|
||||||
|
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
compare2_type = vector_compare_match2;
|
||||||
|
bit2 = 0;
|
||||||
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
|
||||||
|
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char2b));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR3, 0, TMP1, 0);
|
||||||
|
|
||||||
|
if (char2a != char2b)
|
||||||
|
sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR5, 0, TMP2, 0);
|
||||||
|
|
||||||
|
sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR2, SLJIT_VR2, 0);
|
||||||
|
if (char1a != char1b)
|
||||||
|
sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR4, SLJIT_VR4, 0);
|
||||||
|
|
||||||
|
sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR3, SLJIT_VR3, 0);
|
||||||
|
if (char2a != char2b)
|
||||||
|
sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR5, SLJIT_VR5, 0);
|
||||||
|
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
restart = LABEL();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, diff);
|
||||||
|
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
|
||||||
|
value = (reg_type == SLJIT_SIMD_REG_256) ? ~0x1f : ~0xf;
|
||||||
|
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, value);
|
||||||
|
|
||||||
|
value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
|
||||||
|
sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);
|
||||||
|
|
||||||
|
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_PTR, 0);
|
||||||
|
|
||||||
|
sljit_emit_simd_mov(compiler, reg_type, SLJIT_VR1, SLJIT_MEM1(STR_PTR), -(sljit_sw)diff);
|
||||||
|
jump[1] = JUMP(SLJIT_JUMP);
|
||||||
|
|
||||||
|
JUMPHERE(jump[0]);
|
||||||
|
|
||||||
|
if (reg_type == SLJIT_SIMD_REG_256)
|
||||||
|
{
|
||||||
|
if (diff != 16)
|
||||||
|
{
|
||||||
|
/* PSLLDQ ymm1, ymm2, imm8 */
|
||||||
|
instruction[0] = 0xc5;
|
||||||
|
instruction[1] = (sljit_u8)(0xf9 ^ (data2_ind << 3));
|
||||||
|
instruction[2] = 0x73;
|
||||||
|
instruction[3] = 0xc0 | (7 << 3) | data1_ind;
|
||||||
|
instruction[4] = diff & 0xf;
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 5);
|
||||||
|
}
|
||||||
|
|
||||||
|
instruction[0] = 0xc4;
|
||||||
|
instruction[1] = 0xe3;
|
||||||
|
if (diff < 16)
|
||||||
|
{
|
||||||
|
/* VINSERTI128 xmm1, xmm2, xmm3/m128 */
|
||||||
|
/* instruction[0] = 0xc4; */
|
||||||
|
/* instruction[1] = 0xe3; */
|
||||||
|
instruction[2] = (sljit_u8)(0x7d ^ (data2_ind << 3));
|
||||||
|
instruction[3] = 0x38;
|
||||||
|
SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR) <= 7);
|
||||||
|
instruction[4] = 0x40 | (data2_ind << 3) | sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
|
||||||
|
instruction[5] = (sljit_u8)(16 - diff);
|
||||||
|
instruction[6] = 1;
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 7);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* VPERM2I128 xmm1, xmm2, xmm3/m128 */
|
||||||
|
/* instruction[0] = 0xc4; */
|
||||||
|
/* instruction[1] = 0xe3; */
|
||||||
|
value = (diff == 16) ? data1_ind : data2_ind;
|
||||||
|
instruction[2] = (sljit_u8)(0x7d ^ (value << 3));
|
||||||
|
instruction[3] = 0x46;
|
||||||
|
instruction[4] = 0xc0 | (data2_ind << 3) | value;
|
||||||
|
instruction[5] = 0x08;
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* MOVDQA xmm1, xmm2/m128 */
|
||||||
|
instruction[0] = 0x66;
|
||||||
|
instruction[1] = 0x0f;
|
||||||
|
instruction[2] = 0x6f;
|
||||||
|
instruction[3] = 0xc0 | (data2_ind << 3) | data1_ind;
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 4);
|
||||||
|
|
||||||
|
/* PSLLDQ xmm1, imm8 */
|
||||||
|
/* instruction[0] = 0x66; */
|
||||||
|
/* instruction[1] = 0x0f; */
|
||||||
|
instruction[2] = 0x73;
|
||||||
|
instruction[3] = 0xc0 | (7 << 3) | data2_ind;
|
||||||
|
instruction[4] = diff;
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 5);
|
||||||
|
}
|
||||||
|
|
||||||
|
JUMPHERE(jump[1]);
|
||||||
|
|
||||||
|
value = (reg_type == SLJIT_SIMD_REG_256) ? 0x1f : 0xf;
|
||||||
|
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, value);
|
||||||
|
|
||||||
|
for (i = 0; i < 4; i++)
|
||||||
|
{
|
||||||
|
fast_forward_char_pair_sse2_compare(compiler, compare2_type, reg_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
|
||||||
|
fast_forward_char_pair_sse2_compare(compiler, compare1_type, reg_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
|
||||||
|
}
|
||||||
|
|
||||||
|
sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | reg_type, SLJIT_VR0, SLJIT_VR0, SLJIT_VR1, 0);
|
||||||
|
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0);
|
||||||
|
|
||||||
|
/* Ignore matches before the first STR_PTR. */
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
|
||||||
|
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||||
|
|
||||||
|
jump[0] = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
|
||||||
|
|
||||||
|
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
|
||||||
|
|
||||||
|
/* Main loop. */
|
||||||
|
start = LABEL();
|
||||||
|
|
||||||
|
value = (reg_type == SLJIT_SIMD_REG_256) ? 32 : 16;
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, value);
|
||||||
|
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||||
|
|
||||||
|
value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
|
||||||
|
sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);
|
||||||
|
sljit_emit_simd_mov(compiler, reg_type, SLJIT_VR1, SLJIT_MEM1(STR_PTR), -(sljit_sw)diff);
|
||||||
|
|
||||||
|
for (i = 0; i < 4; i++)
|
||||||
|
{
|
||||||
|
fast_forward_char_pair_sse2_compare(compiler, compare1_type, reg_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp2_ind);
|
||||||
|
fast_forward_char_pair_sse2_compare(compiler, compare2_type, reg_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind);
|
||||||
|
}
|
||||||
|
|
||||||
|
sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | reg_type, SLJIT_VR0, SLJIT_VR0, SLJIT_VR1, 0);
|
||||||
|
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0);
|
||||||
|
|
||||||
|
CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
|
||||||
|
|
||||||
|
JUMPHERE(jump[0]);
|
||||||
|
|
||||||
|
SLJIT_ASSERT(tmp1_reg_ind < 8);
|
||||||
|
/* BSF r32, r/m32 */
|
||||||
|
instruction[0] = 0x0f;
|
||||||
|
instruction[1] = 0xbc;
|
||||||
|
instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 3);
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
|
||||||
|
|
||||||
|
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||||
|
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
if (common->utf)
|
||||||
|
{
|
||||||
|
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));
|
||||||
|
|
||||||
|
jump[0] = jump_if_utf_char_start(compiler, TMP1);
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||||
|
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart);
|
||||||
|
|
||||||
|
add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP));
|
||||||
|
|
||||||
|
JUMPHERE(jump[0]);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
|
||||||
|
|
||||||
|
if (common->match_end_ptr != 0)
|
||||||
|
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* !_WIN64 */
|
||||||
|
|
||||||
|
#undef SIMD_COMPARE_TYPE_INDEX
|
||||||
|
|
||||||
|
#endif /* SLJIT_CONFIG_X86 */
|
||||||
|
|
||||||
|
#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 && (defined __ARM_NEON || defined __ARM_NEON__))
|
||||||
|
|
||||||
|
#include <arm_neon.h>
|
||||||
|
|
||||||
|
/* Overlays four unsigned char members (c.c1 .. c.c4) with one unsigned int
(x), so that the characters can be handed over as a single integer value. */
typedef union {
  unsigned int x;
  struct {
    unsigned char c1;
    unsigned char c2;
    unsigned char c3;
    unsigned char c4;
  } c;
} int_char;
|
||||||
|
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32

/* Tests the code unit at s against the "continuation" bit pattern of the
current code unit width.

  8 bit:  (unit & 0xc0) == 0x80
  16 bit: (unit & 0xfc00) == 0xdc00

Returns 1 when the pattern matches and 0 otherwise. Only the unit at s is
read. */
static SLJIT_INLINE int utf_continue(PCRE2_SPTR s)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
const int top_bits = *s & 0xc0;
return top_bits == 0x80;
#elif PCRE2_CODE_UNIT_WIDTH == 16
const int top_bits = *s & 0xfc00;
return top_bits == 0xdc00;
#else
#error "Unknown code width"
#endif
}

#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
# define VECTOR_FACTOR 16
|
||||||
|
# define vect_t uint8x16_t
|
||||||
|
# define VLD1Q(X) vld1q_u8((sljit_u8 *)(X))
|
||||||
|
# define VCEQQ vceqq_u8
|
||||||
|
# define VORRQ vorrq_u8
|
||||||
|
# define VST1Q vst1q_u8
|
||||||
|
# define VDUPQ vdupq_n_u8
|
||||||
|
# define VEXTQ vextq_u8
|
||||||
|
# define VANDQ vandq_u8
|
||||||
|
typedef union {
|
||||||
|
uint8_t mem[16];
|
||||||
|
uint64_t dw[2];
|
||||||
|
} quad_word;
|
||||||
|
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||||
|
# define VECTOR_FACTOR 8
|
||||||
|
# define vect_t uint16x8_t
|
||||||
|
# define VLD1Q(X) vld1q_u16((sljit_u16 *)(X))
|
||||||
|
# define VCEQQ vceqq_u16
|
||||||
|
# define VORRQ vorrq_u16
|
||||||
|
# define VST1Q vst1q_u16
|
||||||
|
# define VDUPQ vdupq_n_u16
|
||||||
|
# define VEXTQ vextq_u16
|
||||||
|
# define VANDQ vandq_u16
|
||||||
|
typedef union {
|
||||||
|
uint16_t mem[8];
|
||||||
|
uint64_t dw[2];
|
||||||
|
} quad_word;
|
||||||
|
#else
|
||||||
|
# define VECTOR_FACTOR 4
|
||||||
|
# define vect_t uint32x4_t
|
||||||
|
# define VLD1Q(X) vld1q_u32((sljit_u32 *)(X))
|
||||||
|
# define VCEQQ vceqq_u32
|
||||||
|
# define VORRQ vorrq_u32
|
||||||
|
# define VST1Q vst1q_u32
|
||||||
|
# define VDUPQ vdupq_n_u32
|
||||||
|
# define VEXTQ vextq_u32
|
||||||
|
# define VANDQ vandq_u32
|
||||||
|
typedef union {
|
||||||
|
uint32_t mem[4];
|
||||||
|
uint64_t dw[2];
|
||||||
|
} quad_word;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* pcre2_jit_neon_inc.h is included once for each selector macro (FFCS,
FFCS_2, FFCS_MASK). When Unicode support is compiled in and the code unit
width is not 32, every selector gets a second inclusion with FF_UTF defined.
The ffcs, ffcs_2 and ffcs_mask functions (and their _utf forms) called by
fast_forward_char_simd() presumably originate from these inclusions; the
header itself is not part of this file. */

/* Pass 1: FFCS */
#define FFCS
#include "pcre2_jit_neon_inc.h"
#if defined(SUPPORT_UNICODE) && PCRE2_CODE_UNIT_WIDTH != 32
#define FF_UTF
#include "pcre2_jit_neon_inc.h"
#undef FF_UTF
#endif
#undef FFCS

/* Pass 2: FFCS_2 */
#define FFCS_2
#include "pcre2_jit_neon_inc.h"
#if defined(SUPPORT_UNICODE) && PCRE2_CODE_UNIT_WIDTH != 32
#define FF_UTF
#include "pcre2_jit_neon_inc.h"
#undef FF_UTF
#endif
#undef FFCS_2

/* Pass 3: FFCS_MASK */
#define FFCS_MASK
#include "pcre2_jit_neon_inc.h"
#if defined(SUPPORT_UNICODE) && PCRE2_CODE_UNIT_WIDTH != 32
#define FF_UTF
#include "pcre2_jit_neon_inc.h"
#undef FF_UTF
#endif
#undef FFCS_MASK
|
||||||
|
|
||||||
|
#define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1
|
||||||
|
|
||||||
|
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
|
||||||
|
{
|
||||||
|
DEFINE_COMPILER;
|
||||||
|
int_char ic;
|
||||||
|
struct sljit_jump *partial_quit, *quit;
|
||||||
|
/* Save temporary registers. */
|
||||||
|
SLJIT_ASSERT(common->locals_size >= 2 * (int)sizeof(sljit_sw));
|
||||||
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, STR_PTR, 0);
|
||||||
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, TMP3, 0);
|
||||||
|
|
||||||
|
/* Prepare function arguments */
|
||||||
|
OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0);
|
||||||
|
GET_LOCAL_BASE(SLJIT_R1, 0, LOCAL0);
|
||||||
|
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, offset);
|
||||||
|
|
||||||
|
if (char1 == char2)
|
||||||
|
{
|
||||||
|
ic.c.c1 = char1;
|
||||||
|
ic.c.c2 = char2;
|
||||||
|
OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x);
|
||||||
|
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
if (common->utf && offset > 0)
|
||||||
|
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
|
||||||
|
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_utf));
|
||||||
|
else
|
||||||
|
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
|
||||||
|
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs));
|
||||||
|
#else
|
||||||
|
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
|
||||||
|
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PCRE2_UCHAR mask = char1 ^ char2;
|
||||||
|
if (is_powerof2(mask))
|
||||||
|
{
|
||||||
|
ic.c.c1 = char1 | mask;
|
||||||
|
ic.c.c2 = mask;
|
||||||
|
OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x);
|
||||||
|
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
if (common->utf && offset > 0)
|
||||||
|
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
|
||||||
|
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask_utf));
|
||||||
|
else
|
||||||
|
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
|
||||||
|
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask));
|
||||||
|
#else
|
||||||
|
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
|
||||||
|
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ic.c.c1 = char1;
|
||||||
|
ic.c.c2 = char2;
|
||||||
|
OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x);
|
||||||
|
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
if (common->utf && offset > 0)
|
||||||
|
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
|
||||||
|
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2_utf));
|
||||||
|
else
|
||||||
|
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
|
||||||
|
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2));
|
||||||
|
#else
|
||||||
|
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
|
||||||
|
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Restore registers. */
|
||||||
|
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
|
||||||
|
OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);
|
||||||
|
|
||||||
|
/* Check return value. */
|
||||||
|
partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
|
||||||
|
if (common->mode == PCRE2_JIT_COMPLETE)
|
||||||
|
add_jump(compiler, &common->failed_match, partial_quit);
|
||||||
|
|
||||||
|
/* Fast forward STR_PTR to the result of memchr. */
|
||||||
|
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
|
||||||
|
if (common->mode != PCRE2_JIT_COMPLETE)
|
||||||
|
{
|
||||||
|
quit = CMP(SLJIT_NOT_ZERO, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
|
||||||
|
JUMPHERE(partial_quit);
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
|
||||||
|
SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
|
||||||
|
JUMPHERE(quit);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Comparison strategies understood by fast_forward_char_pair_compare(). */
typedef enum {
  compare_match1 = 0,   /* equality test against cmp1 */
  compare_match1i = 1,  /* or with cmp2 first, then equality test against cmp1 */
  compare_match2 = 2    /* equality test against cmp1 or against cmp2 */
} compare_type;
|
||||||
|
|
||||||
|
/* Compares every lane of dst according to ctype and returns the lane mask
produced by the equality intrinsic.

  compare_match2:   (dst == cmp1) | (dst == cmp2)
  compare_match1i:  (dst | cmp2) == cmp1
  anything else:    dst == cmp1 (cmp2 is ignored) */
static inline vect_t fast_forward_char_pair_compare(compare_type ctype, vect_t dst, vect_t cmp1, vect_t cmp2)
{
switch (ctype)
  {
  case compare_match2:
    return VORRQ(VCEQQ(dst, cmp1), VCEQQ(dst, cmp2));

  case compare_match1i:
    return VCEQQ(VORRQ(dst, cmp2), cmp1);

  default:
    return VCEQQ(dst, cmp1);
  }
}
|
||||||
|
|
||||||
|
/* Returns the largest supported distance between the two characters of a
pair, in code units: 15, 7 or 3 for 8, 16 or 32 bit code units. This is one
less than the number of code units in a 128 bit vector. */
static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void)
{
#if PCRE2_CODE_UNIT_WIDTH != 8 && PCRE2_CODE_UNIT_WIDTH != 16 && PCRE2_CODE_UNIT_WIDTH != 32
#error "Unsupported unit width"
#endif
return (sljit_u32)(128 / PCRE2_CODE_UNIT_WIDTH) - 1;
}
|
||||||
|
|
||||||
|
/* ARM doesn't have a shift left across lanes. */
|
||||||
|
static SLJIT_INLINE vect_t shift_left_n_lanes(vect_t a, sljit_u8 n)
|
||||||
|
{
|
||||||
|
vect_t zero = VDUPQ(0);
|
||||||
|
SLJIT_ASSERT(0 < n && n < VECTOR_FACTOR);
|
||||||
|
/* VEXTQ takes an immediate as last argument. */
|
||||||
|
#define C(X) case X: return VEXTQ(zero, a, VECTOR_FACTOR - X);
|
||||||
|
switch (n)
|
||||||
|
{
|
||||||
|
C(1); C(2); C(3);
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
C(4); C(5); C(6); C(7);
|
||||||
|
# if PCRE2_CODE_UNIT_WIDTH != 16
|
||||||
|
C(8); C(9); C(10); C(11); C(12); C(13); C(14); C(15);
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
default:
|
||||||
|
/* Based on the ASSERT(0 < n && n < VECTOR_FACTOR) above, this won't
|
||||||
|
happen. The return is still here for compilers to not warn. */
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* pcre2_jit_neon_inc.h is included once for each character pair selector
(FFCPS_0, FFCPS_1, FFCPS_DEFAULT) while FFCPS is defined. FFCPS_DIFF1 is
defined for the FFCPS_0 and FFCPS_1 passes, FFCPS_CHAR1A2A only for the
FFCPS_0 pass. When Unicode support is compiled in and the code unit width is
not 32, every selector gets a second inclusion with FF_UTF defined. The
ffcps_0, ffcps_1 and ffcps_default functions (and their _utf forms) called by
fast_forward_char_pair_simd() presumably originate from these inclusions; the
header itself is not part of this file. */

#define FFCPS
#define FFCPS_DIFF1
#define FFCPS_CHAR1A2A

#define FFCPS_0
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
# define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCPS_0

#undef FFCPS_CHAR1A2A

#define FFCPS_1
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
# define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCPS_1

#undef FFCPS_DIFF1

#define FFCPS_DEFAULT
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
# define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
/* Removed like the other selectors, so that no selector stays defined. */
#undef FFCPS_DEFAULT
#undef FFCPS
|
||||||
|
|
||||||
|
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1
|
||||||
|
|
||||||
|
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
|
||||||
|
PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
|
||||||
|
{
|
||||||
|
DEFINE_COMPILER;
|
||||||
|
sljit_u32 diff = IN_UCHARS(offs1 - offs2);
|
||||||
|
struct sljit_jump *partial_quit;
|
||||||
|
int_char ic;
|
||||||
|
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
|
||||||
|
SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset()));
|
||||||
|
SLJIT_ASSERT(compiler->scratches == 5);
|
||||||
|
|
||||||
|
/* Save temporary register STR_PTR. */
|
||||||
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, STR_PTR, 0);
|
||||||
|
|
||||||
|
/* Prepare arguments for the function call. */
|
||||||
|
if (common->match_end_ptr == 0)
|
||||||
|
OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
|
||||||
|
OP2(SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
|
||||||
|
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, SLJIT_R0, 0);
|
||||||
|
SELECT(SLJIT_LESS, SLJIT_R0, STR_END, 0, SLJIT_R0);
|
||||||
|
}
|
||||||
|
|
||||||
|
GET_LOCAL_BASE(SLJIT_R1, 0, LOCAL0);
|
||||||
|
OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_IMM, offs1);
|
||||||
|
OP1(SLJIT_MOV_S32, SLJIT_R3, 0, SLJIT_IMM, offs2);
|
||||||
|
ic.c.c1 = char1a;
|
||||||
|
ic.c.c2 = char1b;
|
||||||
|
ic.c.c3 = char2a;
|
||||||
|
ic.c.c4 = char2b;
|
||||||
|
OP1(SLJIT_MOV_U32, SLJIT_R4, 0, SLJIT_IMM, ic.x);
|
||||||
|
|
||||||
|
if (diff == 1) {
|
||||||
|
if (char1a == char1b && char2a == char2b) {
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
if (common->utf)
|
||||||
|
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
|
||||||
|
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_0_utf));
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
|
||||||
|
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_0));
|
||||||
|
} else {
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
if (common->utf)
|
||||||
|
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
|
||||||
|
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_1_utf));
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
|
||||||
|
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_1));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
if (common->utf)
|
||||||
|
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
|
||||||
|
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_default_utf));
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
|
||||||
|
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_default));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Restore STR_PTR register. */
|
||||||
|
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
|
||||||
|
|
||||||
|
/* Check return value. */
|
||||||
|
partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
|
||||||
|
add_jump(compiler, &common->failed_match, partial_quit);
|
||||||
|
|
||||||
|
/* Fast forward STR_PTR to the result of memchr. */
|
||||||
|
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
|
||||||
|
|
||||||
|
JUMPHERE(partial_quit);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 */
|
||||||
|
|
||||||
|
#if (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
#define VECTOR_ELEMENT_SIZE 0
|
||||||
|
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||||
|
#define VECTOR_ELEMENT_SIZE 1
|
||||||
|
#elif PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
|
#define VECTOR_ELEMENT_SIZE 2
|
||||||
|
#else
|
||||||
|
#error "Unsupported unit width"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Emits one 6 byte vector load instruction.

Arguments:
  compiler    sljit compiler
  vlbb        selects the last opcode byte: 0x07 when TRUE, 0x06 when FALSE
              (presumably VLBB and VL - confirm against the instruction set
              reference)
  dst_vreg    index of the destination vector register
  base_reg    machine index of the base register
  index_reg   machine index of the index register */
static void load_from_mem_vector(struct sljit_compiler *compiler, BOOL vlbb, sljit_s32 dst_vreg,
  sljit_s32 base_reg, sljit_s32 index_reg)
{
const sljit_s32 opcode_low = vlbb ? 0x07 : 0x06;
sljit_u16 instruction[3];

instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | index_reg);
instruction[1] = (sljit_u16)(base_reg << 12);
instruction[2] = (sljit_u16)((0x8 << 8) | opcode_low);

sljit_emit_op_custom(compiler, instruction, 6);
}
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 32

/* Emits the code that fills every element of vector register dst_vreg with
chr. The work is divided into two steps, the caller invokes the function once
with step 0 and once with step 1.

  chr < 0x7fff:  step 0 emits a single VREPI, step 1 emits nothing
  otherwise:     step 0 loads chr into tmp_general_reg and emits VLVG,
                 step 1 emits VREP

Arguments:
  compiler          sljit compiler
  step              0 or 1
  dst_vreg          index of the destination vector register
  chr               value to replicate
  tmp_general_reg   sljit register overwritten in step 0 when chr >= 0x7fff */
static void replicate_imm_vector(struct sljit_compiler *compiler, int step, sljit_s32 dst_vreg,
  PCRE2_UCHAR chr, sljit_s32 tmp_general_reg)
{
sljit_u16 instruction[3];

SLJIT_ASSERT(step >= 0 && step <= 1);

if (chr < 0x7fff)
  {
  if (step == 0)
    {
    /* VREPI */
    instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4));
    instruction[1] = (sljit_u16)chr;
    instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
    sljit_emit_op_custom(compiler, instruction, 6);
    }
  return;
  }

if (step == 0)
  {
  OP1(SLJIT_MOV, tmp_general_reg, 0, SLJIT_IMM, chr);

  /* VLVG */
  instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | sljit_get_register_index(SLJIT_GP_REGISTER, tmp_general_reg));
  instruction[1] = 0;
  instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x22);
  }
else
  {
  /* VREP */
  instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | dst_vreg);
  instruction[1] = 0;
  instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xc << 8) | 0x4d);
  }

sljit_emit_op_custom(compiler, instruction, 6);
}

#endif
|
||||||
|
|
||||||
|
/* Emits at most one vector instruction of a comparison sequence. The caller
runs through the steps 0, 1, 2 in order.

  step  compare type             emitted instruction
  0     vector_compare_match1i   VO    dst = dst | cmp2
  0     vector_compare_match2    VCEQ  tmp = (dst == cmp2)
  1     any                      VCEQ  dst = (dst == cmp1)
  2     vector_compare_match2    VO    dst = dst | tmp

Every other combination emits nothing. The *_ind arguments are vector
register indexes. */
static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, vector_compare_type compare_type,
  int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
{
sljit_u16 instruction[3];

SLJIT_ASSERT(step >= 0 && step <= 2);

if (step == 1)
  {
  /* VCEQ */
  instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind);
  instruction[1] = (sljit_u16)(cmp1_ind << 12);
  instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0xf8);
  }
else if (compare_type != vector_compare_match2)
  {
  if (step != 0 || compare_type != vector_compare_match1i)
    return;

  /* VO */
  instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind);
  instruction[1] = (sljit_u16)(cmp2_ind << 12);
  instruction[2] = (sljit_u16)((0xe << 8) | 0x6a);
  }
else if (step == 0)
  {
  /* VCEQ */
  instruction[0] = (sljit_u16)(0xe700 | (tmp_ind << 4) | dst_ind);
  instruction[1] = (sljit_u16)(cmp2_ind << 12);
  instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0xf8);
  }
else if (step == 2)
  {
  /* VO */
  instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind);
  instruction[1] = (sljit_u16)(tmp_ind << 12);
  instruction[2] = (sljit_u16)((0xe << 8) | 0x6a);
  }
else
  return;

sljit_emit_op_custom(compiler, instruction, 6);
}
|
||||||
|
|
||||||
|
#define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1
|
||||||
|
|
||||||
|
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
|
||||||
|
{
|
||||||
|
DEFINE_COMPILER;
|
||||||
|
sljit_u16 instruction[3];
|
||||||
|
struct sljit_label *start;
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
struct sljit_label *restart;
|
||||||
|
#endif
|
||||||
|
struct sljit_jump *quit;
|
||||||
|
struct sljit_jump *partial_quit[2];
|
||||||
|
vector_compare_type compare_type = vector_compare_match1;
|
||||||
|
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
|
||||||
|
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
|
||||||
|
sljit_s32 data_ind = 0;
|
||||||
|
sljit_s32 tmp_ind = 1;
|
||||||
|
sljit_s32 cmp1_ind = 2;
|
||||||
|
sljit_s32 cmp2_ind = 3;
|
||||||
|
sljit_s32 zero_ind = 4;
|
||||||
|
sljit_u32 bit = 0;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
SLJIT_UNUSED_ARG(offset);
|
||||||
|
|
||||||
|
if (char1 != char2)
|
||||||
|
{
|
||||||
|
bit = char1 ^ char2;
|
||||||
|
compare_type = vector_compare_match1i;
|
||||||
|
|
||||||
|
if (!is_powerof2(bit))
|
||||||
|
{
|
||||||
|
bit = 0;
|
||||||
|
compare_type = vector_compare_match2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
|
||||||
|
if (common->mode == PCRE2_JIT_COMPLETE)
|
||||||
|
add_jump(compiler, &common->failed_match, partial_quit[0]);
|
||||||
|
|
||||||
|
/* First part (unaligned start) */
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 16);
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
|
||||||
|
/* VREPI */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (cmp1_ind << 4));
|
||||||
|
instruction[1] = (sljit_u16)(char1 | bit);
|
||||||
|
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
|
||||||
|
if (char1 != char2)
|
||||||
|
{
|
||||||
|
/* VREPI */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4));
|
||||||
|
instruction[1] = (sljit_u16)(bit != 0 ? bit : char2);
|
||||||
|
/* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
}
|
||||||
|
|
||||||
|
#else /* PCRE2_CODE_UNIT_WIDTH == 32 */
|
||||||
|
|
||||||
|
for (int i = 0; i < 2; i++)
|
||||||
|
{
|
||||||
|
replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP1);
|
||||||
|
|
||||||
|
if (char1 != char2)
|
||||||
|
replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||||
|
|
||||||
|
if (compare_type == vector_compare_match2)
|
||||||
|
{
|
||||||
|
/* VREPI */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4));
|
||||||
|
instruction[1] = 0;
|
||||||
|
instruction[2] = (sljit_u16)((0x8 << 8) | 0x45);
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
restart = LABEL();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0);
|
||||||
|
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15);
|
||||||
|
|
||||||
|
if (compare_type != vector_compare_match2)
|
||||||
|
{
|
||||||
|
if (compare_type == vector_compare_match1i)
|
||||||
|
fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
|
||||||
|
|
||||||
|
/* VFEE */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
|
||||||
|
instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
|
||||||
|
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (i = 0; i < 3; i++)
|
||||||
|
fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
|
||||||
|
|
||||||
|
/* VFENE */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
|
||||||
|
instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
|
||||||
|
instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* VLGVB */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind);
|
||||||
|
instruction[1] = 7;
|
||||||
|
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
|
||||||
|
quit = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
|
||||||
|
|
||||||
|
OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16);
|
||||||
|
|
||||||
|
/* Second part (aligned) */
|
||||||
|
start = LABEL();
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
|
||||||
|
|
||||||
|
partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
|
||||||
|
if (common->mode == PCRE2_JIT_COMPLETE)
|
||||||
|
add_jump(compiler, &common->failed_match, partial_quit[1]);
|
||||||
|
|
||||||
|
load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0);
|
||||||
|
|
||||||
|
if (compare_type != vector_compare_match2)
|
||||||
|
{
|
||||||
|
if (compare_type == vector_compare_match1i)
|
||||||
|
fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
|
||||||
|
|
||||||
|
/* VFEE */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
|
||||||
|
instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
|
||||||
|
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (i = 0; i < 3; i++)
|
||||||
|
fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
|
||||||
|
|
||||||
|
/* VFENE */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
|
||||||
|
instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
|
||||||
|
instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
}
|
||||||
|
|
||||||
|
sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW);
|
||||||
|
JUMPTO(SLJIT_OVERFLOW, start);
|
||||||
|
|
||||||
|
/* VLGVB */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind);
|
||||||
|
instruction[1] = 7;
|
||||||
|
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
|
||||||
|
|
||||||
|
JUMPHERE(quit);
|
||||||
|
|
||||||
|
if (common->mode != PCRE2_JIT_COMPLETE)
|
||||||
|
{
|
||||||
|
JUMPHERE(partial_quit[0]);
|
||||||
|
JUMPHERE(partial_quit[1]);
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
|
||||||
|
SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||||
|
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
if (common->utf && offset > 0)
|
||||||
|
{
|
||||||
|
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
|
||||||
|
|
||||||
|
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
|
||||||
|
|
||||||
|
quit = jump_if_utf_char_start(compiler, TMP1);
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||||
|
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 16);
|
||||||
|
JUMPTO(SLJIT_JUMP, restart);
|
||||||
|
|
||||||
|
JUMPHERE(quit);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD 1
|
||||||
|
|
||||||
|
static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
|
||||||
|
{
|
||||||
|
DEFINE_COMPILER;
|
||||||
|
sljit_u16 instruction[3];
|
||||||
|
struct sljit_label *start;
|
||||||
|
struct sljit_jump *quit;
|
||||||
|
jump_list *not_found = NULL;
|
||||||
|
vector_compare_type compare_type = vector_compare_match1;
|
||||||
|
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
|
||||||
|
sljit_s32 tmp3_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP3);
|
||||||
|
sljit_s32 data_ind = 0;
|
||||||
|
sljit_s32 tmp_ind = 1;
|
||||||
|
sljit_s32 cmp1_ind = 2;
|
||||||
|
sljit_s32 cmp2_ind = 3;
|
||||||
|
sljit_s32 zero_ind = 4;
|
||||||
|
sljit_u32 bit = 0;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if (char1 != char2)
|
||||||
|
{
|
||||||
|
bit = char1 ^ char2;
|
||||||
|
compare_type = vector_compare_match1i;
|
||||||
|
|
||||||
|
if (!is_powerof2(bit))
|
||||||
|
{
|
||||||
|
bit = 0;
|
||||||
|
compare_type = vector_compare_match2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
|
||||||
|
|
||||||
|
/* First part (unaligned start) */
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, 16);
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
|
||||||
|
/* VREPI */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (cmp1_ind << 4));
|
||||||
|
instruction[1] = (sljit_u16)(char1 | bit);
|
||||||
|
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
|
||||||
|
if (char1 != char2)
|
||||||
|
{
|
||||||
|
/* VREPI */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4));
|
||||||
|
instruction[1] = (sljit_u16)(bit != 0 ? bit : char2);
|
||||||
|
/* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
}
|
||||||
|
|
||||||
|
#else /* PCRE2_CODE_UNIT_WIDTH == 32 */
|
||||||
|
|
||||||
|
for (int i = 0; i < 2; i++)
|
||||||
|
{
|
||||||
|
replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP3);
|
||||||
|
|
||||||
|
if (char1 != char2)
|
||||||
|
replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP3);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||||
|
|
||||||
|
if (compare_type == vector_compare_match2)
|
||||||
|
{
|
||||||
|
/* VREPI */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4));
|
||||||
|
instruction[1] = 0;
|
||||||
|
instruction[2] = (sljit_u16)((0x8 << 8) | 0x45);
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
}
|
||||||
|
|
||||||
|
load_from_mem_vector(compiler, TRUE, data_ind, tmp1_reg_ind, 0);
|
||||||
|
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15);
|
||||||
|
|
||||||
|
if (compare_type != vector_compare_match2)
|
||||||
|
{
|
||||||
|
if (compare_type == vector_compare_match1i)
|
||||||
|
fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
|
||||||
|
|
||||||
|
/* VFEE */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
|
||||||
|
instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
|
||||||
|
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (i = 0; i < 3; i++)
|
||||||
|
fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
|
||||||
|
|
||||||
|
/* VFENE */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
|
||||||
|
instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
|
||||||
|
instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* VLGVB */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind);
|
||||||
|
instruction[1] = 7;
|
||||||
|
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
|
||||||
|
quit = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0);
|
||||||
|
|
||||||
|
OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 16);
|
||||||
|
|
||||||
|
/* Second part (aligned) */
|
||||||
|
start = LABEL();
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 16);
|
||||||
|
|
||||||
|
add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
|
||||||
|
|
||||||
|
load_from_mem_vector(compiler, TRUE, data_ind, tmp1_reg_ind, 0);
|
||||||
|
|
||||||
|
if (compare_type != vector_compare_match2)
|
||||||
|
{
|
||||||
|
if (compare_type == vector_compare_match1i)
|
||||||
|
fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
|
||||||
|
|
||||||
|
/* VFEE */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
|
||||||
|
instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
|
||||||
|
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (i = 0; i < 3; i++)
|
||||||
|
fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
|
||||||
|
|
||||||
|
/* VFENE */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
|
||||||
|
instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
|
||||||
|
instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
}
|
||||||
|
|
||||||
|
sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW);
|
||||||
|
JUMPTO(SLJIT_OVERFLOW, start);
|
||||||
|
|
||||||
|
/* VLGVB */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind);
|
||||||
|
instruction[1] = 7;
|
||||||
|
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
|
||||||
|
|
||||||
|
JUMPHERE(quit);
|
||||||
|
add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
|
||||||
|
|
||||||
|
return not_found;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1
|
||||||
|
|
||||||
|
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
|
||||||
|
PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
|
||||||
|
{
|
||||||
|
DEFINE_COMPILER;
|
||||||
|
sljit_u16 instruction[3];
|
||||||
|
struct sljit_label *start;
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
struct sljit_label *restart;
|
||||||
|
#endif
|
||||||
|
struct sljit_jump *quit;
|
||||||
|
struct sljit_jump *jump[2];
|
||||||
|
vector_compare_type compare1_type = vector_compare_match1;
|
||||||
|
vector_compare_type compare2_type = vector_compare_match1;
|
||||||
|
sljit_u32 bit1 = 0;
|
||||||
|
sljit_u32 bit2 = 0;
|
||||||
|
sljit_s32 diff = IN_UCHARS(offs2 - offs1);
|
||||||
|
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
|
||||||
|
sljit_s32 tmp2_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP2);
|
||||||
|
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
|
||||||
|
sljit_s32 data1_ind = 0;
|
||||||
|
sljit_s32 data2_ind = 1;
|
||||||
|
sljit_s32 tmp1_ind = 2;
|
||||||
|
sljit_s32 tmp2_ind = 3;
|
||||||
|
sljit_s32 cmp1a_ind = 4;
|
||||||
|
sljit_s32 cmp1b_ind = 5;
|
||||||
|
sljit_s32 cmp2a_ind = 6;
|
||||||
|
sljit_s32 cmp2b_ind = 7;
|
||||||
|
sljit_s32 zero_ind = 8;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
|
||||||
|
SLJIT_ASSERT(-diff <= (sljit_s32)IN_UCHARS(max_fast_forward_char_pair_offset()));
|
||||||
|
SLJIT_ASSERT(tmp1_reg_ind != 0 && tmp2_reg_ind != 0);
|
||||||
|
|
||||||
|
if (char1a != char1b)
|
||||||
|
{
|
||||||
|
bit1 = char1a ^ char1b;
|
||||||
|
compare1_type = vector_compare_match1i;
|
||||||
|
|
||||||
|
if (!is_powerof2(bit1))
|
||||||
|
{
|
||||||
|
bit1 = 0;
|
||||||
|
compare1_type = vector_compare_match2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (char2a != char2b)
|
||||||
|
{
|
||||||
|
bit2 = char2a ^ char2b;
|
||||||
|
compare2_type = vector_compare_match1i;
|
||||||
|
|
||||||
|
if (!is_powerof2(bit2))
|
||||||
|
{
|
||||||
|
bit2 = 0;
|
||||||
|
compare2_type = vector_compare_match2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Initialize. */
|
||||||
|
if (common->match_end_ptr != 0)
|
||||||
|
{
|
||||||
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
|
||||||
|
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
|
||||||
|
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
|
||||||
|
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0);
|
||||||
|
SELECT(SLJIT_LESS, STR_END, TMP1, 0, STR_END);
|
||||||
|
}
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
|
||||||
|
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||||
|
OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15);
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
|
||||||
|
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff);
|
||||||
|
|
||||||
|
/* VREPI */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (cmp1a_ind << 4));
|
||||||
|
instruction[1] = (sljit_u16)(char1a | bit1);
|
||||||
|
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
|
||||||
|
if (char1a != char1b)
|
||||||
|
{
|
||||||
|
/* VREPI */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (cmp1b_ind << 4));
|
||||||
|
instruction[1] = (sljit_u16)(bit1 != 0 ? bit1 : char1b);
|
||||||
|
/* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* VREPI */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (cmp2a_ind << 4));
|
||||||
|
instruction[1] = (sljit_u16)(char2a | bit2);
|
||||||
|
/* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
|
||||||
|
if (char2a != char2b)
|
||||||
|
{
|
||||||
|
/* VREPI */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (cmp2b_ind << 4));
|
||||||
|
instruction[1] = (sljit_u16)(bit2 != 0 ? bit2 : char2b);
|
||||||
|
/* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
}
|
||||||
|
|
||||||
|
#else /* PCRE2_CODE_UNIT_WIDTH == 32 */
|
||||||
|
|
||||||
|
for (int i = 0; i < 2; i++)
|
||||||
|
{
|
||||||
|
replicate_imm_vector(compiler, i, cmp1a_ind, char1a | bit1, TMP1);
|
||||||
|
|
||||||
|
if (char1a != char1b)
|
||||||
|
replicate_imm_vector(compiler, i, cmp1b_ind, bit1 != 0 ? bit1 : char1b, TMP1);
|
||||||
|
|
||||||
|
replicate_imm_vector(compiler, i, cmp2a_ind, char2a | bit2, TMP1);
|
||||||
|
|
||||||
|
if (char2a != char2b)
|
||||||
|
replicate_imm_vector(compiler, i, cmp2b_ind, bit2 != 0 ? bit2 : char2b, TMP1);
|
||||||
|
}
|
||||||
|
|
||||||
|
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff);
|
||||||
|
|
||||||
|
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||||
|
|
||||||
|
/* VREPI */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4));
|
||||||
|
instruction[1] = 0;
|
||||||
|
instruction[2] = (sljit_u16)((0x8 << 8) | 0x45);
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
restart = LABEL();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
jump[0] = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0);
|
||||||
|
load_from_mem_vector(compiler, TRUE, data2_ind, tmp1_reg_ind, 0);
|
||||||
|
jump[1] = JUMP(SLJIT_JUMP);
|
||||||
|
JUMPHERE(jump[0]);
|
||||||
|
load_from_mem_vector(compiler, FALSE, data2_ind, tmp1_reg_ind, 0);
|
||||||
|
JUMPHERE(jump[1]);
|
||||||
|
|
||||||
|
load_from_mem_vector(compiler, TRUE, data1_ind, str_ptr_reg_ind, 0);
|
||||||
|
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 16);
|
||||||
|
|
||||||
|
for (i = 0; i < 3; i++)
|
||||||
|
{
|
||||||
|
fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
|
||||||
|
fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* VN */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
|
||||||
|
instruction[1] = (sljit_u16)(data2_ind << 12);
|
||||||
|
instruction[2] = (sljit_u16)((0xe << 8) | 0x68);
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
|
||||||
|
/* VFENE */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
|
||||||
|
instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
|
||||||
|
instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
|
||||||
|
/* VLGVB */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data1_ind);
|
||||||
|
instruction[1] = 7;
|
||||||
|
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
|
||||||
|
quit = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
|
||||||
|
|
||||||
|
OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16);
|
||||||
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, diff);
|
||||||
|
|
||||||
|
/* Main loop. */
|
||||||
|
start = LABEL();
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
|
||||||
|
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||||
|
|
||||||
|
load_from_mem_vector(compiler, FALSE, data1_ind, str_ptr_reg_ind, 0);
|
||||||
|
load_from_mem_vector(compiler, FALSE, data2_ind, str_ptr_reg_ind, tmp1_reg_ind);
|
||||||
|
|
||||||
|
for (i = 0; i < 3; i++)
|
||||||
|
{
|
||||||
|
fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
|
||||||
|
fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* VN */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
|
||||||
|
instruction[1] = (sljit_u16)(data2_ind << 12);
|
||||||
|
instruction[2] = (sljit_u16)((0xe << 8) | 0x68);
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
|
||||||
|
/* VFENE */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
|
||||||
|
instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
|
||||||
|
instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
|
||||||
|
sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW);
|
||||||
|
JUMPTO(SLJIT_OVERFLOW, start);
|
||||||
|
|
||||||
|
/* VLGVB */
|
||||||
|
instruction[0] = (sljit_u16)(0xe700 | (tmp2_reg_ind << 4) | data1_ind);
|
||||||
|
instruction[1] = 7;
|
||||||
|
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 6);
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
|
||||||
|
|
||||||
|
JUMPHERE(quit);
|
||||||
|
|
||||||
|
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||||
|
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
if (common->utf)
|
||||||
|
{
|
||||||
|
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
|
||||||
|
|
||||||
|
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));
|
||||||
|
|
||||||
|
quit = jump_if_utf_char_start(compiler, TMP1);
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||||
|
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||||
|
|
||||||
|
/* TMP1 contains diff. */
|
||||||
|
OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15);
|
||||||
|
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff);
|
||||||
|
JUMPTO(SLJIT_JUMP, restart);
|
||||||
|
|
||||||
|
JUMPHERE(quit);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
|
||||||
|
|
||||||
|
if (common->match_end_ptr != 0)
|
||||||
|
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* SLJIT_CONFIG_S390X */
|
||||||
|
|
||||||
|
#if (defined SLJIT_CONFIG_LOONGARCH_64 && SLJIT_CONFIG_LOONGARCH_64)
|
||||||
|
|
||||||
|
#ifdef __linux__
|
||||||
|
/* Using getauxval(AT_HWCAP) under Linux for detecting whether LSX is available */
|
||||||
|
#include <sys/auxv.h>
|
||||||
|
#define LOONGARCH_HWCAP_LSX (1 << 4)
|
||||||
|
#define HAS_LSX_SUPPORT ((getauxval(AT_HWCAP) & LOONGARCH_HWCAP_LSX) != 0)
|
||||||
|
#else
|
||||||
|
#define HAS_LSX_SUPPORT 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef sljit_ins sljit_u32;
|
||||||
|
|
||||||
|
#define SI12_IMM_MASK 0x003ffc00
|
||||||
|
#define UI5_IMM_MASK 0x00007c00
|
||||||
|
#define UI2_IMM_MASK 0x00000c00
|
||||||
|
|
||||||
|
#define VD(vd) ((sljit_ins)vd << 0)
|
||||||
|
#define VJ(vj) ((sljit_ins)vj << 5)
|
||||||
|
#define VK(vk) ((sljit_ins)vk << 10)
|
||||||
|
#define RD_V(rd) ((sljit_ins)rd << 0)
|
||||||
|
#define RJ_V(rj) ((sljit_ins)rj << 5)
|
||||||
|
|
||||||
|
#define IMM_SI12(imm) (((sljit_ins)(imm) << 10) & SI12_IMM_MASK)
|
||||||
|
#define IMM_UI5(imm) (((sljit_ins)(imm) << 10) & UI5_IMM_MASK)
|
||||||
|
#define IMM_UI2(imm) (((sljit_ins)(imm) << 10) & UI2_IMM_MASK)
|
||||||
|
|
||||||
|
// LSX OPCODES:
|
||||||
|
#define VLD 0x2c000000
|
||||||
|
#define VOR_V 0x71268000
|
||||||
|
#define VAND_V 0x71260000
|
||||||
|
#define VBSLL_V 0x728e0000
|
||||||
|
#define VMSKLTZ_B 0x729c4000
|
||||||
|
#define VPICKVE2GR_WU 0x72f3e000
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
#define VREPLGR2VR 0x729f0000
|
||||||
|
#define VSEQ 0x70000000
|
||||||
|
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||||
|
#define VREPLGR2VR 0x729f0400
|
||||||
|
#define VSEQ 0x70008000
|
||||||
|
#else
|
||||||
|
#define VREPLGR2VR 0x729f0800
|
||||||
|
#define VSEQ 0x70010000
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Emits the LSX instructions that compare the vector register dst_ind with
the prepared comparison vectors and leave the comparison result in dst_ind.

  vector_compare_match1    dst = (dst == cmp1)
  vector_compare_match1i   dst = ((dst | cmp2) == cmp1)
  vector_compare_match2    dst = (dst == cmp1) | (dst == cmp2), with tmp_ind
                           used as a scratch vector register
*/
static void fast_forward_char_pair_lsx_compare(struct sljit_compiler *compiler, vector_compare_type compare_type,
  sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
{
if (compare_type == vector_compare_match2)
  {
  /* Two independent compares: keep a second copy of the data in tmp_ind.
  VBSLL.V vd, vj, ui5 (a byte shift by 0, presumably acting as a plain copy) */
  push_inst(compiler, VBSLL_V | VD(tmp_ind) | VJ(dst_ind) | IMM_UI5(0));

  /* VSEQ.B/H/W vd, vj, vk */
  push_inst(compiler, VSEQ | VD(dst_ind) | VJ(dst_ind) | VK(cmp1_ind));

  /* VSEQ.B/H/W vd, vj, vk */
  push_inst(compiler, VSEQ | VD(tmp_ind) | VJ(tmp_ind) | VK(cmp2_ind));

  /* VOR.V vd, vj, vk */
  push_inst(compiler, VOR_V | VD(dst_ind) | VJ(tmp_ind) | VK(dst_ind));
  return;
  }

/* Single compare; in the match1i case the bit held in cmp2 is OR-ed into
the data first. */
if (compare_type == vector_compare_match1i)
  {
  /* VOR.V vd, vj, vk */
  push_inst(compiler, VOR_V | VD(dst_ind) | VJ(cmp2_ind) | VK(dst_ind));
  }

/* VSEQ.B/H/W vd, vj, vk */
push_inst(compiler, VSEQ | VD(dst_ind) | VJ(dst_ind) | VK(cmp1_ind));
}
|
||||||
|
|
||||||
|
#define JIT_HAS_FAST_FORWARD_CHAR_SIMD HAS_LSX_SUPPORT
|
||||||
|
|
||||||
|
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
|
||||||
|
{
|
||||||
|
DEFINE_COMPILER;
|
||||||
|
struct sljit_label *start;
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
struct sljit_label *restart;
|
||||||
|
#endif
|
||||||
|
struct sljit_jump *quit;
|
||||||
|
struct sljit_jump *partial_quit[2];
|
||||||
|
vector_compare_type compare_type = vector_compare_match1;
|
||||||
|
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
|
||||||
|
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
|
||||||
|
sljit_s32 data_ind = 0;
|
||||||
|
sljit_s32 tmp_ind = 1;
|
||||||
|
sljit_s32 cmp1_ind = 2;
|
||||||
|
sljit_s32 cmp2_ind = 3;
|
||||||
|
sljit_u32 bit = 0;
|
||||||
|
|
||||||
|
SLJIT_UNUSED_ARG(offset);
|
||||||
|
|
||||||
|
if (char1 != char2)
|
||||||
|
{
|
||||||
|
bit = char1 ^ char2;
|
||||||
|
compare_type = vector_compare_match1i;
|
||||||
|
|
||||||
|
if (!is_powerof2(bit))
|
||||||
|
{
|
||||||
|
bit = 0;
|
||||||
|
compare_type = vector_compare_match2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
|
||||||
|
if (common->mode == PCRE2_JIT_COMPLETE)
|
||||||
|
add_jump(compiler, &common->failed_match, partial_quit[0]);
|
||||||
|
|
||||||
|
/* First part (unaligned start) */
|
||||||
|
|
||||||
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1 | bit);
|
||||||
|
|
||||||
|
/* VREPLGR2VR.B/H/W vd, rj */
|
||||||
|
push_inst(compiler, VREPLGR2VR | VD(cmp1_ind) | RJ_V(tmp1_reg_ind));
|
||||||
|
|
||||||
|
if (char1 != char2)
|
||||||
|
{
|
||||||
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, bit != 0 ? bit : char2);
|
||||||
|
|
||||||
|
/* VREPLGR2VR.B/H/W vd, rj */
|
||||||
|
push_inst(compiler, VREPLGR2VR | VD(cmp2_ind) | RJ_V(tmp1_reg_ind));
|
||||||
|
}
|
||||||
|
|
||||||
|
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
|
||||||
|
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
restart = LABEL();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
|
||||||
|
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
|
||||||
|
|
||||||
|
/* VLD vd, rj, si12 */
|
||||||
|
push_inst(compiler, VLD | VD(data_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
|
||||||
|
fast_forward_char_pair_lsx_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
|
||||||
|
|
||||||
|
/* VMSKLTZ.B vd, vj */
|
||||||
|
push_inst(compiler, VMSKLTZ_B | VD(tmp_ind) | VJ(data_ind));
|
||||||
|
|
||||||
|
/* VPICKVE2GR.WU rd, vj, ui2 */
|
||||||
|
push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp_ind) | IMM_UI2(0));
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
|
||||||
|
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||||
|
|
||||||
|
quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
|
||||||
|
|
||||||
|
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
|
||||||
|
|
||||||
|
/* Second part (aligned) */
|
||||||
|
start = LABEL();
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
|
||||||
|
|
||||||
|
partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
|
||||||
|
if (common->mode == PCRE2_JIT_COMPLETE)
|
||||||
|
add_jump(compiler, &common->failed_match, partial_quit[1]);
|
||||||
|
|
||||||
|
/* VLD vd, rj, si12 */
|
||||||
|
push_inst(compiler, VLD | VD(data_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
|
||||||
|
fast_forward_char_pair_lsx_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
|
||||||
|
|
||||||
|
/* VMSKLTZ.B vd, vj */
|
||||||
|
push_inst(compiler, VMSKLTZ_B | VD(tmp_ind) | VJ(data_ind));
|
||||||
|
|
||||||
|
/* VPICKVE2GR.WU rd, vj, ui2 */
|
||||||
|
push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp_ind) | IMM_UI2(0));
|
||||||
|
|
||||||
|
CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
|
||||||
|
|
||||||
|
JUMPHERE(quit);
|
||||||
|
|
||||||
|
/* CTZ.W rd, rj */
|
||||||
|
push_inst(compiler, CTZ_W | RD_V(tmp1_reg_ind) | RJ_V(tmp1_reg_ind));
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
|
||||||
|
|
||||||
|
if (common->mode != PCRE2_JIT_COMPLETE)
|
||||||
|
{
|
||||||
|
JUMPHERE(partial_quit[0]);
|
||||||
|
JUMPHERE(partial_quit[1]);
|
||||||
|
OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
|
||||||
|
SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||||
|
|
||||||
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
if (common->utf && offset > 0)
|
||||||
|
{
|
||||||
|
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
|
||||||
|
|
||||||
|
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
|
||||||
|
|
||||||
|
quit = jump_if_utf_char_start(compiler, TMP1);
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||||
|
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||||
|
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
|
||||||
|
JUMPTO(SLJIT_JUMP, restart);
|
||||||
|
|
||||||
|
JUMPHERE(quit);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD HAS_LSX_SUPPORT
|
||||||
|
|
||||||
|
static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
|
||||||
|
{
|
||||||
|
DEFINE_COMPILER;
|
||||||
|
struct sljit_label *start;
|
||||||
|
struct sljit_jump *quit;
|
||||||
|
jump_list *not_found = NULL;
|
||||||
|
vector_compare_type compare_type = vector_compare_match1;
|
||||||
|
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
|
||||||
|
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
|
||||||
|
sljit_s32 data_ind = 0;
|
||||||
|
sljit_s32 tmp_ind = 1;
|
||||||
|
sljit_s32 cmp1_ind = 2;
|
||||||
|
sljit_s32 cmp2_ind = 3;
|
||||||
|
sljit_u32 bit = 0;
|
||||||
|
|
||||||
|
if (char1 != char2)
|
||||||
|
{
|
||||||
|
bit = char1 ^ char2;
|
||||||
|
compare_type = vector_compare_match1i;
|
||||||
|
|
||||||
|
if (!is_powerof2(bit))
|
||||||
|
{
|
||||||
|
bit = 0;
|
||||||
|
compare_type = vector_compare_match2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
|
||||||
|
OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
|
||||||
|
OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
|
||||||
|
|
||||||
|
/* First part (unaligned start) */
|
||||||
|
|
||||||
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1 | bit);
|
||||||
|
|
||||||
|
/* VREPLGR2VR vd, rj */
|
||||||
|
push_inst(compiler, VREPLGR2VR | VD(cmp1_ind) | RJ_V(tmp1_reg_ind));
|
||||||
|
|
||||||
|
if (char1 != char2)
|
||||||
|
{
|
||||||
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, bit != 0 ? bit : char2);
|
||||||
|
/* VREPLGR2VR vd, rj */
|
||||||
|
push_inst(compiler, VREPLGR2VR | VD(cmp2_ind) | RJ_V(tmp1_reg_ind));
|
||||||
|
}
|
||||||
|
|
||||||
|
OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
|
||||||
|
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
|
||||||
|
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
|
||||||
|
|
||||||
|
/* VLD vd, rj, si12 */
|
||||||
|
push_inst(compiler, VLD | VD(data_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
|
||||||
|
fast_forward_char_pair_lsx_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
|
||||||
|
|
||||||
|
/* VMSKLTZ.B vd, vj */
|
||||||
|
push_inst(compiler, VMSKLTZ_B | VD(tmp_ind) | VJ(data_ind));
|
||||||
|
|
||||||
|
/* VPICKVE2GR.WU rd, vj, ui2 */
|
||||||
|
push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp_ind) | IMM_UI2(0));
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
|
||||||
|
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||||
|
|
||||||
|
quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
|
||||||
|
|
||||||
|
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
|
||||||
|
|
||||||
|
/* Second part (aligned) */
|
||||||
|
start = LABEL();
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
|
||||||
|
|
||||||
|
add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||||
|
|
||||||
|
/* VLD vd, rj, si12 */
|
||||||
|
push_inst(compiler, VLD | VD(data_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
|
||||||
|
fast_forward_char_pair_lsx_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
|
||||||
|
|
||||||
|
/* VMSKLTZ.B vd, vj */
|
||||||
|
push_inst(compiler, VMSKLTZ_B | VD(tmp_ind) | VJ(data_ind));
|
||||||
|
|
||||||
|
/* VPICKVE2GR.WU rd, vj, ui2 */
|
||||||
|
push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp_ind) | IMM_UI2(0));
|
||||||
|
|
||||||
|
CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
|
||||||
|
|
||||||
|
JUMPHERE(quit);
|
||||||
|
|
||||||
|
/* CTZ.W rd, rj */
|
||||||
|
push_inst(compiler, CTZ_W | RD_V(tmp1_reg_ind) | RJ_V(tmp1_reg_ind));
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, STR_PTR, 0);
|
||||||
|
add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
|
||||||
|
|
||||||
|
OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
|
||||||
|
return not_found;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD HAS_LSX_SUPPORT
|
||||||
|
|
||||||
|
/* Emits LoongArch LSX code that advances STR_PTR to the next position where
   the code unit at offset offs1 equals char1a or char1b and the code unit at
   offset offs2 equals char2a or char2b. The generated code jumps to
   common->failed_match when STR_PTR reaches STR_END without a candidate.

   common   JIT compiler state; must be in PCRE2_JIT_COMPLETE mode.
   offs1    offset of the first character; must be greater than offs2.
   char1a/char1b  accepted values for the character at offs1 (equal when
                  there is only one accepted value).
   offs2    offset of the second character.
   char2a/char2b  accepted values for the character at offs2.

   Vector registers 0-7 and the TMP1, TMP2 (and TMP3 when match_end_ptr is in
   use) registers are overwritten by the emitted code. */
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
  PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
{
DEFINE_COMPILER;
vector_compare_type compare1_type = vector_compare_match1;
vector_compare_type compare2_type = vector_compare_match1;
sljit_u32 bit1 = 0;
sljit_u32 bit2 = 0;
/* Byte distance between the two inspected characters. */
sljit_u32 diff = IN_UCHARS(offs1 - offs2);
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
sljit_s32 tmp2_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP2);
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
/* Fixed vector register assignment used by the emitted instructions. */
sljit_s32 data1_ind = 0;
sljit_s32 data2_ind = 1;
sljit_s32 tmp1_ind = 2;
sljit_s32 tmp2_ind = 3;
sljit_s32 cmp1a_ind = 4;
sljit_s32 cmp1b_ind = 5;
sljit_s32 cmp2a_ind = 6;
sljit_s32 cmp2b_ind = 7;
struct sljit_label *start;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
#endif
struct sljit_jump *jump[2];

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
SLJIT_ASSERT(diff <= (unsigned)IN_UCHARS(max_fast_forward_char_pair_offset()));

/* Initialize. */
if (common->match_end_ptr != 0)
  {
  /* Save STR_END in TMP3 (restored at the end of this function) and
     presumably lower it to match_end_ptr + offs1 + 1 when that is smaller;
     the exact operand order of SELECT should be confirmed against sljit. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0);
  SELECT(SLJIT_LESS, STR_END, TMP1, 0, STR_END);
  }

/* From here on STR_PTR points at the first character (offset offs1); the
   adjustment is undone just before returning. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

/* Choose the comparison kind for the first character and load its
   constant(s) into TMP1/TMP2 so they can be replicated into vectors. */
if (char1a == char1b)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1a);
else
  {
  bit1 = char1a ^ char1b;
  if (is_powerof2(bit1))
    {
    /* The two values differ in a single bit. */
    compare1_type = vector_compare_match1i;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1a | bit1);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, bit1);
    }
  else
    {
    compare1_type = vector_compare_match2;
    bit1 = 0;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1a);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, char1b);
    }
  }

/* VREPLGR2VR vd, rj */
push_inst(compiler, VREPLGR2VR | VD(cmp1a_ind) | RJ_V(tmp1_reg_ind));

if (char1a != char1b)
  {
  /* VREPLGR2VR vd, rj */
  push_inst(compiler, VREPLGR2VR | VD(cmp1b_ind) | RJ_V(tmp2_reg_ind));
  }

/* Same setup for the second character. */
if (char2a == char2b)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char2a);
else
  {
  bit2 = char2a ^ char2b;
  if (is_powerof2(bit2))
    {
    compare2_type = vector_compare_match1i;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char2a | bit2);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, bit2);
    }
  else
    {
    compare2_type = vector_compare_match2;
    bit2 = 0;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char2a);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, char2b);
    }
  }

/* VREPLGR2VR vd, rj */
push_inst(compiler, VREPLGR2VR | VD(cmp2a_ind) | RJ_V(tmp1_reg_ind));

if (char2a != char2b)
  {
  /* VREPLGR2VR vd, rj */
  push_inst(compiler, VREPLGR2VR | VD(cmp2b_ind) | RJ_V(tmp2_reg_ind));
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
restart = LABEL();
#endif

/* TMP1 = address of the second character; TMP2 = misalignment of STR_PTR
   within its 16 byte block; STR_PTR is rounded down to that block. */
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, diff);
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* VLD vd, rj, si12 */
push_inst(compiler, VLD | VD(data1_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));

/* When the second character lies inside the block just loaded, derive data2
   by byte-shifting data1 instead of loading from below the aligned block. */
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_PTR, 0);

/* VLD vd, rj, si12 */
push_inst(compiler, VLD | VD(data2_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(-(sljit_s8)diff));
jump[1] = JUMP(SLJIT_JUMP);

JUMPHERE(jump[0]);

/* VBSLL.V vd, vj, ui5 */
push_inst(compiler, VBSLL_V | VD(data2_ind) | VJ(data1_ind) | IMM_UI5(diff));

JUMPHERE(jump[1]);

fast_forward_char_pair_lsx_compare(compiler, compare2_type, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
fast_forward_char_pair_lsx_compare(compiler, compare1_type, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);

/* Both characters must match at a position, so the two results are ANDed,
   exactly as in the main loop below. (This used to emit VOR.V, which
   reported positions where only one of the two characters matched.) */
/* VAND.V vd, vj, vk */
push_inst(compiler, VAND_V | VD(data1_ind) | VJ(data1_ind) | VK(data2_ind));

/* VMSKLTZ.B vd, vj */
push_inst(compiler, VMSKLTZ_B | VD(tmp1_ind) | VJ(data1_ind));

/* VPICKVE2GR.WU rd, vj, ui2 */
push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp1_ind) | IMM_UI2(0));

/* Ignore matches before the first STR_PTR. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);

jump[0] = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);

/* No candidate in the first block: go back to the aligned address so the
   loop below steps over whole 16 byte blocks. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* Main loop. */
start = LABEL();

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

/* VLD vd, rj, si12 */
push_inst(compiler, VLD | VD(data1_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
push_inst(compiler, VLD | VD(data2_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(-(sljit_s8)diff));

fast_forward_char_pair_lsx_compare(compiler, compare1_type, data1_ind, cmp1a_ind, cmp1b_ind, tmp2_ind);
fast_forward_char_pair_lsx_compare(compiler, compare2_type, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind);

/* VAND.V vd, vj, vk */
push_inst(compiler, VAND_V | VD(data1_ind) | VJ(data1_ind) | VK(data2_ind));

/* VMSKLTZ.B vd, vj */
push_inst(compiler, VMSKLTZ_B | VD(tmp1_ind) | VJ(data1_ind));

/* VPICKVE2GR.WU rd, vj, ui2 */
push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp1_ind) | IMM_UI2(0));

CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);

JUMPHERE(jump[0]);

/* The index of the lowest set mask bit is the byte offset of the first
   candidate relative to STR_PTR. */
/* CTZ.W rd, rj */
push_inst(compiler, CTZ_W | RD_V(tmp1_reg_ind) | RJ_V(tmp1_reg_ind));

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
  {
  /* Check the code unit at the would-be match start; when it is not the
     start of a UTF character, step one code unit forward and search again. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));

  jump[0] = jump_if_utf_char_start(compiler, TMP1);

  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart);

  add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP));

  JUMPHERE(jump[0]);
  }
#endif

/* Undo the initial offs1 adjustment of STR_PTR. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
|
||||||
|
|
||||||
|
#endif /* SLJIT_CONFIG_LOONGARCH_64 */
|
||||||
|
|
||||||
|
#endif /* !SUPPORT_VALGRIND */
|
||||||
2541
3rd/pcre2/src/pcre2_jit_test.c
Normal file
2541
3rd/pcre2/src/pcre2_jit_test.c
Normal file
@@ -0,0 +1,2541 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
New API code Copyright (c) 2016 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
#include "config.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#define PCRE2_CODE_UNIT_WIDTH 0
|
||||||
|
#include "pcre2.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
Letter characters:
|
||||||
|
\xe6\x92\xad = 0x64ad = 25773 (kanji)
|
||||||
|
Non-letter characters:
|
||||||
|
\xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
|
||||||
|
\xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
|
||||||
|
\xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
|
||||||
|
\xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
|
||||||
|
Newlines:
|
||||||
|
\xc2\x85 = 0x85 = 133 (NExt Line = NEL)
|
||||||
|
\xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
|
||||||
|
Othercase pairs:
|
||||||
|
\xc3\xa9 = 0xe9 = 233 (e')
|
||||||
|
\xc3\x89 = 0xc9 = 201 (E')
|
||||||
|
\xc3\xa1 = 0xe1 = 225 (a')
|
||||||
|
\xc3\x81 = 0xc1 = 193 (A')
|
||||||
|
\x53 = 0x53 = S
|
||||||
|
\x73 = 0x73 = s
|
||||||
|
\xc5\xbf = 0x17f = 383 (long S)
|
||||||
|
\xc8\xba = 0x23a = 570
|
||||||
|
\xe2\xb1\xa5 = 0x2c65 = 11365
|
||||||
|
\xe1\xbd\xb8 = 0x1f78 = 8056
|
||||||
|
\xe1\xbf\xb8 = 0x1ff8 = 8184
|
||||||
|
\xf0\x90\x90\x80 = 0x10400 = 66560
|
||||||
|
\xf0\x90\x90\xa8 = 0x10428 = 66600
|
||||||
|
\xc7\x84 = 0x1c4 = 452
|
||||||
|
\xc7\x85 = 0x1c5 = 453
|
||||||
|
\xc7\x86 = 0x1c6 = 454
|
||||||
|
Caseless sets:
|
||||||
|
ucp_Armenian - \x{531}-\x{556} -> \x{561}-\x{586}
|
||||||
|
ucp_Coptic - \x{2c80}-\x{2ce3} -> caseless: XOR 0x1
|
||||||
|
ucp_Latin - \x{ff21}-\x{ff3a} -> \x{ff41}-\x{ff5a}
|
||||||
|
|
||||||
|
Mark property:
|
||||||
|
\xcc\x8d = 0x30d = 781
|
||||||
|
Special:
|
||||||
|
\xc2\x80 = 0x80 = 128 (lowest 2 byte character)
|
||||||
|
\xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
|
||||||
|
\xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
|
||||||
|
\xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
|
||||||
|
\xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
|
||||||
|
\xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int regression_tests(void);
|
||||||
|
static int invalid_utf8_regression_tests(void);
|
||||||
|
static int invalid_utf16_regression_tests(void);
|
||||||
|
static int invalid_utf32_regression_tests(void);
|
||||||
|
|
||||||
|
/* Test driver entry point.

   Queries PCRE2_CONFIG_JIT through the first available code unit width and
   returns 1 with a message when JIT support is not reported. Otherwise runs
   every regression suite and returns the bitwise OR of their results, so the
   exit status is non-zero when any suite returns non-zero. */
int main(void)
{
	int jit = 0;
	int result;

#if defined SUPPORT_PCRE2_8
	pcre2_config_8(PCRE2_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE2_16
	pcre2_config_16(PCRE2_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE2_32
	pcre2_config_32(PCRE2_CONFIG_JIT, &jit);
#endif
	if (!jit) {
		printf("JIT must be enabled to run pcre2_jit_test\n");
		return 1;
	}

	/* Run the suites as separate statements: the operands of a single
	   `a() | b() | ...` expression may be evaluated in any order, which
	   would make the order of the suites' output unspecified. */
	result = regression_tests();
	result |= invalid_utf8_regression_tests();
	result |= invalid_utf16_regression_tests();
	result |= invalid_utf32_regression_tests();
	return result;
}
|
||||||
|
|
||||||
|
/* --------------------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#if !(defined SUPPORT_PCRE2_8) && !(defined SUPPORT_PCRE2_16) && !(defined SUPPORT_PCRE2_32)
|
||||||
|
#error SUPPORT_PCRE2_8 or SUPPORT_PCRE2_16 or SUPPORT_PCRE2_32 must be defined
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Shorthand names for the PCRE2 option combinations used in the first
   (compile_options) column of the regression test table.
   Letters: M = MULTILINE, U = UTF, P = UCP, C = CASELESS. */
#define MU (PCRE2_MULTILINE | PCRE2_UTF)
|
||||||
|
#define MUP (PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP)
|
||||||
|
#define CMU (PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF)
|
||||||
|
#define CMUP (PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP)
|
||||||
|
#define M (PCRE2_MULTILINE)
|
||||||
|
#define MP (PCRE2_MULTILINE | PCRE2_UCP)
|
||||||
|
#define U (PCRE2_UTF)
|
||||||
|
#define CM (PCRE2_CASELESS | PCRE2_MULTILINE)
|
||||||
|
|
||||||
|
/* BSR(x) moves a value into bits 16 and up; GET_BSR() below reverses it.
   NOTE(review): presumably a BSR setting is packed into the same integer as
   the newline type (see GET_NEWLINE) - confirm where the table is consumed. */
#define BSR(x) ((x) << 16)
|
||||||
|
/* Shorthand for the newline convention used by most table entries. */
#define A PCRE2_NEWLINE_ANYCRLF
|
||||||
|
|
||||||
|
/* Keeps the low 16 bits of a packed value. */
#define GET_NEWLINE(x) ((x) & 0xffff)
|
||||||
|
/* Extracts the bits stored by BSR(). */
#define GET_BSR(x) ((x) >> 16)
|
||||||
|
|
||||||
|
/* Flag bits that the test table ORs into the start_offset column
   (e.g. "0 | F_NOMATCH"). OFFSET_MASK covers the low 16 bits, below all of
   the F_* flag bits. */
#define OFFSET_MASK 0x00ffff
|
||||||
|
#define F_NO8 0x010000
|
||||||
|
#define F_NO16 0x020000
|
||||||
|
/* NOTE(review): F_NO32 has the same value as F_NO16, so the two flags cannot
   be told apart - confirm whether this is intentional. */
#define F_NO32 0x020000
|
||||||
|
#define F_NOMATCH 0x040000
|
||||||
|
#define F_DIFF 0x080000
|
||||||
|
#define F_FORCECONV 0x100000
|
||||||
|
#define F_PROPERTY 0x200000
|
||||||
|
|
||||||
|
/* One row of the regression test table: a pattern, the subject string it is
   run against, and the options controlling compilation and matching. */
struct regression_test_case {
|
||||||
|
uint32_t compile_options; /* PCRE2 compile option bits, e.g. MU, CMUP, PCRE2_CASELESS. */
|
||||||
|
int newline; /* Newline convention, e.g. A or PCRE2_NEWLINE_CRLF; 0 in some entries. */
|
||||||
|
int match_options; /* Match-time option bits, e.g. PCRE2_NOTBOL, PCRE2_NOTEOL. */
|
||||||
|
int start_offset; /* Start offset in the subject, ORed with F_* flags such as F_NOMATCH. */
|
||||||
|
const char *pattern; /* Regular expression source. */
|
||||||
|
const char *input; /* Subject string the pattern is matched against. */
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct regression_test_case regression_test_cases[] = {
|
||||||
|
/* Constant strings. */
|
||||||
|
{ MU, A, 0, 0, "AbC", "AbAbC" },
|
||||||
|
{ MU, A, 0, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
|
||||||
|
{ CMU, A, 0, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
|
||||||
|
{ M, A, 0, 0, "[^a]", "aAbB" },
|
||||||
|
{ CM, A, 0, 0, "[^m]", "mMnN" },
|
||||||
|
{ M, A, 0, 0, "a[^b][^#]", "abacd" },
|
||||||
|
{ CM, A, 0, 0, "A[^B][^E]", "abacd" },
|
||||||
|
{ CMU, A, 0, 0, "[^x][^#]", "XxBll" },
|
||||||
|
{ MU, A, 0, 0, "[^a]", "aaa\xc3\xa1#Ab" },
|
||||||
|
{ CMU, A, 0, 0, "[^A]", "aA\xe6\x92\xad" },
|
||||||
|
{ MU, A, 0, 0, "\\W(\\W)?\\w", "\r\n+bc" },
|
||||||
|
{ MU, A, 0, 0, "\\W(\\W)?\\w", "\n\r+bc" },
|
||||||
|
{ MU, A, 0, 0, "\\W(\\W)?\\w", "\r\r+bc" },
|
||||||
|
{ MU, A, 0, 0, "\\W(\\W)?\\w", "\n\n+bc" },
|
||||||
|
{ MU, A, 0, 0, "[axd]", "sAXd" },
|
||||||
|
{ CMU, A, 0, 0, "[axd]", "sAXd" },
|
||||||
|
{ CMU, A, 0, 0 | F_NOMATCH, "[^axd]", "DxA" },
|
||||||
|
{ MU, A, 0, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
|
||||||
|
{ MU, A, 0, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
|
||||||
|
{ CMU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
|
||||||
|
{ MU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
|
||||||
|
{ MU, A, 0, 0, "[^a]", "\xc2\x80[]" },
|
||||||
|
{ CMU, A, 0, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
|
||||||
|
{ CM, A, 0, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
|
||||||
|
{ PCRE2_CASELESS, 0, 0, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
|
||||||
|
{ PCRE2_CASELESS, 0, 0, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
|
||||||
|
{ PCRE2_CASELESS, 0, 0, 0, "a1", "Aa1" },
|
||||||
|
#ifndef NEVER_BACKSLASH_C
|
||||||
|
{ M, A, 0, 0, "\\Ca", "cda" },
|
||||||
|
{ CM, A, 0, 0, "\\Ca", "CDA" },
|
||||||
|
{ M, A, 0, 0 | F_NOMATCH, "\\Cx", "cda" },
|
||||||
|
{ CM, A, 0, 0 | F_NOMATCH, "\\Cx", "CDA" },
|
||||||
|
#endif /* !NEVER_BACKSLASH_C */
|
||||||
|
{ CMUP, A, 0, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
|
||||||
|
{ CMUP, A, 0, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
|
||||||
|
{ CMUP, A, 0, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
|
||||||
|
{ CMUP, A, 0, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
|
||||||
|
{ M, A, 0, 0, "[3-57-9]", "5" },
|
||||||
|
{ PCRE2_AUTO_CALLOUT, A, 0, 0, "12345678901234567890123456789012345678901234567890123456789012345678901234567890",
|
||||||
|
"12345678901234567890123456789012345678901234567890123456789012345678901234567890" },
|
||||||
|
{ 0, A, 0, 0, "..a.......b", "bbbbbbbbbbbbbbbbbbbbbabbbbbbbb" },
|
||||||
|
{ 0, A, 0, 0, "..a.....b", "bbbbbbbbbbbbbbbbbbbbbabbbbbbbb" },
|
||||||
|
|
||||||
|
/* Assertions. */
|
||||||
|
{ MU, A, 0, 0, "\\b[^A]", "A_B#" },
|
||||||
|
{ M, A, 0, 0 | F_NOMATCH, "\\b\\W", "\n*" },
|
||||||
|
{ MU, A, 0, 0, "\\B[^,]\\b[^s]\\b", "#X" },
|
||||||
|
{ MP, A, 0, 0, "\\B", "_\xa1" },
|
||||||
|
{ MP, A, 0, 0 | F_PROPERTY, "\\b_\\b[,A]\\B", "_," },
|
||||||
|
{ MUP, A, 0, 0, "\\b", "\xe6\x92\xad!" },
|
||||||
|
{ MUP, A, 0, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
|
||||||
|
{ MUP, A, 0, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
|
||||||
|
{ MUP, A, 0, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
|
||||||
|
{ CMUP, A, 0, 0, "\\By", "\xf0\x90\x90\xa8y" },
|
||||||
|
{ M, A, 0, 0 | F_NOMATCH, "\\R^", "\n" },
|
||||||
|
{ M, A, 0, 1 | F_NOMATCH, "^", "\n" },
|
||||||
|
{ 0, 0, 0, 0, "^ab", "ab" },
|
||||||
|
{ 0, 0, 0, 0 | F_NOMATCH, "^ab", "aab" },
|
||||||
|
{ M, PCRE2_NEWLINE_CRLF, 0, 0, "^a", "\r\raa\n\naa\r\naa" },
|
||||||
|
{ MU, A, 0, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
|
||||||
|
{ M, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--b--\x85--" },
|
||||||
|
{ MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xe2\x80\xa8--" },
|
||||||
|
{ MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xc2\x85--" },
|
||||||
|
{ 0, 0, 0, 0, "ab$", "ab" },
|
||||||
|
{ 0, 0, 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
|
||||||
|
{ PCRE2_DOLLAR_ENDONLY, 0, 0, 0 | F_NOMATCH, "ab$", "abab\r\n" },
|
||||||
|
{ M, PCRE2_NEWLINE_CRLF, 0, 0, "a$", "\r\raa\n\naa\r\naa" },
|
||||||
|
{ M, PCRE2_NEWLINE_ANY, 0, 0, "a$", "aaa" },
|
||||||
|
{ MU, PCRE2_NEWLINE_ANYCRLF, 0, 0, "#$", "#\xc2\x85###\r#" },
|
||||||
|
{ MU, PCRE2_NEWLINE_ANY, 0, 0, "#$", "#\xe2\x80\xa9" },
|
||||||
|
{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0 | F_NOMATCH, "^a", "aa\naa" },
|
||||||
|
{ M, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0, "^a", "aa\naa" },
|
||||||
|
{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\naa" },
|
||||||
|
{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\r\n" },
|
||||||
|
{ U | PCRE2_DOLLAR_ENDONLY, PCRE2_NEWLINE_ANY, 0, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
|
||||||
|
{ M, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0, "a$", "aa\naa" },
|
||||||
|
{ 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa" },
|
||||||
|
{ U, PCRE2_NEWLINE_CR, 0, 0, "a\\Z", "aaa\r" },
|
||||||
|
{ 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa\n" },
|
||||||
|
{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r" },
|
||||||
|
{ U, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\n" },
|
||||||
|
{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r\n" },
|
||||||
|
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
|
||||||
|
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
|
||||||
|
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
|
||||||
|
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
|
||||||
|
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
|
||||||
|
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
|
||||||
|
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
|
||||||
|
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
|
||||||
|
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
|
||||||
|
{ U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xc2\x85" },
|
||||||
|
{ U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
|
||||||
|
{ M, A, 0, 0, "\\Aa", "aaa" },
|
||||||
|
{ M, A, 0, 1 | F_NOMATCH, "\\Aa", "aaa" },
|
||||||
|
{ M, A, 0, 1, "\\Ga", "aaa" },
|
||||||
|
{ M, A, 0, 1 | F_NOMATCH, "\\Ga", "aba" },
|
||||||
|
{ M, A, 0, 0, "a\\z", "aaa" },
|
||||||
|
{ M, A, 0, 0 | F_NOMATCH, "a\\z", "aab" },
|
||||||
|
|
||||||
|
/* Brackets and alternatives. */
|
||||||
|
{ MU, A, 0, 0, "(ab|bb|cd)", "bacde" },
|
||||||
|
{ MU, A, 0, 0, "(?:ab|a)(bc|c)", "ababc" },
|
||||||
|
{ MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
|
||||||
|
{ CMU, A, 0, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
|
||||||
|
{ MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
|
||||||
|
{ MU, A, 0, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
|
||||||
|
{ MU, A, 0, 0, "\xc7\x82|\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
|
||||||
|
{ MU, A, 0, 0, "=\xc7\x82|#\xc6\x82", "\xf1\x83\x82\x82=\xc7\x82\xc7\x83" },
|
||||||
|
{ MU, A, 0, 0, "\xc7\x82\xc7\x83|\xc6\x82\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
|
||||||
|
{ MU, A, 0, 0, "\xc6\x82\xc6\x82|\xc7\x83\xc7\x83|\xc8\x84\xc8\x84", "\xf1\x83\x82\x82\xc8\x84\xc8\x84" },
|
||||||
|
{ U, A, 0, 0, "\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80", "\xdf\xbf\xc2\x80\xe4\x84\x80" },
|
||||||
|
{ U, A, 0, 0, "(?:\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80)#", "\xdf\xbf\xc2\x80#\xe4\x84\x80#" },
|
||||||
|
{ CM, A, 0, 0, "ab|cd", "CD" },
|
||||||
|
{ CM, A, 0, 0, "a1277|a1377|bX487", "bx487" },
|
||||||
|
{ CM, A, 0, 0, "a1277|a1377|bx487", "bX487" },
|
||||||
|
{ 0, A, 0, 0, "(a|)b*+a", "a" },
|
||||||
|
{ 0, A, 0, 0 | F_NOMATCH, "(.|.|.|.|.)(|.|.|.|.)(.||.|.|.)(.|.||.|.)(.|.|.||.)(.|.|.|.|)(A|.|.|.|.)(.|A|.|.|.)(.|.|A|.|.)(.|.|.|A|.)(.|.|.|.|A)(B|.|.|.|.)(.|B|.|.|.)(.|.|B|.|.)(.|.|.|B|.)(.|.|.|.|B)xa", "1234567890123456ax" },
|
||||||
|
|
||||||
|
/* Greedy and non-greedy ? operators. */
|
||||||
|
{ MU, A, 0, 0, "(?:a)?a", "laab" },
|
||||||
|
{ CMU, A, 0, 0, "(A)?A", "llaab" },
|
||||||
|
{ MU, A, 0, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
|
||||||
|
{ MU, A, 0, 0, "(a)?a", "manm" },
|
||||||
|
{ CMU, A, 0, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
|
||||||
|
{ MU, A, 0, 0, "(a|b)?\?d((?:e)?)", "abcde" },
|
||||||
|
{ MU, A, 0, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
|
||||||
|
{ M, A, 0, 0, "(?:a?|a)b", "ba" },
|
||||||
|
|
||||||
|
/* Greedy and non-greedy + operators */
|
||||||
|
{ MU, A, 0, 0, "(aa)+aa", "aaaaaaa" },
|
||||||
|
{ MU, A, 0, 0, "(aa)+?aa", "aaaaaaa" },
|
||||||
|
{ MU, A, 0, 0, "(?:aba|ab|a)+l", "ababamababal" },
|
||||||
|
{ MU, A, 0, 0, "(?:aba|ab|a)+?l", "ababamababal" },
|
||||||
|
{ MU, A, 0, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
|
||||||
|
{ MU, A, 0, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
|
||||||
|
{ MU, A, 0, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
|
||||||
|
{ MU, A, 0, 0, "(aa|bb){8,1000}", "abaabbaabbaabbaab_aabbaabbaabbaabbaabbaabb_" },
|
||||||
|
|
||||||
|
/* Greedy and non-greedy * operators */
|
||||||
|
{ CMU, A, 0, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
|
||||||
|
{ MU, A, 0, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
|
||||||
|
{ MU, A, 0, 0, "(aa|ab)*ab", "aaabaaab" },
|
||||||
|
{ CMU, A, 0, 0, "(aa|Ab)*?aB", "aaabaaab" },
|
||||||
|
{ MU, A, 0, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
|
||||||
|
{ MU, A, 0, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
|
||||||
|
{ M, A, 0, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
|
||||||
|
{ M, A, 0, 0, "((?:a|)*){0}a", "a" },
|
||||||
|
|
||||||
|
/* Combining ? + * operators */
|
||||||
|
{ MU, A, 0, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
|
||||||
|
{ MU, A, 0, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
|
||||||
|
{ MU, A, 0, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
|
||||||
|
{ MU, A, 0, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
|
||||||
|
{ MU, A, 0, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
|
||||||
|
|
||||||
|
/* Single character iterators. */
|
||||||
|
{ MU, A, 0, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
|
||||||
|
{ MU, A, 0, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
|
||||||
|
{ MU, A, 0, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
|
||||||
|
{ MU, A, 0, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
|
||||||
|
{ MU, A, 0, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
|
||||||
|
{ MU, A, 0, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
|
||||||
|
{ MU, A, 0, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
|
||||||
|
{ MU, A, 0, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
|
||||||
|
{ MU, A, 0, 0, "(ba{2})+c", "baabaaabacbaabaac" },
|
||||||
|
{ MU, A, 0, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
|
||||||
|
{ MU, A, 0, 0, "(a?+[^b])+", "babaacacb" },
|
||||||
|
{ MU, A, 0, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
|
||||||
|
{ CMU, A, 0, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
|
||||||
|
{ CMU, A, 0, 0, "[c-f]+k", "DemmFke" },
|
||||||
|
{ MU, A, 0, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
|
||||||
|
{ MU, A, 0, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
|
||||||
|
{ CMU, A, 0, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
|
||||||
|
{ CMU, A, 0, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
|
||||||
|
{ CMU, A, 0, 0, "[ace]{3,}", "AcbDAcEEcEd" },
|
||||||
|
{ CMU, A, 0, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
|
||||||
|
{ MU, A, 0, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
|
||||||
|
{ CMU, A, 0, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
|
||||||
|
{ MU, A, 0, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
|
||||||
|
{ MU, A, 0, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
|
||||||
|
{ MU, A, 0, 0, "\\b\\w+\\B", "x,a_cd" },
|
||||||
|
{ MUP, A, 0, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
|
||||||
|
{ CMU, A, 0, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
|
||||||
|
{ CMUP, A, 0, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
|
||||||
|
{ CMU, A, 0, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
|
||||||
|
{ CMU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
|
||||||
|
{ MU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
|
||||||
|
{ MU, A, 0, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
|
||||||
|
{ MU, A, 0, 0, "\\d+123", "987654321,01234" },
|
||||||
|
{ MU, A, 0, 0, "abcd*|\\w+xy", "aaaaa,abxyz" },
|
||||||
|
{ MU, A, 0, 0, "(?:abc|((?:amc|\\b\\w*xy)))", "aaaaa,abxyz" },
|
||||||
|
{ MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.abcd#."},
|
||||||
|
{ MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.mbcd#."},
|
||||||
|
{ MU, A, 0, 0, ".[ab]*.", "xx" },
|
||||||
|
{ MU, A, 0, 0, ".[ab]*a", "xxa" },
|
||||||
|
{ MU, A, 0, 0, ".[ab]?.", "xx" },
|
||||||
|
{ MU, A, 0, 0, "_[ab]+_*a", "_aa" },
|
||||||
|
{ MU, A, 0, 0, "#(A+)#\\d+", "#A#A#0" },
|
||||||
|
{ MU, A, 0, 0, "(?P<size>\\d+)m|M", "4M" },
|
||||||
|
{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\n?.+#", "\n,\n,#" },
|
||||||
|
{ 0, A, 0, 0, "<(\\w+)[\\s\\w]+id>", "<br><div id>" },
|
||||||
|
{ MU, A, 0, 0, "([a-z]{0,3}c;)+", "ccccc;c;cc;ccc;cccccccccccccccc;" },
|
||||||
|
|
||||||
|
/* Bracket repeats with limit. */
|
||||||
|
{ MU, A, 0, 0, "(?:(ab){2}){5}M", "abababababababababababM" },
|
||||||
|
{ MU, A, 0, 0, "(?:ab|abab){1,5}M", "abababababababababababM" },
|
||||||
|
{ MU, A, 0, 0, "(?>ab|abab){1,5}M", "abababababababababababM" },
|
||||||
|
{ MU, A, 0, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" },
|
||||||
|
{ MU, A, 0, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" },
|
||||||
|
{ MU, A, 0, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" },
|
||||||
|
{ MU, A, 0, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" },
|
||||||
|
{ MU, A, 0, 0, "(ab){4,6}?M", "abababababababM" },
|
||||||
|
|
||||||
|
/* Basic character sets. */
|
||||||
|
{ MU, A, 0, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
|
||||||
|
{ MU, A, 0, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
|
||||||
|
{ MU, A, 0, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
|
||||||
|
{ MU, A, 0, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
|
||||||
|
{ MU, A, 0, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
|
||||||
|
{ MU, A, 0, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
|
||||||
|
{ MU, A, 0, 0, "x[bcef]+", "xaxdxecbfg" },
|
||||||
|
{ MU, A, 0, 0, "x[bcdghij]+", "xaxexfxdgbjk" },
|
||||||
|
{ MU, A, 0, 0, "x[^befg]+", "xbxexacdhg" },
|
||||||
|
{ MU, A, 0, 0, "x[^bcdl]+", "xlxbxaekmd" },
|
||||||
|
{ MU, A, 0, 0, "x[^bcdghi]+", "xbxdxgxaefji" },
|
||||||
|
{ MU, A, 0, 0, "x[B-Fb-f]+", "xaxAxgxbfBFG" },
|
||||||
|
{ CMU, A, 0, 0, "\\x{e9}+", "#\xf0\x90\x90\xa8\xc3\xa8\xc3\xa9\xc3\x89\xc3\x88" },
|
||||||
|
{ CMU, A, 0, 0, "[^\\x{e9}]+", "\xc3\xa9#\xf0\x90\x90\xa8\xc3\xa8\xc3\x88\xc3\x89" },
|
||||||
|
{ MU, A, 0, 0, "[\\x02\\x7e]+", "\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x02\x7e\x7f" },
|
||||||
|
{ MU, A, 0, 0, "[^\\x02\\x7e]+", "\x02\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x7f\x7e" },
|
||||||
|
{ MU, A, 0, 0, "[\\x{81}-\\x{7fe}]+", "#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xc2\x81\xdf\xbe\xdf\xbf" },
|
||||||
|
{ MU, A, 0, 0, "[^\\x{81}-\\x{7fe}]+", "\xc2\x81#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xdf\xbf\xdf\xbe" },
|
||||||
|
{ MU, A, 0, 0, "[\\x{801}-\\x{fffe}]+", "#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xe0\xa0\x81\xef\xbf\xbe\xef\xbf\xbf" },
|
||||||
|
{ MU, A, 0, 0, "[^\\x{801}-\\x{fffe}]+", "\xe0\xa0\x81#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xef\xbf\xbf\xef\xbf\xbe" },
|
||||||
|
{ MU, A, 0, 0, "[\\x{10001}-\\x{10fffe}]+", "#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf0\x90\x80\x81\xf4\x8f\xbf\xbe\xf4\x8f\xbf\xbf" },
|
||||||
|
{ MU, A, 0, 0, "[^\\x{10001}-\\x{10fffe}]+", "\xf0\x90\x80\x81#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbe" },
|
||||||
|
{ CMU, A, 0, 0 | F_NOMATCH | F_PROPERTY, "^[\\x{100}-\\x{17f}]", " " },
|
||||||
|
{ M, A, 0, 0 | F_NOMATCH, "[^\\S\\W]{6}", "abcdefghijk" },
|
||||||
|
|
||||||
|
/* Unicode properties. */
|
||||||
|
{ MUP, A, 0, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
|
||||||
|
{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
|
||||||
|
{ MUP, A, 0, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
|
||||||
|
{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
|
||||||
|
{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
|
||||||
|
{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
|
||||||
|
{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
|
||||||
|
{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
|
||||||
|
{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
|
||||||
|
{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
|
||||||
|
{ MUP, A, 0, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
|
||||||
|
{ MUP, A, 0, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
|
||||||
|
{ CMUP, A, 0, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
|
||||||
|
{ MUP, A, 0, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
|
||||||
|
{ MUP, A, 0, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
|
||||||
|
{ MU, A, 0, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
|
||||||
|
{ CMUP, A, 0, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
|
||||||
|
{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
|
||||||
|
{ MUP, A, 0, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
|
||||||
|
{ PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "[a-b\\s]{2,5}[^a]", "AB baaa" },
|
||||||
|
{ MUP, 0, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Hangul}\\p{Z}]", " " },
|
||||||
|
{ MUP, 0, 0, 0, "[\\p{Lu}\\P{Latin}]+", "c\xEA\xA4\xAE,A,b" },
|
||||||
|
{ MUP, 0, 0, 0, "[\\x{a92e}\\p{Lu}\\P{Latin}]+", "c\xEA\xA4\xAE,A,b" },
|
||||||
|
{ CMUP, 0, 0, 0, "[^S]\\B", "\xe2\x80\x8a" },
|
||||||
|
{ MUP, 0, 0, 0 | F_NOMATCH, "[^[:print:]\\x{f6f6}]", "\xef\x9b\xb6" },
|
||||||
|
{ MUP, 0, 0, 0, "[[:xdigit:]\\x{6500}]#", "\xe6\x94\x80#" },
|
||||||
|
{ MUP, 0, 0, 0 | F_PROPERTY, "[\\pC\\PC]#", "A#" },
|
||||||
|
{ MUP, 0, 0, 0 | F_PROPERTY, "[\\x80-\\xff\\x{800}\\x{802}\\x{804}\\p{Cc}]", "\xdf\xbf\xe0\xa0\x80" },
|
||||||
|
|
||||||
|
/* Possible empty brackets. */
|
||||||
|
{ MU, A, 0, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
|
||||||
|
{ MU, A, 0, 0, "(|ab||bc|a)+d", "abcxabcabd" },
|
||||||
|
{ MU, A, 0, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
|
||||||
|
{ MU, A, 0, 0, "(|ab||bc|a)*d", "abcxabcabd" },
|
||||||
|
{ MU, A, 0, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
|
||||||
|
{ MU, A, 0, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
|
||||||
|
{ MU, A, 0, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
|
||||||
|
{ MU, A, 0, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
|
||||||
|
{ MU, A, 0, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
|
||||||
|
{ MU, A, 0, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
|
||||||
|
|
||||||
|
/* Start offset. */
|
||||||
|
{ MU, A, 0, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
|
||||||
|
{ MU, A, 0, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
|
||||||
|
{ MU, A, 0, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
|
||||||
|
{ MU, A, 0, 1, "(\\w\\W\\w)+", "ab#d" },
|
||||||
|
|
||||||
|
/* Newline. */
|
||||||
|
{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
|
||||||
|
{ M, PCRE2_NEWLINE_CR, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
|
||||||
|
{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{1,3}[^#]", "\r\n##...." },
|
||||||
|
{ MU, A, PCRE2_NO_UTF_CHECK, 1, "^.a", "\n\x80\nxa" },
|
||||||
|
{ MU, A, 0, 1, "^", "\r\n" },
|
||||||
|
{ M, PCRE2_NEWLINE_CRLF, 0, 1 | F_NOMATCH, "^", "\r\n" },
|
||||||
|
{ M, PCRE2_NEWLINE_CRLF, 0, 1, "^", "\r\na" },
|
||||||
|
|
||||||
|
/* Any character except newline or any newline. */
|
||||||
|
{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
|
||||||
|
{ U, PCRE2_NEWLINE_CRLF, 0, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
|
||||||
|
{ 0, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
|
||||||
|
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
|
||||||
|
{ U, PCRE2_NEWLINE_ANY, 0, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
|
||||||
|
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
|
||||||
|
{ 0, PCRE2_NEWLINE_ANY, 0, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
|
||||||
|
{ U, PCRE2_NEWLINE_ANY, 0, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
|
||||||
|
{ 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\r" },
|
||||||
|
{ 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\x85#\r\n#" },
|
||||||
|
{ U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\xe2\x80\xa8#c" },
|
||||||
|
{ U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\r\nc" },
|
||||||
|
{ U, PCRE2_NEWLINE_CRLF | BSR(PCRE2_BSR_UNICODE), 0, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "\\R+", "ab" },
|
||||||
|
{ MU, A, 0, 0, "\\R+", "ab\r\n\r" },
|
||||||
|
{ MU, A, 0, 0, "\\R*", "ab\r\n\r" },
|
||||||
|
{ MU, A, 0, 0, "\\R*", "\r\n\r" },
|
||||||
|
{ M, A, 0, 0, "\\R+\x85", "\r\n\n\r#\r\x85\n" },
|
||||||
|
{ MU, A, 0, 0, "\\R{2,4}", "\r\nab\r\r" },
|
||||||
|
{ MU, A, 0, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
|
||||||
|
{ MU, A, 0, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
|
||||||
|
{ MU, A, 0, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
|
||||||
|
{ MU, A, 0, 0, "\\R{2,4}\n", "\r\n\nab\r\r\nab\r\r\n\n" },
|
||||||
|
{ MU, A, 0, 0, "\\R{2,4}\n", "\r\n\nab\n\n\n\r\r\n" },
|
||||||
|
{ MU, A, 0, 0, "\\R{3,}\n", "\r\n\r\n\nab\n\n\n\r\r\n\n" },
|
||||||
|
{ MU, A, 0, 0, "\\R{0,3}\n", "\r\n\r\n\r\n\n" },
|
||||||
|
{ MU, A, 0, 0, "\\R{0,3}\n", "\r\n\r\n\r\n\r" },
|
||||||
|
{ MU, A, 0, 0, "(\\R{0,3}\n;)+", "\r\n\r\n\r\n\r\n\n;\n;\n\n;\n\n\n;\n\n\n\n\n;" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
|
||||||
|
{ MU, A, 0, 0, "\\R+\\R\\R", "\r\r\r" },
|
||||||
|
{ MU, A, 0, 0, "\\R*\\R\\R", "\n\r" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
|
||||||
|
{ MU, A, 0, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
|
||||||
|
|
||||||
|
/* Atomic groups (no fallback from "next" direction). */
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
|
||||||
|
{ MU, A, 0, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
|
||||||
|
"bababcdedefgheijijklmlmnop" },
|
||||||
|
{ MU, A, 0, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
|
||||||
|
{ MU, A, 0, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
|
||||||
|
{ MU, A, 0, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
|
||||||
|
{ MU, A, 0, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
|
||||||
|
{ MU, A, 0, 0, "((?>a|)+?)b", "aaacaaab" },
|
||||||
|
{ MU, A, 0, 0, "(?>x|)*$", "aaa" },
|
||||||
|
{ MU, A, 0, 0, "(?>(x)|)*$", "aaa" },
|
||||||
|
{ MU, A, 0, 0, "(?>x|())*$", "aaa" },
|
||||||
|
{ MU, A, 0, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
|
||||||
|
{ MU, A, 0, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
|
||||||
|
{ MU, A, 0, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
|
||||||
|
{ MU, A, 0, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
|
||||||
|
{ MU, A, 0, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
|
||||||
|
{ MU, A, 0, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
|
||||||
|
{ MU, A, 0, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
|
||||||
|
{ MU, A, 0, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
|
||||||
|
{ MU, A, 0, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
|
||||||
|
{ MU, A, 0, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
|
||||||
|
{ MU, A, 0, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
|
||||||
|
{ MU, A, 0, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
|
||||||
|
{ MU, A, 0, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
|
||||||
|
{ MU, A, 0, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
|
||||||
|
{ CM, A, 0, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
|
||||||
|
{ MU, A, 0, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
|
||||||
|
{ MU, A, 0, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
|
||||||
|
{ MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
|
||||||
|
{ MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
|
||||||
|
{ MU, A, 0, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
|
||||||
|
{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
|
||||||
|
{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
|
||||||
|
{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
|
||||||
|
{ MU, A, 0, 0, "(c(ab)?+ab)+", "cabcababcab" },
|
||||||
|
{ MU, A, 0, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(?>a*|)a", "aaa" },
|
||||||
|
|
||||||
|
/* Possessive quantifiers. */
|
||||||
|
{ MU, A, 0, 0, "(?:a|b)++m", "mababbaaxababbaam" },
|
||||||
|
{ MU, A, 0, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
|
||||||
|
{ MU, A, 0, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
|
||||||
|
{ MU, A, 0, 0, "(a|b)++m", "mababbaaxababbaam" },
|
||||||
|
{ MU, A, 0, 0, "(a|b)*+m", "mababbaaxababbaam" },
|
||||||
|
{ MU, A, 0, 0, "(a|b)*+m", "ababbaaxababbaam" },
|
||||||
|
{ MU, A, 0, 0, "(a|b(*ACCEPT))++m", "maaxab" },
|
||||||
|
{ MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxm" },
|
||||||
|
{ MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
|
||||||
|
{ MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxm" },
|
||||||
|
{ MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
|
||||||
|
{ MU, A, 0, 0, "(b*)++m", "bxbbxbbbxm" },
|
||||||
|
{ MU, A, 0, 0, "(b*)++m", "bxbbxbbbxbbm" },
|
||||||
|
{ MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxm" },
|
||||||
|
{ MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxbbm" },
|
||||||
|
{ MU, A, 0, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
|
||||||
|
{ MU, A, 0, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
|
||||||
|
{ MU, A, 0, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
|
||||||
|
{ MU, A, 0, 0, "(a|(b))++m", "mababbaaxababbaam" },
|
||||||
|
{ MU, A, 0, 0, "((a)|b)*+m", "mababbaaxababbaam" },
|
||||||
|
{ MU, A, 0, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
|
||||||
|
{ MU, A, 0, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
|
||||||
|
{ MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxm" },
|
||||||
|
{ MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
|
||||||
|
{ MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
|
||||||
|
{ MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
|
||||||
|
{ MU, A, 0, 0, "((b*))++m", "bxbbxbbbxm" },
|
||||||
|
{ MU, A, 0, 0, "((b*))++m", "bxbbxbbbxbbm" },
|
||||||
|
{ MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxm" },
|
||||||
|
{ MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxbbm" },
|
||||||
|
{ MU, A, 0, 0, "(A)*+$", "ABC" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
|
||||||
|
{ MU, A, 0, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
|
||||||
|
{ MU, A, 0, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
|
||||||
|
{ MU, A, 0, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
|
||||||
|
{ MU, A, 0, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
|
||||||
|
|
||||||
|
/* Back references. */
|
||||||
|
{ MU, A, 0, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
|
||||||
|
{ CMU, A, 0, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
|
||||||
|
{ CM, A, 0, 0, "(a{2,4})\\1", "AaAaaAaA" },
|
||||||
|
{ MU, A, 0, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
|
||||||
|
{ MU, A, 0, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
|
||||||
|
{ MU, A, 0, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
|
||||||
|
{ MU, A, 0, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
|
||||||
|
{ MU, A, 0, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
|
||||||
|
{ MU, A, 0, 0, "(?:(aa)|b)\\1?b", "bb" },
|
||||||
|
{ CMU, A, 0, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
|
||||||
|
{ MU, A, 0, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
|
||||||
|
{ CMU, A, 0, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
|
||||||
|
{ MU, A, 0, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
|
||||||
|
{ CM, A, 0, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
|
||||||
|
{ MU, A, 0, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
|
||||||
|
{ MU, A, 0, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
|
||||||
|
{ M, A, 0, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
|
||||||
|
{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
|
||||||
|
{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
|
||||||
|
{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
|
||||||
|
{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
|
||||||
|
{ PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
|
||||||
|
{ CMUP, A, 0, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
|
||||||
|
{ MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
|
||||||
|
{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
|
||||||
|
{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>*(?<A>aa)(?<A>bb)", "aabb" },
|
||||||
|
{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{0,3}aaaaaa", "aabbaaaaaa" },
|
||||||
|
{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{2,5}bb", "aabbaaaabb" },
|
||||||
|
{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}m", "aaaaaaaabbbbaabbbbm" },
|
||||||
|
{ MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
|
||||||
|
{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
|
||||||
|
{ MU | PCRE2_DUPNAMES, A, 0, 0, "\\k<A>*?(?<A>aa)(?<A>bb)", "aabb" },
|
||||||
|
{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
|
||||||
|
{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>*?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
|
||||||
|
{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
|
||||||
|
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}M", "aaaaaaaabbbbaabbbbm" },
|
||||||
|
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" },
|
||||||
|
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
|
||||||
|
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
|
||||||
|
{ MU | PCRE2_DUPNAMES, A, 0, 0, "^(?P<NAME>..)(?P<NAME>..)\\k<NAME>{2,4}", "AaAAAaAaAaaA" },
|
||||||
|
{ MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "(a)|\\1+c", "xxc" },
|
||||||
|
{ MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\1+?()", "" },
|
||||||
|
|
||||||
|
/* Assertions. */
|
||||||
|
{ MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
|
||||||
|
{ MU, A, 0, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
|
||||||
|
{ MU, A, 0, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
|
||||||
|
{ MU, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
|
||||||
|
{ MU, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
|
||||||
|
{ M, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
|
||||||
|
{ M, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
|
||||||
|
{ MU, A, 0, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
|
||||||
|
{ MU, A, 0, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
|
||||||
|
{ MU, A, 0, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
|
||||||
|
{ MU, A, 0, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
|
||||||
|
{ MU, A, 0, 0, "((?(?=(a))a)+k)", "bbak" },
|
||||||
|
{ MU, A, 0, 0, "((?(?=a)a)+k)", "bbak" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
|
||||||
|
{ MU, A, 0, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
|
||||||
|
{ MU, A, 0, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
|
||||||
|
{ MU, A, 0, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
|
||||||
|
{ MU, A, 0, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
|
||||||
|
{ MU, A, 0, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
|
||||||
|
{ MU, A, 0, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
|
||||||
|
{ MU, A, 0, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
|
||||||
|
{ MU, A, 0, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
|
||||||
|
{ MU, A, 0, 0, "a(?=(?C)\\B(?C`x`))b", "ab" },
|
||||||
|
{ MU, A, 0, 0, "a(?!(?C)\\B(?C`x`))bb|ab", "abb" },
|
||||||
|
{ MU, A, 0, 0, "a(?=\\b|(?C)\\B(?C`x`))b", "ab" },
|
||||||
|
{ MU, A, 0, 0, "a(?!\\b|(?C)\\B(?C`x`))bb|ab", "abb" },
|
||||||
|
{ MU, A, 0, 0, "c(?(?=(?C)\\B(?C`x`))ab|a)", "cab" },
|
||||||
|
{ MU, A, 0, 0, "c(?(?!(?C)\\B(?C`x`))ab|a)", "cab" },
|
||||||
|
{ MU, A, 0, 0, "c(?(?=\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
|
||||||
|
{ MU, A, 0, 0, "c(?(?!\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
|
||||||
|
{ MU, A, 0, 0, "a(?=)b", "ab" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "a(?!)b", "ab" },
|
||||||
|
{ MU, A, 0, 0, "(?(?<!|(|a)))", "a" },
|
||||||
|
|
||||||
|
/* Not empty, ACCEPT, FAIL */
|
||||||
|
{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
|
||||||
|
{ MU, A, PCRE2_NOTEMPTY, 0, "a*", "bcaad" },
|
||||||
|
{ MU, A, PCRE2_NOTEMPTY, 0, "a*?", "bcaad" },
|
||||||
|
{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
|
||||||
|
{ MU, A, 0, 0, "a(*ACCEPT)b", "ab" },
|
||||||
|
{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
|
||||||
|
{ MU, A, PCRE2_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
|
||||||
|
{ MU, A, PCRE2_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
|
||||||
|
{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
|
||||||
|
{ MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
|
||||||
|
{ MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
|
||||||
|
{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
|
||||||
|
{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
|
||||||
|
{ MU, A, 0, 0, "((a(*ACCEPT)b))", "ab" },
|
||||||
|
{ MU, A, 0, 0, "(a(*FAIL)a|a)", "aaa" },
|
||||||
|
{ MU, A, 0, 0, "(?=ab(*ACCEPT)b)a", "ab" },
|
||||||
|
{ MU, A, 0, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
|
||||||
|
{ MU, A, 0, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
|
||||||
|
{ MU, A, PCRE2_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
|
||||||
|
{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?=A)", "AB" },
|
||||||
|
{ MU | PCRE2_ENDANCHORED, A, 0, 0, "aa(*ACCEPT)aa", "aaa" },
|
||||||
|
|
||||||
|
/* Conditional blocks. */
|
||||||
|
{ MU, A, 0, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
|
||||||
|
{ MU, A, 0, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
|
||||||
|
{ MU, A, 0, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
|
||||||
|
{ MU, A, 0, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
|
||||||
|
{ MU, A, 0, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
|
||||||
|
{ MU, A, 0, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
|
||||||
|
{ MU, A, 0, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
|
||||||
|
{ MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
|
||||||
|
{ MU, A, 0, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
|
||||||
|
{ MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
|
||||||
|
{ MU, A, 0, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
|
||||||
|
{ MU, A, 0, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
|
||||||
|
{ MU, A, 0, 0, "(?(?=a)ab)", "a" },
|
||||||
|
{ MU, A, 0, 0, "(?(?<!b)c)", "b" },
|
||||||
|
{ MU, A, 0, 0, "(?(DEFINE)a(b))", "a" },
|
||||||
|
{ MU, A, 0, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
|
||||||
|
{ MU, A, 0, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
|
||||||
|
{ MU, A, 0, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
|
||||||
|
{ MU, A, 0, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
|
||||||
|
{ MU, A, 0, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
|
||||||
|
{ MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
|
||||||
|
{ MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cbb" },
|
||||||
|
{ MU, A, 0, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
|
||||||
|
{ MU, A, 0, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
|
||||||
|
{ MU, A, 0, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
|
||||||
|
{ MU, A, 0, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
|
||||||
|
{ MU, A, 0, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
|
||||||
|
{ MU, A, 0, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
|
||||||
|
{ MU, A, 0, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
|
||||||
|
{ MU, A, 0, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
|
||||||
|
{ MU, A, 0, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
|
||||||
|
{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
|
||||||
|
{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
|
||||||
|
{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
|
||||||
|
{ MU, A, 0, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
|
||||||
|
{ MU, A, 0, 0, "(?(?!)a|b)", "ab" },
|
||||||
|
{ MU, A, 0, 0, "(?(?!)a)", "ab" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(?(?!)a|b)", "ac" },
|
||||||
|
|
||||||
|
/* Set start of match. */
|
||||||
|
{ MU, A, 0, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
|
||||||
|
{ MU, A, 0, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
|
||||||
|
{ MU, A, 0, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
|
||||||
|
{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
|
||||||
|
{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
|
||||||
|
|
||||||
|
/* First line. */
|
||||||
|
{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
|
||||||
|
{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
|
||||||
|
{ MU | PCRE2_FIRSTLINE, A, 0, 0, "(?<=a)", "a" },
|
||||||
|
{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[^a][^b]", "ab" },
|
||||||
|
{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "a", "\na" },
|
||||||
|
{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[abc]", "\na" },
|
||||||
|
{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^a", "\na" },
|
||||||
|
{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
|
||||||
|
{ MU | PCRE2_FIRSTLINE, A, 0, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
|
||||||
|
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\xc2\x85#" },
|
||||||
|
{ M | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\x85#" },
|
||||||
|
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
|
||||||
|
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
|
||||||
|
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
|
||||||
|
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, "a", "\ra" },
|
||||||
|
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
|
||||||
|
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
|
||||||
|
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 1, ".", "\r\n" },
|
||||||
|
{ PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_LF, 0, 0 | F_NOMATCH, "ab.", "ab" },
|
||||||
|
{ MU | PCRE2_FIRSTLINE, A, 0, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" },
|
||||||
|
{ PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_ANY, 0, 0, "....a", "012\n0a" },
|
||||||
|
{ MU | PCRE2_FIRSTLINE, A, 0, 0, "[aC]", "a" },
|
||||||
|
|
||||||
|
/* Recurse. */
|
||||||
|
{ MU, A, 0, 0, "(a)(?1)", "aa" },
|
||||||
|
{ MU, A, 0, 0, "((a))(?1)", "aa" },
|
||||||
|
{ MU, A, 0, 0, "(b|a)(?1)", "aa" },
|
||||||
|
{ MU, A, 0, 0, "(b|(a))(?1)", "aa" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
|
||||||
|
{ MU, A, 0, 0, "((a)(b)(?:a*))(?1)", "abab" },
|
||||||
|
{ MU, A, 0, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
|
||||||
|
{ MU, A, 0, 0, "((?2)b|(a)){2}(?1)", "aabab" },
|
||||||
|
{ MU, A, 0, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
|
||||||
|
{ MU, A, 0, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
|
||||||
|
{ MU, A, 0, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
|
||||||
|
{ MU, A, 0, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
|
||||||
|
{ MU, A, 0, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
|
||||||
|
{ MU, A, 0, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
|
||||||
|
{ MU, A, 0, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
|
||||||
|
{ MU, A, 0, 0, "b|<(?R)*>", "<<b>" },
|
||||||
|
{ MU, A, 0, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
|
||||||
|
{ MU, A, 0, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
|
||||||
|
{ MU, A, 0, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
|
||||||
|
{ MU, A, 0, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
|
||||||
|
{ MU, A, 0, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
|
||||||
|
{ MU, A, 0, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
|
||||||
|
{ MU, A, 0, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
|
||||||
|
{ MU, A, 0, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
|
||||||
|
{ MU, A, 0, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" },
|
||||||
|
{ MU, A, 0, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" },
|
||||||
|
{ MU, A, 0, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
|
||||||
|
{ MU, A, 0, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
|
||||||
|
{ MU, A, 0, 0, "((.)(?:.|\\2(?1))){0}#(?1)#", "#aabbccdde# #aabbccddee#" },
|
||||||
|
{ MU, A, 0, 0, "((.)(?:\\2|\\2{4}b)){0}#(?:(?1))+#", "#aaaab# #aaaaab#" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(?1)$((.|\\2xx){1,2})", "abc" },
|
||||||
|
|
||||||
|
/* 16 bit specific tests. */
|
||||||
|
{ CM, A, 0, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
|
||||||
|
{ CM, A, 0, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
|
||||||
|
{ CM, A, 0, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
|
||||||
|
{ CM, A, 0, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
|
||||||
|
{ CM, A, 0, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
|
||||||
|
{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
|
||||||
|
{ CM, A, 0, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
|
||||||
|
{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
|
||||||
|
{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
|
||||||
|
{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
|
||||||
|
{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
|
||||||
|
{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
|
||||||
|
{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
|
||||||
|
{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
|
||||||
|
{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
|
||||||
|
{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
|
||||||
|
{ M, A, 0, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
|
||||||
|
{ M, A, 0, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
|
||||||
|
{ CM, A, 0, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
|
||||||
|
{ CM, A, 0, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
|
||||||
|
{ CM, A, 0, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
|
||||||
|
{ CM, A, 0, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
|
||||||
|
{ CM | PCRE2_EXTENDED, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
|
||||||
|
{ CM, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
|
||||||
|
{ CM, A, 0, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
|
||||||
|
{ M, PCRE2_NEWLINE_ANY, 0, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
|
||||||
|
{ 0, BSR(PCRE2_BSR_UNICODE), 0, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
|
||||||
|
{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
|
||||||
|
{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
|
||||||
|
{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
|
||||||
|
{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
|
||||||
|
|
||||||
|
/* Partial matching. */
|
||||||
|
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "ab", "a" },
|
||||||
|
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "ab|a", "a" },
|
||||||
|
{ MU, A, PCRE2_PARTIAL_HARD, 0, "ab|a", "a" },
|
||||||
|
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "\\b#", "a" },
|
||||||
|
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
|
||||||
|
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
|
||||||
|
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a\\B", "a" },
|
||||||
|
{ MU, A, PCRE2_PARTIAL_HARD, 0, "a\\b", "a" },
|
||||||
|
{ M | PCRE2_DUPNAMES, A, PCRE2_PARTIAL_HARD, 0, "^(?P<NAME>..)(?P<NAME>..)\\k<NAME>{2,4}", "AaAAAaAaAaA" },
|
||||||
|
{ M | PCRE2_DUPNAMES, A, PCRE2_PARTIAL_HARD, 0, "^(?P<NAME>..)(?P<NAME>..)\\k<NAME>{2,4}", "AaAAAaAaAaa" },
|
||||||
|
|
||||||
|
/* (*MARK) verb. */
|
||||||
|
{ MU, A, 0, 0, "a(*MARK:aa)a", "ababaa" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
|
||||||
|
{ MU, A, 0, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
|
||||||
|
{ MU, A, 0, 0, "(?>a(*:aa))b|ac", "ac" },
|
||||||
|
{ MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
|
||||||
|
{ MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
|
||||||
|
{ MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
|
||||||
|
{ MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
|
||||||
|
{ MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
|
||||||
|
{ MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(*:mark)m", "a" },
|
||||||
|
|
||||||
|
/* (*COMMIT) verb. */
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
|
||||||
|
{ MU, A, 0, 0, "aa(*COMMIT)b", "xaxaab" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" },
|
||||||
|
|
||||||
|
/* (*PRUNE) verb. */
|
||||||
|
{ MU, A, 0, 0, "aa\\K(*PRUNE)b", "aaab" },
|
||||||
|
{ MU, A, 0, 0, "aa(*PRUNE:bb)b|a", "aa" },
|
||||||
|
{ MU, A, 0, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
|
||||||
|
{ MU, A, 0, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
|
||||||
|
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
|
||||||
|
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" },
|
||||||
|
{ MU, A, 0, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
|
||||||
|
{ MU, A, 0, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
|
||||||
|
{ MU, A, 0, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
|
||||||
|
{ MU, A, 0, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
|
||||||
|
{ MU, A, 0, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
|
||||||
|
{ MU, A, 0, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
|
||||||
|
{ MU, A, 0, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
|
||||||
|
{ MU, A, 0, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
|
||||||
|
{ MU, A, 0, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
|
||||||
|
{ MU, A, 0, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
|
||||||
|
{ MU, A, 0, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
|
||||||
|
{ MU, A, 0, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
|
||||||
|
|
||||||
|
/* (*SKIP) verb. */
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
|
||||||
|
{ MU, A, 0, 0, "(\\w+(*SKIP)#)", "abcd,xyz#," },
|
||||||
|
{ MU, A, 0, 0, "\\w+(*SKIP)#|mm", "abcd,xyz#," },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "b+(?<=(*SKIP)#c)|b+", "#bbb" },
|
||||||
|
|
||||||
|
/* (*THEN) verb. */
|
||||||
|
{ MU, A, 0, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" },
|
||||||
|
{ MU, A, 0, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" },
|
||||||
|
{ MU, A, 0, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" },
|
||||||
|
{ MU, A, 0, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" },
|
||||||
|
{ MU, A, 0, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" },
|
||||||
|
{ MU, A, 0, 0, "(?(?!a(*THEN)b)ad|add)", "add" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
|
||||||
|
{ MU, A, 0, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
|
||||||
|
{ MU, A, 0, 0, "(?=(*THEN: ))* ", " " },
|
||||||
|
{ MU, A, 0, 0, "a(*THEN)(?R) |", "a" },
|
||||||
|
{ MU, A, 0, 0 | F_NOMATCH, "(?<!(*THEN)a|(*THEN)b|(*THEN)ab?|(*THEN)ba?|)", "c" },
|
||||||
|
|
||||||
|
/* Recurse and control verbs. */
|
||||||
|
{ MU, A, 0, 0, "(a(*ACCEPT)b){0}a(?1)b", "aacaabb" },
|
||||||
|
{ MU, A, 0, 0, "((a)\\2(*ACCEPT)b){0}a(?1)b", "aaacaaabb" },
|
||||||
|
{ MU, A, 0, 0, "((ab|a(*ACCEPT)x)+|ababababax){0}_(?1)_", "_ababababax_ _ababababa_" },
|
||||||
|
{ MU, A, 0, 0, "((.)(?:A(*ACCEPT)|(?1)\\2)){0}_(?1)_", "_bcdaAdcb_bcdaAdcb_" },
|
||||||
|
{ MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_", "_ab_" },
|
||||||
|
{ MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_|(_aa_)", "_aa_" },
|
||||||
|
{ MU, A, 0, 0, "(a(*COMMIT)(?:b|bb)|c(*ACCEPT)d|dd){0}_(?1)+_", "_ax_ _cd_ _abbb_ _abcd_ _abbcdd_" },
|
||||||
|
{ MU, A, 0, 0, "((.)(?:.|(*COMMIT)\\2{3}(*ACCEPT).*|.*)){0}_(?1){0,4}_", "_aaaabbbbccccddd_ _aaaabbbbccccdddd_" },
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
/* Script runs and iterations. */
|
||||||
|
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
|
||||||
|
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
|
||||||
|
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
|
||||||
|
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
|
||||||
|
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
|
||||||
|
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)++#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
|
||||||
|
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)?#", "!ab!abc!ab!ab#" },
|
||||||
|
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)??#", "!ab!abc!ab!ab#" },
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
/* Deep recursion. */
|
||||||
|
{ MU, A, 0, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
|
||||||
|
{ MU, A, 0, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
|
||||||
|
{ MU, A, 0, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
|
||||||
|
|
||||||
|
/* Deep recursion: Stack limit reached. */
|
||||||
|
{ M, A, 0, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
|
||||||
|
{ M, A, 0, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
|
||||||
|
{ M, A, 0, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
|
||||||
|
{ M, A, 0, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
|
||||||
|
{ M, A, 0, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
|
||||||
|
|
||||||
|
{ 0, 0, 0, 0, NULL, NULL }
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifdef SUPPORT_PCRE2_8
|
||||||
|
/* JIT stack callback for the 8-bit library: the opaque callback argument
   is itself the stack to use, so it is handed back unchanged. */
static pcre2_jit_stack_8 *callback8(void *arg)
{
	pcre2_jit_stack_8 *jit_stack = arg;

	return jit_stack;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef SUPPORT_PCRE2_16
|
||||||
|
/* JIT stack callback for the 16-bit library: the opaque callback argument
   is itself the stack to use, so it is handed back unchanged. */
static pcre2_jit_stack_16 *callback16(void *arg)
{
	pcre2_jit_stack_16 *jit_stack = arg;

	return jit_stack;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef SUPPORT_PCRE2_32
|
||||||
|
/* JIT stack callback for the 32-bit library: the opaque callback argument
   is itself the stack to use, so it is handed back unchanged. */
static pcre2_jit_stack_32 *callback32(void *arg)
{
	pcre2_jit_stack_32 *jit_stack = arg;

	return jit_stack;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef SUPPORT_PCRE2_8
|
||||||
|
/* JIT stack shared by the 8-bit tests; NULL until getstack8() creates it
   and again after setstack8(NULL) releases it. */
static pcre2_jit_stack_8 *stack8;

/* Returns the shared 8-bit JIT stack, creating it on first use with a
   start size of 1 and a maximum size of 1024 * 1024. The result of the
   creation call is returned as-is, so it may be NULL if creation failed. */
static pcre2_jit_stack_8 *getstack8(void)
{
	if (stack8 != NULL)
		return stack8;

	stack8 = pcre2_jit_stack_create_8(1, 1024 * 1024, NULL);
	return stack8;
}
|
||||||
|
|
||||||
|
/* Attaches the shared 8-bit JIT stack to a match context, or tears it down.

   mcontext != NULL: registers callback8 on the context with the shared
                     stack (created on demand) as the callback argument.
   mcontext == NULL: frees the shared stack, if one exists, and resets the
                     pointer so the next getstack8() call creates a new one. */
static void setstack8(pcre2_match_context_8 *mcontext)
{
	if (mcontext) {
		pcre2_jit_stack_assign_8(mcontext, callback8, getstack8());
		return;
	}

	if (stack8)
		pcre2_jit_stack_free_8(stack8);
	stack8 = NULL;
}
|
||||||
|
#endif /* SUPPORT_PCRE2_8 */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_PCRE2_16
|
||||||
|
/* JIT stack shared by the 16-bit tests; NULL until getstack16() creates it
   and again after setstack16(NULL) releases it. */
static pcre2_jit_stack_16 *stack16;

/* Returns the shared 16-bit JIT stack, creating it on first use with a
   start size of 1 and a maximum size of 1024 * 1024. The result of the
   creation call is returned as-is, so it may be NULL if creation failed. */
static pcre2_jit_stack_16 *getstack16(void)
{
	if (stack16 != NULL)
		return stack16;

	stack16 = pcre2_jit_stack_create_16(1, 1024 * 1024, NULL);
	return stack16;
}
|
||||||
|
|
||||||
|
/* Attaches the shared 16-bit JIT stack to a match context, or tears it down.

   mcontext != NULL: registers callback16 on the context with the shared
                     stack (created on demand) as the callback argument.
   mcontext == NULL: frees the shared stack, if one exists, and resets the
                     pointer so the next getstack16() call creates a new one. */
static void setstack16(pcre2_match_context_16 *mcontext)
{
	if (mcontext) {
		pcre2_jit_stack_assign_16(mcontext, callback16, getstack16());
		return;
	}

	if (stack16)
		pcre2_jit_stack_free_16(stack16);
	stack16 = NULL;
}
|
||||||
|
#endif /* SUPPORT_PCRE2_16 */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_PCRE2_32
|
||||||
|
/* JIT stack shared by the 32-bit tests; NULL until getstack32() creates it
   and again after setstack32(NULL) releases it. */
static pcre2_jit_stack_32 *stack32;

/* Returns the shared 32-bit JIT stack, creating it on first use with a
   start size of 1 and a maximum size of 1024 * 1024. The result of the
   creation call is returned as-is, so it may be NULL if creation failed. */
static pcre2_jit_stack_32 *getstack32(void)
{
	if (stack32 != NULL)
		return stack32;

	stack32 = pcre2_jit_stack_create_32(1, 1024 * 1024, NULL);
	return stack32;
}
|
||||||
|
|
||||||
|
/* Attaches the shared 32-bit JIT stack to a match context, or tears it down.

   mcontext != NULL: registers callback32 on the context with the shared
                     stack (created on demand) as the callback argument.
   mcontext == NULL: frees the shared stack, if one exists, and resets the
                     pointer so the next getstack32() call creates a new one. */
static void setstack32(pcre2_match_context_32 *mcontext)
{
	if (mcontext) {
		pcre2_jit_stack_assign_32(mcontext, callback32, getstack32());
		return;
	}

	if (stack32)
		pcre2_jit_stack_free_32(stack32);
	stack32 = NULL;
}
|
||||||
|
#endif /* SUPPORT_PCRE2_32 */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_PCRE2_16
|
||||||
|
|
||||||
|
/* Converts the NUL-terminated UTF-8 string 'input' into UTF-16 code units.

   input       source bytes, terminated by a zero byte
   output      destination; at most max_length code units are written,
               including the terminating zero
   offsetmap   optional (may be NULL). When given, offsetmap[i] receives the
               byte offset in 'input' of the character whose first code unit
               is output[i]. The slot belonging to the second unit of a
               surrogate pair is skipped and left untouched. One extra entry
               after the last character holds the byte offset at which the
               conversion stopped.
   max_length  capacity of 'output' in code units; nothing is written when it
               is zero or negative

   Returns the number of code units stored, not counting the terminator.

   The decoder is intentionally lenient: continuation bytes are not
   validated and bytes below 0xc0 are copied through unchanged. Two kinds of
   malformed input are handled explicitly so that the function always makes
   progress and never reads beyond the terminator:
     - lead bytes 0xf8..0xff, which do not start any sequence, and
     - multi-byte sequences cut short by the terminating zero.
   In both cases the lead byte is copied through as a single code unit. */
static int convert_utf8_to_utf16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int *offsetmap, int max_length)
{
	PCRE2_SPTR8 iptr = input;
	PCRE2_UCHAR16 *optr = output;
	unsigned int c;
	int extra;	/* number of continuation bytes following the lead byte */
	int i;

	if (max_length <= 0)
		return 0;

	/* One slot is always kept free for the terminating zero. */
	while (*iptr && max_length > 1) {
		if (offsetmap)
			*offsetmap++ = (int)(iptr - input);

		/* Classify the lead byte by its high bits. */
		if (*iptr < 0xc0)
			extra = 0;
		else if (!(*iptr & 0x20))
			extra = 1;
		else if (!(*iptr & 0x10))
			extra = 2;
		else if (!(*iptr & 0x08))
			extra = 3;
		else
			extra = 0;	/* 0xf8..0xff: not a valid lead byte */

		/* A sequence that runs into the terminator is treated as a lone
		   byte, otherwise iptr would step past the end of the string. */
		for (i = 1; i <= extra; i++) {
			if (iptr[i] == 0) {
				extra = 0;
				break;
			}
		}

		if (extra == 0) {
			c = *iptr++;
		} else {
			/* The lead byte carries 5, 4 or 3 payload bits for 1, 2 or 3
			   continuation bytes; each continuation byte adds 6 more. */
			c = *iptr & (0x3fu >> extra);
			for (i = 1; i <= extra; i++)
				c = (c << 6) | (iptr[i] & 0x3f);
			iptr += extra + 1;
		}

		if (c < 65536) {
			*optr++ = c;
			max_length--;
		} else if (max_length <= 2) {
			/* No room for a surrogate pair plus the terminator: stop here.
			   The offsetmap entry written above already marks the stop
			   position. */
			*optr = '\0';
			return (int)(optr - output);
		} else {
			/* Encode as a high/low surrogate pair. */
			c -= 0x10000;
			*optr++ = 0xd800 | ((c >> 10) & 0x3ff);
			*optr++ = 0xdc00 | (c & 0x3ff);
			max_length -= 2;
			if (offsetmap)
				offsetmap++;
		}
	}

	if (offsetmap)
		*offsetmap = (int)(iptr - input);
	*optr = '\0';
	return (int)(optr - output);
}
|
||||||
|
|
||||||
|
/* Widens the NUL-terminated 8-bit string 'input' into 16-bit code units,
   one unit per byte, with no decoding.

   At most max_length units are written to 'output', including the
   terminating zero; nothing is written when max_length is 0.
   Returns the number of units copied, not counting the terminator. */
static int copy_char8_to_char16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int max_length)
{
	int count = 0;

	if (max_length == 0)
		return 0;

	/* Keep one slot free for the terminator. */
	for (; input[count] != 0 && max_length - count > 1; count++)
		output[count] = input[count];

	output[count] = '\0';
	return count;
}
|
||||||
|
|
||||||
|
/* Number of elements in each of the two 16-bit scratch arrays below. */
#define REGTEST_MAX_LENGTH16 4096
|
||||||
|
/* Scratch buffer for the 16-bit tests: receives the pattern, and later the
   subject string, after conversion to 16-bit code units. */
static PCRE2_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
|
||||||
|
/* Offset map filled by convert_utf8_to_utf16(): indexed by 16-bit code unit
   offset, yields the corresponding byte offset in the 8-bit input. */
static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
|
||||||
|
|
||||||
|
#endif /* SUPPORT_PCRE2_16 */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_PCRE2_32
|
||||||
|
|
||||||
|
/* Converts the NUL-terminated UTF-8 string 'input' into UTF-32 code units.

   input       source bytes, terminated by a zero byte
   output      destination; at most max_length code units are written,
               including the terminating zero
   offsetmap   optional (may be NULL). When given, offsetmap[i] receives the
               byte offset in 'input' of the character stored in output[i],
               and one extra entry after the last character holds the byte
               offset at which the conversion stopped.
   max_length  capacity of 'output' in code units; nothing is written when it
               is zero or negative

   Returns the number of code units stored, not counting the terminator.

   The decoder is intentionally lenient: continuation bytes are not
   validated and bytes below 0xc0 are copied through unchanged. Two kinds of
   malformed input are handled explicitly so that the function always makes
   progress and never reads beyond the terminator:
     - lead bytes 0xf8..0xff, which do not start any sequence, and
     - multi-byte sequences cut short by the terminating zero.
   In both cases the lead byte is copied through as a single code unit. */
static int convert_utf8_to_utf32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int *offsetmap, int max_length)
{
	PCRE2_SPTR8 iptr = input;
	PCRE2_UCHAR32 *optr = output;
	unsigned int c;
	int extra;	/* number of continuation bytes following the lead byte */
	int i;

	if (max_length <= 0)
		return 0;

	/* One slot is always kept free for the terminating zero. */
	while (*iptr && max_length > 1) {
		if (offsetmap)
			*offsetmap++ = (int)(iptr - input);

		/* Classify the lead byte by its high bits. */
		if (*iptr < 0xc0)
			extra = 0;
		else if (!(*iptr & 0x20))
			extra = 1;
		else if (!(*iptr & 0x10))
			extra = 2;
		else if (!(*iptr & 0x08))
			extra = 3;
		else
			extra = 0;	/* 0xf8..0xff: not a valid lead byte */

		/* A sequence that runs into the terminator is treated as a lone
		   byte, otherwise iptr would step past the end of the string. */
		for (i = 1; i <= extra; i++) {
			if (iptr[i] == 0) {
				extra = 0;
				break;
			}
		}

		if (extra == 0) {
			c = *iptr++;
		} else {
			/* The lead byte carries 5, 4 or 3 payload bits for 1, 2 or 3
			   continuation bytes; each continuation byte adds 6 more. */
			c = *iptr & (0x3fu >> extra);
			for (i = 1; i <= extra; i++)
				c = (c << 6) | (iptr[i] & 0x3f);
			iptr += extra + 1;
		}

		*optr++ = c;
		max_length--;
	}

	if (offsetmap)
		*offsetmap = (int)(iptr - input);
	*optr = 0;
	return (int)(optr - output);
}
|
||||||
|
|
||||||
|
/* Widens the NUL-terminated 8-bit string 'input' into 32-bit code units,
   one unit per byte, with no decoding.

   At most max_length units are written to 'output', including the
   terminating zero; nothing is written when max_length is 0.
   Returns the number of units copied, not counting the terminator. */
static int copy_char8_to_char32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int max_length)
{
	int count = 0;

	if (max_length == 0)
		return 0;

	/* Keep one slot free for the terminator. */
	for (; input[count] != 0 && max_length - count > 1; count++)
		output[count] = input[count];

	output[count] = '\0';
	return count;
}
|
||||||
|
|
||||||
|
/* Number of elements in each of the two 32-bit scratch arrays below. */
#define REGTEST_MAX_LENGTH32 4096
|
||||||
|
/* Scratch buffer for the 32-bit tests: receives the pattern, and later the
   subject string, after conversion to 32-bit code units. */
static PCRE2_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
|
||||||
|
/* Offset map filled by convert_utf8_to_utf32(): indexed by 32-bit code unit
   offset, yields the corresponding byte offset in the 8-bit input. */
static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
|
||||||
|
|
||||||
|
#endif /* SUPPORT_PCRE2_32 */
|
||||||
|
|
||||||
|
/* Reports whether the NUL-terminated string 'input' is pure 7-bit ASCII.

   Returns 1 when every byte before the terminator is <= 127 (an empty
   string qualifies), and 0 as soon as a byte above 127 is found.

   Bytes are inspected as unsigned char so the result does not depend on
   whether plain char is signed; the cast keeps the const qualifier of the
   parameter. */
static int check_ascii(const char *input)
{
	const unsigned char *ptr = (const unsigned char *)input;

	for (; *ptr != 0; ptr++) {
		if (*ptr > 127)
			return 0;
	}
	return 1;
}
|
||||||
|
|
||||||
|
/* Number of offset pairs requested for every match data block created by
   the tests; each ovector therefore holds OVECTOR_SIZE * 2 entries. */
#define OVECTOR_SIZE 15
|
||||||
|
|
||||||
|
static int regression_tests(void)
|
||||||
|
{
|
||||||
|
struct regression_test_case *current = regression_test_cases;
|
||||||
|
int error;
|
||||||
|
PCRE2_SIZE err_offs;
|
||||||
|
int is_successful;
|
||||||
|
int is_ascii;
|
||||||
|
int total = 0;
|
||||||
|
int successful = 0;
|
||||||
|
int successful_row = 0;
|
||||||
|
int counter = 0;
|
||||||
|
int jit_compile_mode;
|
||||||
|
int utf = 0;
|
||||||
|
uint32_t disabled_options = 0;
|
||||||
|
int i;
|
||||||
|
#ifdef SUPPORT_PCRE2_8
|
||||||
|
pcre2_code_8 *re8;
|
||||||
|
pcre2_compile_context_8 *ccontext8;
|
||||||
|
pcre2_match_data_8 *mdata8_1;
|
||||||
|
pcre2_match_data_8 *mdata8_2;
|
||||||
|
pcre2_match_context_8 *mcontext8;
|
||||||
|
PCRE2_SIZE *ovector8_1 = NULL;
|
||||||
|
PCRE2_SIZE *ovector8_2 = NULL;
|
||||||
|
int return_value8[2];
|
||||||
|
#endif
|
||||||
|
#ifdef SUPPORT_PCRE2_16
|
||||||
|
pcre2_code_16 *re16;
|
||||||
|
pcre2_compile_context_16 *ccontext16;
|
||||||
|
pcre2_match_data_16 *mdata16_1;
|
||||||
|
pcre2_match_data_16 *mdata16_2;
|
||||||
|
pcre2_match_context_16 *mcontext16;
|
||||||
|
PCRE2_SIZE *ovector16_1 = NULL;
|
||||||
|
PCRE2_SIZE *ovector16_2 = NULL;
|
||||||
|
int return_value16[2];
|
||||||
|
int length16;
|
||||||
|
#endif
|
||||||
|
#ifdef SUPPORT_PCRE2_32
|
||||||
|
pcre2_code_32 *re32;
|
||||||
|
pcre2_compile_context_32 *ccontext32;
|
||||||
|
pcre2_match_data_32 *mdata32_1;
|
||||||
|
pcre2_match_data_32 *mdata32_2;
|
||||||
|
pcre2_match_context_32 *mcontext32;
|
||||||
|
PCRE2_SIZE *ovector32_1 = NULL;
|
||||||
|
PCRE2_SIZE *ovector32_2 = NULL;
|
||||||
|
int return_value32[2];
|
||||||
|
int length32;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined SUPPORT_PCRE2_8
|
||||||
|
PCRE2_UCHAR8 cpu_info[128];
|
||||||
|
#elif defined SUPPORT_PCRE2_16
|
||||||
|
PCRE2_UCHAR16 cpu_info[128];
|
||||||
|
#elif defined SUPPORT_PCRE2_32
|
||||||
|
PCRE2_UCHAR32 cpu_info[128];
|
||||||
|
#endif
|
||||||
|
#if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
|
||||||
|
int return_value;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* This test compares the behaviour of interpreter and JIT. Although disabling
|
||||||
|
utf or ucp may make tests fail, if the pcre2_match result is the SAME, it is
|
||||||
|
still considered successful from pcre2_jit_test point of view. */
|
||||||
|
|
||||||
|
#if defined SUPPORT_PCRE2_8
|
||||||
|
pcre2_config_8(PCRE2_CONFIG_JITTARGET, &cpu_info);
|
||||||
|
#elif defined SUPPORT_PCRE2_16
|
||||||
|
pcre2_config_16(PCRE2_CONFIG_JITTARGET, &cpu_info);
|
||||||
|
#elif defined SUPPORT_PCRE2_32
|
||||||
|
pcre2_config_32(PCRE2_CONFIG_JITTARGET, &cpu_info);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
printf("Running JIT regression tests\n");
|
||||||
|
printf(" target CPU of SLJIT compiler: ");
|
||||||
|
for (i = 0; cpu_info[i]; i++)
|
||||||
|
printf("%c", (char)(cpu_info[i]));
|
||||||
|
printf("\n");
|
||||||
|
|
||||||
|
#if defined SUPPORT_PCRE2_8
|
||||||
|
pcre2_config_8(PCRE2_CONFIG_UNICODE, &utf);
|
||||||
|
#elif defined SUPPORT_PCRE2_16
|
||||||
|
pcre2_config_16(PCRE2_CONFIG_UNICODE, &utf);
|
||||||
|
#elif defined SUPPORT_PCRE2_32
|
||||||
|
pcre2_config_32(PCRE2_CONFIG_UNICODE, &utf);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (!utf)
|
||||||
|
disabled_options |= PCRE2_UTF;
|
||||||
|
#ifdef SUPPORT_PCRE2_8
|
||||||
|
printf(" in 8 bit mode with UTF-8 %s:\n", utf ? "enabled" : "disabled");
|
||||||
|
#endif
|
||||||
|
#ifdef SUPPORT_PCRE2_16
|
||||||
|
printf(" in 16 bit mode with UTF-16 %s:\n", utf ? "enabled" : "disabled");
|
||||||
|
#endif
|
||||||
|
#ifdef SUPPORT_PCRE2_32
|
||||||
|
printf(" in 32 bit mode with UTF-32 %s:\n", utf ? "enabled" : "disabled");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
while (current->pattern) {
|
||||||
|
/* printf("\nPattern: %s :\n", current->pattern); */
|
||||||
|
total++;
|
||||||
|
is_ascii = 0;
|
||||||
|
if (!(current->start_offset & F_PROPERTY))
|
||||||
|
is_ascii = check_ascii(current->pattern) && check_ascii(current->input);
|
||||||
|
|
||||||
|
if (current->match_options & PCRE2_PARTIAL_SOFT)
|
||||||
|
jit_compile_mode = PCRE2_JIT_PARTIAL_SOFT;
|
||||||
|
else if (current->match_options & PCRE2_PARTIAL_HARD)
|
||||||
|
jit_compile_mode = PCRE2_JIT_PARTIAL_HARD;
|
||||||
|
else
|
||||||
|
jit_compile_mode = PCRE2_JIT_COMPLETE;
|
||||||
|
error = 0;
|
||||||
|
#ifdef SUPPORT_PCRE2_8
|
||||||
|
re8 = NULL;
|
||||||
|
ccontext8 = pcre2_compile_context_create_8(NULL);
|
||||||
|
if (ccontext8) {
|
||||||
|
if (GET_NEWLINE(current->newline))
|
||||||
|
pcre2_set_newline_8(ccontext8, GET_NEWLINE(current->newline));
|
||||||
|
if (GET_BSR(current->newline))
|
||||||
|
pcre2_set_bsr_8(ccontext8, GET_BSR(current->newline));
|
||||||
|
|
||||||
|
if (!(current->start_offset & F_NO8)) {
|
||||||
|
re8 = pcre2_compile_8((PCRE2_SPTR8)current->pattern, PCRE2_ZERO_TERMINATED,
|
||||||
|
current->compile_options & ~disabled_options,
|
||||||
|
&error, &err_offs, ccontext8);
|
||||||
|
|
||||||
|
if (!re8 && (utf || is_ascii))
|
||||||
|
printf("\n8 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
|
||||||
|
}
|
||||||
|
pcre2_compile_context_free_8(ccontext8);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
printf("\n8 bit: Cannot allocate compile context\n");
|
||||||
|
#endif
|
||||||
|
#ifdef SUPPORT_PCRE2_16
|
||||||
|
if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
|
||||||
|
convert_utf8_to_utf16((PCRE2_SPTR8)current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
|
||||||
|
else
|
||||||
|
copy_char8_to_char16((PCRE2_SPTR8)current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
|
||||||
|
|
||||||
|
re16 = NULL;
|
||||||
|
ccontext16 = pcre2_compile_context_create_16(NULL);
|
||||||
|
if (ccontext16) {
|
||||||
|
if (GET_NEWLINE(current->newline))
|
||||||
|
pcre2_set_newline_16(ccontext16, GET_NEWLINE(current->newline));
|
||||||
|
if (GET_BSR(current->newline))
|
||||||
|
pcre2_set_bsr_16(ccontext16, GET_BSR(current->newline));
|
||||||
|
|
||||||
|
if (!(current->start_offset & F_NO16)) {
|
||||||
|
re16 = pcre2_compile_16(regtest_buf16, PCRE2_ZERO_TERMINATED,
|
||||||
|
current->compile_options & ~disabled_options,
|
||||||
|
&error, &err_offs, ccontext16);
|
||||||
|
|
||||||
|
if (!re16 && (utf || is_ascii))
|
||||||
|
printf("\n16 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
|
||||||
|
}
|
||||||
|
pcre2_compile_context_free_16(ccontext16);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
printf("\n16 bit: Cannot allocate compile context\n");
|
||||||
|
#endif
|
||||||
|
#ifdef SUPPORT_PCRE2_32
|
||||||
|
if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
|
||||||
|
convert_utf8_to_utf32((PCRE2_SPTR8)current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
|
||||||
|
else
|
||||||
|
copy_char8_to_char32((PCRE2_SPTR8)current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
|
||||||
|
|
||||||
|
re32 = NULL;
|
||||||
|
ccontext32 = pcre2_compile_context_create_32(NULL);
|
||||||
|
if (ccontext32) {
|
||||||
|
if (GET_NEWLINE(current->newline))
|
||||||
|
pcre2_set_newline_32(ccontext32, GET_NEWLINE(current->newline));
|
||||||
|
if (GET_BSR(current->newline))
|
||||||
|
pcre2_set_bsr_32(ccontext32, GET_BSR(current->newline));
|
||||||
|
|
||||||
|
if (!(current->start_offset & F_NO32)) {
|
||||||
|
re32 = pcre2_compile_32(regtest_buf32, PCRE2_ZERO_TERMINATED,
|
||||||
|
current->compile_options & ~disabled_options,
|
||||||
|
&error, &err_offs, ccontext32);
|
||||||
|
|
||||||
|
if (!re32 && (utf || is_ascii))
|
||||||
|
printf("\n32 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
|
||||||
|
}
|
||||||
|
pcre2_compile_context_free_32(ccontext32);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
printf("\n32 bit: Cannot allocate compile context\n");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
counter++;
|
||||||
|
if ((counter & 0x3) != 0) {
|
||||||
|
#ifdef SUPPORT_PCRE2_8
|
||||||
|
setstack8(NULL);
|
||||||
|
#endif
|
||||||
|
#ifdef SUPPORT_PCRE2_16
|
||||||
|
setstack16(NULL);
|
||||||
|
#endif
|
||||||
|
#ifdef SUPPORT_PCRE2_32
|
||||||
|
setstack32(NULL);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef SUPPORT_PCRE2_8
|
||||||
|
return_value8[0] = -1000;
|
||||||
|
return_value8[1] = -1000;
|
||||||
|
mdata8_1 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL);
|
||||||
|
mdata8_2 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL);
|
||||||
|
mcontext8 = pcre2_match_context_create_8(NULL);
|
||||||
|
if (!mdata8_1 || !mdata8_2 || !mcontext8) {
|
||||||
|
printf("\n8 bit: Cannot allocate match data\n");
|
||||||
|
pcre2_match_data_free_8(mdata8_1);
|
||||||
|
pcre2_match_data_free_8(mdata8_2);
|
||||||
|
pcre2_match_context_free_8(mcontext8);
|
||||||
|
pcre2_code_free_8(re8);
|
||||||
|
re8 = NULL;
|
||||||
|
} else {
|
||||||
|
ovector8_1 = pcre2_get_ovector_pointer_8(mdata8_1);
|
||||||
|
ovector8_2 = pcre2_get_ovector_pointer_8(mdata8_2);
|
||||||
|
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
||||||
|
ovector8_1[i] = (PCRE2_SIZE)(-2);
|
||||||
|
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
||||||
|
ovector8_2[i] = (PCRE2_SIZE)(-2);
|
||||||
|
pcre2_set_match_limit_8(mcontext8, 10000000);
|
||||||
|
}
|
||||||
|
if (re8) {
|
||||||
|
return_value8[1] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
|
||||||
|
current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, mcontext8);
|
||||||
|
|
||||||
|
if (pcre2_jit_compile_8(re8, jit_compile_mode)) {
|
||||||
|
printf("\n8 bit: JIT compiler does not support \"%s\"\n", current->pattern);
|
||||||
|
} else if ((counter & 0x1) != 0) {
|
||||||
|
setstack8(mcontext8);
|
||||||
|
return_value8[0] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
|
||||||
|
current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
|
||||||
|
} else {
|
||||||
|
pcre2_jit_stack_assign_8(mcontext8, NULL, getstack8());
|
||||||
|
return_value8[0] = pcre2_jit_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
|
||||||
|
current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef SUPPORT_PCRE2_16
|
||||||
|
return_value16[0] = -1000;
|
||||||
|
return_value16[1] = -1000;
|
||||||
|
mdata16_1 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL);
|
||||||
|
mdata16_2 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL);
|
||||||
|
mcontext16 = pcre2_match_context_create_16(NULL);
|
||||||
|
if (!mdata16_1 || !mdata16_2 || !mcontext16) {
|
||||||
|
printf("\n16 bit: Cannot allocate match data\n");
|
||||||
|
pcre2_match_data_free_16(mdata16_1);
|
||||||
|
pcre2_match_data_free_16(mdata16_2);
|
||||||
|
pcre2_match_context_free_16(mcontext16);
|
||||||
|
pcre2_code_free_16(re16);
|
||||||
|
re16 = NULL;
|
||||||
|
} else {
|
||||||
|
ovector16_1 = pcre2_get_ovector_pointer_16(mdata16_1);
|
||||||
|
ovector16_2 = pcre2_get_ovector_pointer_16(mdata16_2);
|
||||||
|
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
||||||
|
ovector16_1[i] = (PCRE2_SIZE)(-2);
|
||||||
|
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
||||||
|
ovector16_2[i] = (PCRE2_SIZE)(-2);
|
||||||
|
pcre2_set_match_limit_16(mcontext16, 10000000);
|
||||||
|
}
|
||||||
|
if (re16) {
|
||||||
|
if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
|
||||||
|
length16 = convert_utf8_to_utf16((PCRE2_SPTR8)current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
|
||||||
|
else
|
||||||
|
length16 = copy_char8_to_char16((PCRE2_SPTR8)current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
|
||||||
|
|
||||||
|
return_value16[1] = pcre2_match_16(re16, regtest_buf16, length16,
|
||||||
|
current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, mcontext16);
|
||||||
|
|
||||||
|
if (pcre2_jit_compile_16(re16, jit_compile_mode)) {
|
||||||
|
printf("\n16 bit: JIT compiler does not support \"%s\"\n", current->pattern);
|
||||||
|
} else if ((counter & 0x1) != 0) {
|
||||||
|
setstack16(mcontext16);
|
||||||
|
return_value16[0] = pcre2_match_16(re16, regtest_buf16, length16,
|
||||||
|
current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
|
||||||
|
} else {
|
||||||
|
pcre2_jit_stack_assign_16(mcontext16, NULL, getstack16());
|
||||||
|
return_value16[0] = pcre2_jit_match_16(re16, regtest_buf16, length16,
|
||||||
|
current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef SUPPORT_PCRE2_32
|
||||||
|
return_value32[0] = -1000;
|
||||||
|
return_value32[1] = -1000;
|
||||||
|
mdata32_1 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL);
|
||||||
|
mdata32_2 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL);
|
||||||
|
mcontext32 = pcre2_match_context_create_32(NULL);
|
||||||
|
if (!mdata32_1 || !mdata32_2 || !mcontext32) {
|
||||||
|
printf("\n32 bit: Cannot allocate match data\n");
|
||||||
|
pcre2_match_data_free_32(mdata32_1);
|
||||||
|
pcre2_match_data_free_32(mdata32_2);
|
||||||
|
pcre2_match_context_free_32(mcontext32);
|
||||||
|
pcre2_code_free_32(re32);
|
||||||
|
re32 = NULL;
|
||||||
|
} else {
|
||||||
|
ovector32_1 = pcre2_get_ovector_pointer_32(mdata32_1);
|
||||||
|
ovector32_2 = pcre2_get_ovector_pointer_32(mdata32_2);
|
||||||
|
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
||||||
|
ovector32_1[i] = (PCRE2_SIZE)(-2);
|
||||||
|
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
||||||
|
ovector32_2[i] = (PCRE2_SIZE)(-2);
|
||||||
|
pcre2_set_match_limit_32(mcontext32, 10000000);
|
||||||
|
}
|
||||||
|
if (re32) {
|
||||||
|
if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
|
||||||
|
length32 = convert_utf8_to_utf32((PCRE2_SPTR8)current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
|
||||||
|
else
|
||||||
|
length32 = copy_char8_to_char32((PCRE2_SPTR8)current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
|
||||||
|
|
||||||
|
return_value32[1] = pcre2_match_32(re32, regtest_buf32, length32,
|
||||||
|
current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, mcontext32);
|
||||||
|
|
||||||
|
if (pcre2_jit_compile_32(re32, jit_compile_mode)) {
|
||||||
|
printf("\n32 bit: JIT compiler does not support \"%s\"\n", current->pattern);
|
||||||
|
} else if ((counter & 0x1) != 0) {
|
||||||
|
setstack32(mcontext32);
|
||||||
|
return_value32[0] = pcre2_match_32(re32, regtest_buf32, length32,
|
||||||
|
current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
|
||||||
|
} else {
|
||||||
|
pcre2_jit_stack_assign_32(mcontext32, NULL, getstack32());
|
||||||
|
return_value32[0] = pcre2_jit_match_32(re32, regtest_buf32, length32,
|
||||||
|
current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
|
||||||
|
return_value8[0], return_value16[0], return_value32[0],
|
||||||
|
(int)ovector8_1[0], (int)ovector8_1[1],
|
||||||
|
(int)ovector16_1[0], (int)ovector16_1[1],
|
||||||
|
(int)ovector32_1[0], (int)ovector32_1[1],
|
||||||
|
(current->compile_options & PCRE2_CASELESS) ? "C" : ""); */
|
||||||
|
|
||||||
|
/* If F_DIFF is set, just run the test, but do not compare the results.
|
||||||
|
Segfaults can still be captured. */
|
||||||
|
|
||||||
|
is_successful = 1;
|
||||||
|
if (!(current->start_offset & F_DIFF)) {
|
||||||
|
#if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
|
||||||
|
if (!(current->start_offset & F_FORCECONV)) {
|
||||||
|
|
||||||
|
/* All results must be the same. */
|
||||||
|
#ifdef SUPPORT_PCRE2_8
|
||||||
|
if ((return_value = return_value8[0]) != return_value8[1]) {
|
||||||
|
printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
|
||||||
|
return_value8[0], return_value8[1], total, current->pattern, current->input);
|
||||||
|
is_successful = 0;
|
||||||
|
} else
|
||||||
|
#endif
|
||||||
|
#ifdef SUPPORT_PCRE2_16
|
||||||
|
if ((return_value = return_value16[0]) != return_value16[1]) {
|
||||||
|
printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
|
||||||
|
return_value16[0], return_value16[1], total, current->pattern, current->input);
|
||||||
|
is_successful = 0;
|
||||||
|
} else
|
||||||
|
#endif
|
||||||
|
#ifdef SUPPORT_PCRE2_32
|
||||||
|
if ((return_value = return_value32[0]) != return_value32[1]) {
|
||||||
|
printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
|
||||||
|
return_value32[0], return_value32[1], total, current->pattern, current->input);
|
||||||
|
is_successful = 0;
|
||||||
|
} else
|
||||||
|
#endif
|
||||||
|
#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
|
||||||
|
if (return_value8[0] != return_value16[0]) {
|
||||||
|
printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
|
||||||
|
return_value8[0], return_value16[0],
|
||||||
|
total, current->pattern, current->input);
|
||||||
|
is_successful = 0;
|
||||||
|
} else
|
||||||
|
#endif
|
||||||
|
#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
|
||||||
|
if (return_value8[0] != return_value32[0]) {
|
||||||
|
printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
|
||||||
|
return_value8[0], return_value32[0],
|
||||||
|
total, current->pattern, current->input);
|
||||||
|
is_successful = 0;
|
||||||
|
} else
|
||||||
|
#endif
|
||||||
|
#if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
|
||||||
|
if (return_value16[0] != return_value32[0]) {
|
||||||
|
printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
|
||||||
|
return_value16[0], return_value32[0],
|
||||||
|
total, current->pattern, current->input);
|
||||||
|
is_successful = 0;
|
||||||
|
} else
|
||||||
|
#endif
|
||||||
|
if (return_value >= 0 || return_value == PCRE2_ERROR_PARTIAL) {
|
||||||
|
if (return_value == PCRE2_ERROR_PARTIAL) {
|
||||||
|
return_value = 2;
|
||||||
|
} else {
|
||||||
|
return_value *= 2;
|
||||||
|
}
|
||||||
|
#ifdef SUPPORT_PCRE2_8
|
||||||
|
return_value8[0] = return_value;
|
||||||
|
#endif
|
||||||
|
#ifdef SUPPORT_PCRE2_16
|
||||||
|
return_value16[0] = return_value;
|
||||||
|
#endif
|
||||||
|
#ifdef SUPPORT_PCRE2_32
|
||||||
|
return_value32[0] = return_value;
|
||||||
|
#endif
|
||||||
|
/* Transform back the results. */
|
||||||
|
if (current->compile_options & PCRE2_UTF) {
|
||||||
|
#ifdef SUPPORT_PCRE2_16
|
||||||
|
for (i = 0; i < return_value; ++i) {
|
||||||
|
if (ovector16_1[i] != PCRE2_UNSET)
|
||||||
|
ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
|
||||||
|
if (ovector16_2[i] != PCRE2_UNSET)
|
||||||
|
ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#ifdef SUPPORT_PCRE2_32
|
||||||
|
for (i = 0; i < return_value; ++i) {
|
||||||
|
if (ovector32_1[i] != PCRE2_UNSET)
|
||||||
|
ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
|
||||||
|
if (ovector32_2[i] != PCRE2_UNSET)
|
||||||
|
ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < return_value; ++i) {
|
||||||
|
#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
|
||||||
|
if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
|
||||||
|
printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
|
||||||
|
i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector16_1[i], (int)ovector16_2[i],
|
||||||
|
total, current->pattern, current->input);
|
||||||
|
is_successful = 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
|
||||||
|
if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
|
||||||
|
printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
|
||||||
|
i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
|
||||||
|
total, current->pattern, current->input);
|
||||||
|
is_successful = 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
|
||||||
|
if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector32_1[i] || ovector16_1[i] != ovector32_2[i]) {
|
||||||
|
printf("\n16 and 32 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
|
||||||
|
i, (int)ovector16_1[i], (int)ovector16_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
|
||||||
|
total, current->pattern, current->input);
|
||||||
|
is_successful = 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else
|
||||||
|
#endif /* more than one of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16 and SUPPORT_PCRE2_32 */
|
||||||
|
{
|
||||||
|
#ifdef SUPPORT_PCRE2_8
|
||||||
|
if (return_value8[0] != return_value8[1]) {
|
||||||
|
printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
|
||||||
|
return_value8[0], return_value8[1], total, current->pattern, current->input);
|
||||||
|
is_successful = 0;
|
||||||
|
} else if (return_value8[0] >= 0 || return_value8[0] == PCRE2_ERROR_PARTIAL) {
|
||||||
|
if (return_value8[0] == PCRE2_ERROR_PARTIAL)
|
||||||
|
return_value8[0] = 2;
|
||||||
|
else
|
||||||
|
return_value8[0] *= 2;
|
||||||
|
|
||||||
|
for (i = 0; i < return_value8[0]; ++i)
|
||||||
|
if (ovector8_1[i] != ovector8_2[i]) {
|
||||||
|
printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
|
||||||
|
i, (int)ovector8_1[i], (int)ovector8_2[i], total, current->pattern, current->input);
|
||||||
|
is_successful = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef SUPPORT_PCRE2_16
|
||||||
|
if (return_value16[0] != return_value16[1]) {
|
||||||
|
printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
|
||||||
|
return_value16[0], return_value16[1], total, current->pattern, current->input);
|
||||||
|
is_successful = 0;
|
||||||
|
} else if (return_value16[0] >= 0 || return_value16[0] == PCRE2_ERROR_PARTIAL) {
|
||||||
|
if (return_value16[0] == PCRE2_ERROR_PARTIAL)
|
||||||
|
return_value16[0] = 2;
|
||||||
|
else
|
||||||
|
return_value16[0] *= 2;
|
||||||
|
|
||||||
|
for (i = 0; i < return_value16[0]; ++i)
|
||||||
|
if (ovector16_1[i] != ovector16_2[i]) {
|
||||||
|
printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
|
||||||
|
i, (int)ovector16_1[i], (int)ovector16_2[i], total, current->pattern, current->input);
|
||||||
|
is_successful = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef SUPPORT_PCRE2_32
|
||||||
|
if (return_value32[0] != return_value32[1]) {
|
||||||
|
printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
|
||||||
|
return_value32[0], return_value32[1], total, current->pattern, current->input);
|
||||||
|
is_successful = 0;
|
||||||
|
} else if (return_value32[0] >= 0 || return_value32[0] == PCRE2_ERROR_PARTIAL) {
|
||||||
|
if (return_value32[0] == PCRE2_ERROR_PARTIAL)
|
||||||
|
return_value32[0] = 2;
|
||||||
|
else
|
||||||
|
return_value32[0] *= 2;
|
||||||
|
|
||||||
|
for (i = 0; i < return_value32[0]; ++i)
|
||||||
|
if (ovector32_1[i] != ovector32_2[i]) {
|
||||||
|
printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
|
||||||
|
i, (int)ovector32_1[i], (int)ovector32_2[i], total, current->pattern, current->input);
|
||||||
|
is_successful = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_successful) {
|
||||||
|
#ifdef SUPPORT_PCRE2_8
|
||||||
|
if (!(current->start_offset & F_NO8) && (utf || is_ascii)) {
|
||||||
|
if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
|
||||||
|
printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
|
||||||
|
total, current->pattern, current->input);
|
||||||
|
is_successful = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
|
||||||
|
printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
|
||||||
|
total, current->pattern, current->input);
|
||||||
|
is_successful = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#ifdef SUPPORT_PCRE2_16
|
||||||
|
if (!(current->start_offset & F_NO16) && (utf || is_ascii)) {
|
||||||
|
if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
|
||||||
|
printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
|
||||||
|
total, current->pattern, current->input);
|
||||||
|
is_successful = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
|
||||||
|
printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
|
||||||
|
total, current->pattern, current->input);
|
||||||
|
is_successful = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#ifdef SUPPORT_PCRE2_32
|
||||||
|
if (!(current->start_offset & F_NO32) && (utf || is_ascii)) {
|
||||||
|
if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
|
||||||
|
printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
|
||||||
|
total, current->pattern, current->input);
|
||||||
|
is_successful = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
|
||||||
|
printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
|
||||||
|
total, current->pattern, current->input);
|
||||||
|
is_successful = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_successful) {
|
||||||
|
#ifdef SUPPORT_PCRE2_8
|
||||||
|
if (re8 && !(current->start_offset & F_NO8) && pcre2_get_mark_8(mdata8_1) != pcre2_get_mark_8(mdata8_2)) {
|
||||||
|
printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
|
||||||
|
total, current->pattern, current->input);
|
||||||
|
is_successful = 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#ifdef SUPPORT_PCRE2_16
|
||||||
|
if (re16 && !(current->start_offset & F_NO16) && pcre2_get_mark_16(mdata16_1) != pcre2_get_mark_16(mdata16_2)) {
|
||||||
|
printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
|
||||||
|
total, current->pattern, current->input);
|
||||||
|
is_successful = 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#ifdef SUPPORT_PCRE2_32
|
||||||
|
if (re32 && !(current->start_offset & F_NO32) && pcre2_get_mark_32(mdata32_1) != pcre2_get_mark_32(mdata32_2)) {
|
||||||
|
printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
|
||||||
|
total, current->pattern, current->input);
|
||||||
|
is_successful = 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef SUPPORT_PCRE2_8
|
||||||
|
pcre2_code_free_8(re8);
|
||||||
|
pcre2_match_data_free_8(mdata8_1);
|
||||||
|
pcre2_match_data_free_8(mdata8_2);
|
||||||
|
pcre2_match_context_free_8(mcontext8);
|
||||||
|
#endif
|
||||||
|
#ifdef SUPPORT_PCRE2_16
|
||||||
|
pcre2_code_free_16(re16);
|
||||||
|
pcre2_match_data_free_16(mdata16_1);
|
||||||
|
pcre2_match_data_free_16(mdata16_2);
|
||||||
|
pcre2_match_context_free_16(mcontext16);
|
||||||
|
#endif
|
||||||
|
#ifdef SUPPORT_PCRE2_32
|
||||||
|
pcre2_code_free_32(re32);
|
||||||
|
pcre2_match_data_free_32(mdata32_1);
|
||||||
|
pcre2_match_data_free_32(mdata32_2);
|
||||||
|
pcre2_match_context_free_32(mcontext32);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (is_successful) {
|
||||||
|
successful++;
|
||||||
|
successful_row++;
|
||||||
|
printf(".");
|
||||||
|
if (successful_row >= 60) {
|
||||||
|
successful_row = 0;
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
} else
|
||||||
|
successful_row = 0;
|
||||||
|
|
||||||
|
fflush(stdout);
|
||||||
|
current++;
|
||||||
|
}
|
||||||
|
#ifdef SUPPORT_PCRE2_8
|
||||||
|
setstack8(NULL);
|
||||||
|
#endif
|
||||||
|
#ifdef SUPPORT_PCRE2_16
|
||||||
|
setstack16(NULL);
|
||||||
|
#endif
|
||||||
|
#ifdef SUPPORT_PCRE2_32
|
||||||
|
setstack32(NULL);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (total == successful) {
|
||||||
|
printf("\nAll JIT regression tests are successfully passed.\n");
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined SUPPORT_UNICODE
|
||||||
|
|
||||||
|
/* Compares the outcome of one JIT match against the expectation stored in a
   test case and prints a diagnostic on the first discrepancy found.

   pattern_index  index of the test case, used only in messages
   type           short label of the match mode, used only in messages
   result         value returned by the match function
   match_start    expected ovector[0]; a negative value means that the
                  match is expected to return -1
   match_end      expected ovector[1] (ignored when match_start < 0)
   ovector        output vector of the match

   Returns 0 when the outcome is the expected one, 1 otherwise. */
static int check_invalid_utf_result(int pattern_index, const char *type, int result,
	int match_start, int match_end, PCRE2_SIZE *ovector)
{
	int mismatch = 0;

	if (match_start < 0) {
		/* The only acceptable return value here is -1. */
		if (result != -1) {
			printf("Pattern[%d] %s result is not -1.\n", pattern_index, type);
			mismatch = 1;
		}
	} else if (result <= 0) {
		printf("Pattern[%d] %s result (%d) is not greater than 0.\n", pattern_index, type, result);
		mismatch = 1;
	} else if (ovector[0] != (PCRE2_SIZE)match_start) {
		printf("Pattern[%d] %s ovector[0] is unexpected (%d instead of %d)\n",
			pattern_index, type, (int)ovector[0], match_start);
		mismatch = 1;
	} else if (ovector[1] != (PCRE2_SIZE)match_end) {
		printf("Pattern[%d] %s ovector[1] is unexpected (%d instead of %d)\n",
			pattern_index, type, (int)ovector[1], match_end);
		mismatch = 1;
	}

	return mismatch;
}
|
||||||
|
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_8
|
||||||
|
|
||||||
|
#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
|
||||||
|
#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
|
||||||
|
#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
|
||||||
|
|
||||||
|
/* One entry of the invalid UTF-8 JIT regression table. The members are
   filled with positional initializers, so their order must not change. */
struct invalid_utf8_regression_test_case {
	/* Option bits passed to pcre2_compile_8(). */
	uint32_t compile_options;
	/* Option bits passed to pcre2_jit_compile_8(); the same bits also
	   select which match modes (complete / partial soft) are executed. */
	int jit_compile_options;
	/* Start offset counted from the beginning of input; skip_left is
	   subtracted from it before the match is started. */
	int start_offset;
	/* Number of bytes dropped from the start of input. */
	int skip_left;
	/* Number of bytes dropped from the end of input. */
	int skip_right;
	/* Expected ovector[0]; a negative value means no match is expected. */
	int match_start;
	/* Expected ovector[1]. */
	int match_end;
	/* pattern[0] is always tested; a NULL pattern[0] terminates the table.
	   pattern[1] is an optional second pattern (NULL when unused). */
	const char *pattern[2];
	/* Zero terminated subject string. */
	const char *input;
};
|
||||||
|
|
||||||
|
static const char invalid_utf8_newline_cr;
|
||||||
|
|
||||||
|
static const struct invalid_utf8_regression_test_case invalid_utf8_regression_test_cases[] = {
|
||||||
|
{ UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
|
||||||
|
{ UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf0\x90\x80\x80" },
|
||||||
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf4\x90\x80\x80" },
|
||||||
|
{ UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
|
||||||
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\x7f" },
|
||||||
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\xc0" },
|
||||||
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x8f\xbf\xbf" },
|
||||||
|
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf#" },
|
||||||
|
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf" },
|
||||||
|
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80#" },
|
||||||
|
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80" },
|
||||||
|
{ UDA, CI, 0, 0, 2, -1, -1, { ".", NULL }, "\xef\xbf\xbf#" },
|
||||||
|
{ UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xef\xbf\xbf" },
|
||||||
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\x7f#" },
|
||||||
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\xc0" },
|
||||||
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf#" },
|
||||||
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf" },
|
||||||
|
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xed\x9f\xbf#" },
|
||||||
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xa0\x80#" },
|
||||||
|
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xee\x80\x80#" },
|
||||||
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xbf\xbf#" },
|
||||||
|
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf##" },
|
||||||
|
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf#" },
|
||||||
|
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf" },
|
||||||
|
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80##" },
|
||||||
|
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80#" },
|
||||||
|
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80" },
|
||||||
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80##" },
|
||||||
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0##" },
|
||||||
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80" },
|
||||||
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0" },
|
||||||
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf##" },
|
||||||
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf" },
|
||||||
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80###" },
|
||||||
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80" },
|
||||||
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8###" },
|
||||||
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8" },
|
||||||
|
{ UDA, CI, 0, 0, 0, 0, 1, { ".", NULL }, "\x7f" },
|
||||||
|
|
||||||
|
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf4\x8f\xbf\xbf#" },
|
||||||
|
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\xa0\x80\x80\xf4\xa0\x80\x80" },
|
||||||
|
{ UDA, CPI, 4, 1, 1, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf" },
|
||||||
|
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xef\xbf\xbf#" },
|
||||||
|
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xe0\xa0\x80#" },
|
||||||
|
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf0\x90\x80\x80#" },
|
||||||
|
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf3\xbf\xbf\xbf#" },
|
||||||
|
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf0\x8f\xbf\xbf\xf0\x8f\xbf\xbf" },
|
||||||
|
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf5\x80\x80\x80\xf5\x80\x80\x80" },
|
||||||
|
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x90\x80\x80\xf4\x90\x80\x80" },
|
||||||
|
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xff\xf4\x8f\xbf\xff" },
|
||||||
|
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xff\xbf\xf4\x8f\xff\xbf" },
|
||||||
|
{ UDA, CPI, 4, 0, 1, -1, -1, { "\\B", "\\b" }, "\xef\x80\x80\x80\xef\x80\x80" },
|
||||||
|
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80\x80\x80\x80\x80\x80\x80" },
|
||||||
|
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xe0\x9f\xbf\xe0\x9f\xbf#" },
|
||||||
|
{ UDA, CPI, 4, 2, 2, -1, -1, { "\\B", "\\b" }, "#\xe0\xa0\x80\xe0\xa0\x80#" },
|
||||||
|
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xf0\x80\x80\xf0\x80\x80#" },
|
||||||
|
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xed\xa0\x80\xed\xa0\x80#" },
|
||||||
|
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xdf\xbf#" },
|
||||||
|
{ UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xdf\xbf#" },
|
||||||
|
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xc2\x80#" },
|
||||||
|
{ UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xc2\x80#" },
|
||||||
|
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xc1\xbf\xc1\xbf##" },
|
||||||
|
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xdf\xc0\xdf\xc0##" },
|
||||||
|
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xe0\x80\xe0\x80##" },
|
||||||
|
|
||||||
|
{ UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xef\xbf\xbf#" },
|
||||||
|
{ UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xe0\xa0\x80#" },
|
||||||
|
{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x9f\xbf\xe0\x9f\xbf" },
|
||||||
|
{ UDA, CPI, 3, 1, 1, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xbf\xef\xbf\xbf" },
|
||||||
|
{ UDA, CPI, 3, 0, 1, -1, -1, { "\\B", "\\b" }, "\xdf\x80\x80\xdf\x80" },
|
||||||
|
{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xff\xef\xbf\xff" },
|
||||||
|
{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xff\xbf\xef\xff\xbf" },
|
||||||
|
{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xed\xbf\xbf\xed\xbf\xbf" },
|
||||||
|
|
||||||
|
{ UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xdf\xbf#" },
|
||||||
|
{ UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xc2\x80#" },
|
||||||
|
{ UDA, CPI, 2, 1, 1, -1, -1, { "\\B", "\\b" }, "\xdf\xbf\xdf\xbf" },
|
||||||
|
{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xc1\xbf\xc1\xbf" },
|
||||||
|
{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x80\xe0\x80" },
|
||||||
|
{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xdf\xff\xdf\xff" },
|
||||||
|
{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xff\xbf\xff\xbf" },
|
||||||
|
|
||||||
|
{ UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x7f#" },
|
||||||
|
{ UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x01#" },
|
||||||
|
{ UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80" },
|
||||||
|
{ UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\xb0\xb0" },
|
||||||
|
|
||||||
|
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { "(.)\\1", NULL }, "aA" },
|
||||||
|
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "a\xff" },
|
||||||
|
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
|
||||||
|
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
|
||||||
|
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "\xc2\x80\x80" },
|
||||||
|
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 6, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
|
||||||
|
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
|
||||||
|
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 8, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
|
||||||
|
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
|
||||||
|
|
||||||
|
{ UDA, CPI, 0, 0, 0, 0, 1, { "\\X", NULL }, "A" },
|
||||||
|
{ UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xff" },
|
||||||
|
{ UDA, CPI, 0, 0, 0, 0, 2, { "\\X", NULL }, "\xc3\xa1" },
|
||||||
|
{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xc3\xa1" },
|
||||||
|
{ UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xc3\x7f" },
|
||||||
|
{ UDA, CPI, 0, 0, 0, 0, 3, { "\\X", NULL }, "\xe1\xbd\xb8" },
|
||||||
|
{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xe1\xbd\xb8" },
|
||||||
|
{ UDA, CPI, 0, 0, 0, 0, 4, { "\\X", NULL }, "\xf0\x90\x90\x80" },
|
||||||
|
{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xf0\x90\x90\x80" },
|
||||||
|
|
||||||
|
{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "#" },
|
||||||
|
{ UDA, CPI, 0, 0, 0, 0, 4, { "[^#]", NULL }, "\xf4\x8f\xbf\xbf" },
|
||||||
|
{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xf4\x90\x80\x80" },
|
||||||
|
{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xc1\x80" },
|
||||||
|
|
||||||
|
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { "^\\W", NULL }, " \x0a#"},
|
||||||
|
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 14, 15, { "^\\W", NULL }, " \xc0\x8a#\xe0\x80\x8a#\xf0\x80\x80\x8a#\x0a#"},
|
||||||
|
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf8\x0a#"},
|
||||||
|
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xc3\x0a#"},
|
||||||
|
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf1\x0a#"},
|
||||||
|
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xf2\xbf\x0a#"},
|
||||||
|
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \xf2\xbf\xbf\x0a#"},
|
||||||
|
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xef\x0a#"},
|
||||||
|
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xef\xbf\x0a#"},
|
||||||
|
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \x85#\xc2\x85#"},
|
||||||
|
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 7, 8, { "^\\W", NULL }, " \xe2\x80\xf8\xe2\x80\xa8#"},
|
||||||
|
|
||||||
|
{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xe2\x80\xf8\xe2\x80\xa8#"},
|
||||||
|
{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 3, 4, { "#", NULL }, "\xe2\x80\xf8#\xe2\x80\xa8#"},
|
||||||
|
{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "abcd\xc2\x85#"},
|
||||||
|
{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 1, 2, { "#", NULL }, "\x85#\xc2\x85#"},
|
||||||
|
{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 5, 6, { "#", NULL }, "\xef,\x80,\xf8#\x0a"},
|
||||||
|
{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xef,\x80,\xf8\x0a#"},
|
||||||
|
|
||||||
|
{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
|
||||||
|
{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
|
||||||
|
{ PCRE2_UTF, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
|
||||||
|
{ PCRE2_UTF, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
|
||||||
|
|
||||||
|
{ PCRE2_UTF | PCRE2_UCP, CI, 0, 0, 0, -1, -1, { "[\\s]", NULL }, "\xed\xa0\x80" },
|
||||||
|
{ PCRE2_UTF, CI, 0, 0, 0, 0, 3, { "[\\D]", NULL }, "\xe0\xab\xaa@" },
|
||||||
|
{ PCRE2_UTF, CI, 0, 0, 0, 0, 3, { "\\D+", NULL }, "n\xc3\xb1" },
|
||||||
|
{ PCRE2_UTF, CI, 0, 0, 0, 0, 5, { "\\W+", NULL }, "@\xf0\x9d\x84\x9e" },
|
||||||
|
|
||||||
|
/* These two are not invalid UTF tests, but this infrastructure fits better for them. */
|
||||||
|
{ 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\X{2}", NULL }, "\r\n\n" },
|
||||||
|
{ 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\R{2}", NULL }, "\r\n\n" },
|
||||||
|
|
||||||
|
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 0, 0, 0, -1, -1, { "^.a", &invalid_utf8_newline_cr }, "\xc3\xa7#a" },
|
||||||
|
|
||||||
|
{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
|
||||||
|
};
|
||||||
|
|
||||||
|
#undef UDA
|
||||||
|
#undef CI
|
||||||
|
#undef CPI
|
||||||
|
|
||||||
|
/* Compiles pattern[i] of one invalid UTF-8 test case, JIT compiles it and
   runs the match modes selected by the JIT options of the test case.

   current        the test case
   pattern_index  index of the test case, used only in messages
   i              which entry of current->pattern to use (0 or 1)
   ccontext       compile context passed to pcre2_compile_8()
   mdata          match data block used for every match

   Returns 1 on success, or when pattern[i] is NULL (nothing to test), and
   0 when compilation fails or a match result differs from the expectation. */
static int run_invalid_utf8_test(const struct invalid_utf8_regression_test_case *current,
	int pattern_index, int i, pcre2_compile_context_8 *ccontext, pcre2_match_data_8 *mdata)
{
	pcre2_code_8 *code;
	int result, errorcode;
	int passed = 0;
	PCRE2_SIZE length, erroroffset;
	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_8(mdata);

	if (current->pattern[i] == NULL)
		return 1;

	code = pcre2_compile_8((PCRE2_UCHAR8*)current->pattern[i], PCRE2_ZERO_TERMINATED,
		current->compile_options, &errorcode, &erroroffset, ccontext);

	if (!code) {
		/* Report the real pattern slot, not a hard-coded 0. */
		printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
		return 0;
	}

	if (pcre2_jit_compile_8(code, current->jit_compile_options) != 0) {
		printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
		goto done;
	}

	/* The subject is the input with skip_left bytes removed from its start
	   and skip_right bytes removed from its end. */
	length = (PCRE2_SIZE)(strlen(current->input) - current->skip_left - current->skip_right);

	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
		result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left),
			length, current->start_offset - current->skip_left, 0, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector))
			goto done;
	}

	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
		result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left),
			length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector))
			goto done;
	}

	passed = 1;

done:
	/* Single exit: the compiled pattern is released on every path. */
	pcre2_code_free_8(code);
	return passed;
}
|
||||||
|
|
||||||
|
static int invalid_utf8_regression_tests(void)
|
||||||
|
{
|
||||||
|
const struct invalid_utf8_regression_test_case *current;
|
||||||
|
pcre2_compile_context_8 *ccontext;
|
||||||
|
pcre2_match_data_8 *mdata;
|
||||||
|
int total = 0, successful = 0;
|
||||||
|
int result;
|
||||||
|
|
||||||
|
printf("\nRunning invalid-utf8 JIT regression tests\n");
|
||||||
|
|
||||||
|
ccontext = pcre2_compile_context_create_8(NULL);
|
||||||
|
pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY);
|
||||||
|
mdata = pcre2_match_data_create_8(4, NULL);
|
||||||
|
|
||||||
|
for (current = invalid_utf8_regression_test_cases; current->pattern[0]; current++) {
|
||||||
|
/* printf("\nPattern: %s :\n", current->pattern); */
|
||||||
|
total++;
|
||||||
|
|
||||||
|
result = 1;
|
||||||
|
if (current->pattern[1] != &invalid_utf8_newline_cr)
|
||||||
|
{
|
||||||
|
if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
|
||||||
|
result = 0;
|
||||||
|
if (!run_invalid_utf8_test(current, total - 1, 1, ccontext, mdata))
|
||||||
|
result = 0;
|
||||||
|
} else {
|
||||||
|
pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_CR);
|
||||||
|
if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
|
||||||
|
result = 0;
|
||||||
|
pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result) {
|
||||||
|
successful++;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf(".");
|
||||||
|
if ((total % 60) == 0)
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((total % 60) != 0)
|
||||||
|
printf("\n");
|
||||||
|
|
||||||
|
pcre2_match_data_free_8(mdata);
|
||||||
|
pcre2_compile_context_free_8(ccontext);
|
||||||
|
|
||||||
|
if (total == successful) {
|
||||||
|
printf("\nAll invalid UTF8 JIT regression tests are successfully passed.\n");
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
printf("\nInvalid UTF8 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_8 */
|
||||||
|
|
||||||
|
/* Placeholder compiled when Unicode or the 8-bit library is not available:
   there is nothing to run, so success (0) is reported. */
static int invalid_utf8_regression_tests(void)
{
	return 0;
}
|
||||||
|
|
||||||
|
#endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_8 */
|
||||||
|
|
||||||
|
#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_16
|
||||||
|
|
||||||
|
#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
|
||||||
|
#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
|
||||||
|
#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
|
||||||
|
|
||||||
|
struct invalid_utf16_regression_test_case {
|
||||||
|
uint32_t compile_options;
|
||||||
|
int jit_compile_options;
|
||||||
|
int start_offset;
|
||||||
|
int skip_left;
|
||||||
|
int skip_right;
|
||||||
|
int match_start;
|
||||||
|
int match_end;
|
||||||
|
const PCRE2_UCHAR16 *pattern[2];
|
||||||
|
const PCRE2_UCHAR16 *input;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Zero terminated 16-bit patterns referenced by the invalid UTF-16 table. */
static PCRE2_UCHAR16 allany16[] = { '.', 0 };
static PCRE2_UCHAR16 non_word_boundary16[] = { '\\', 'B', 0 };
static PCRE2_UCHAR16 word_boundary16[] = { '\\', 'b', 0 };
static PCRE2_UCHAR16 backreference16[] = { '(', '.', ')', '\\', '1', 0 };
static PCRE2_UCHAR16 grapheme16[] = { '\\', 'X', 0 };
static PCRE2_UCHAR16 nothashmark16[] = { '[', '^', '#', ']', 0 };
static PCRE2_UCHAR16 afternl16[] = { '^', '\\', 'W', 0 };
static PCRE2_UCHAR16 generic16[] = { '#', 0xd800, 0xdc00, '#', 0 };

/* Zero terminated 16-bit subject strings referenced by the same table. */
static PCRE2_UCHAR16 test16_1[] = { 0xd7ff, 0xe000, 0xffff, 0x01, '#', 0 };
static PCRE2_UCHAR16 test16_2[] = { 0xd800, 0xdc00, 0xd800, 0xdc00, 0 };
static PCRE2_UCHAR16 test16_3[] = { 0xdbff, 0xdfff, 0xdbff, 0xdfff, 0 };
static PCRE2_UCHAR16 test16_4[] = { 0xd800, 0xdbff, 0xd800, 0xdbff, 0 };
static PCRE2_UCHAR16 test16_5[] = { '#', 0xd800, 0xdc00, '#', 0 };
static PCRE2_UCHAR16 test16_6[] = { 'a', 'A', 0xdc28, 0 };
static PCRE2_UCHAR16 test16_7[] = { 0xd801, 0xdc00, 0xd801, 0xdc28, 0 };
static PCRE2_UCHAR16 test16_8[] = { '#', 0xd800, 0xdc00, 0 };
static PCRE2_UCHAR16 test16_9[] = { ' ', 0x2028, '#', 0 };
static PCRE2_UCHAR16 test16_10[] = { ' ', 0xdc00, 0xd800, 0x2028, '#', 0 };
static PCRE2_UCHAR16 test16_11[] = { 0xdc00, 0xdc00, 0xd800, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 };
static PCRE2_UCHAR16 test16_12[] = { '#', 0xd800, 0xdc00, 0xd800, '#', 0xd800, 0xdc00, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 };
|
||||||
|
|
||||||
|
static const struct invalid_utf16_regression_test_case invalid_utf16_regression_test_cases[] = {
|
||||||
|
{ UDA, CI, 0, 0, 0, 0, 1, { allany16, NULL }, test16_1 },
|
||||||
|
{ UDA, CI, 1, 0, 0, 1, 2, { allany16, NULL }, test16_1 },
|
||||||
|
{ UDA, CI, 2, 0, 0, 2, 3, { allany16, NULL }, test16_1 },
|
||||||
|
{ UDA, CI, 3, 0, 0, 3, 4, { allany16, NULL }, test16_1 },
|
||||||
|
{ UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_2 },
|
||||||
|
{ UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_2 },
|
||||||
|
{ UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_2 },
|
||||||
|
{ UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_3 },
|
||||||
|
{ UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_3 },
|
||||||
|
{ UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_3 },
|
||||||
|
|
||||||
|
{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary16, NULL }, test16_1 },
|
||||||
|
{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_1 },
|
||||||
|
{ UDA, CPI, 3, 0, 0, 3, 3, { non_word_boundary16, NULL }, test16_1 },
|
||||||
|
{ UDA, CPI, 4, 0, 0, 4, 4, { non_word_boundary16, NULL }, test16_1 },
|
||||||
|
{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_2 },
|
||||||
|
{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_3 },
|
||||||
|
{ UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_2 },
|
||||||
|
{ UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_3 },
|
||||||
|
{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_4 },
|
||||||
|
{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_5 },
|
||||||
|
|
||||||
|
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference16, NULL }, test16_6 },
|
||||||
|
{ UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference16, NULL }, test16_6 },
|
||||||
|
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { backreference16, NULL }, test16_7 },
|
||||||
|
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { backreference16, NULL }, test16_7 },
|
||||||
|
|
||||||
|
{ UDA, CPI, 0, 0, 0, 0, 1, { grapheme16, NULL }, test16_6 },
|
||||||
|
{ UDA, CPI, 1, 0, 0, 1, 2, { grapheme16, NULL }, test16_6 },
|
||||||
|
{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme16, NULL }, test16_6 },
|
||||||
|
{ UDA, CPI, 0, 0, 0, 0, 2, { grapheme16, NULL }, test16_7 },
|
||||||
|
{ UDA, CPI, 2, 0, 0, 2, 4, { grapheme16, NULL }, test16_7 },
|
||||||
|
{ UDA, CPI, 1, 0, 0, -1, -1, { grapheme16, NULL }, test16_7 },
|
||||||
|
|
||||||
|
{ UDA, CPI, 0, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 },
|
||||||
|
{ UDA, CPI, 1, 0, 0, 1, 3, { nothashmark16, NULL }, test16_8 },
|
||||||
|
{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 },
|
||||||
|
|
||||||
|
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl16, NULL }, test16_9 },
|
||||||
|
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { afternl16, NULL }, test16_10 },
|
||||||
|
|
||||||
|
{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 },
|
||||||
|
{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 },
|
||||||
|
{ PCRE2_UTF, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 },
|
||||||
|
{ PCRE2_UTF, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 },
|
||||||
|
|
||||||
|
{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
|
||||||
|
};
|
||||||
|
|
||||||
|
#undef UDA
|
||||||
|
#undef CI
|
||||||
|
#undef CPI
|
||||||
|
|
||||||
|
/* Compiles pattern[i] of one invalid UTF-16 test case, JIT compiles it and
   runs the match modes selected by the JIT options of the test case.

   current        the test case
   pattern_index  index of the test case, used only in messages
   i              which entry of current->pattern to use (0 or 1)
   ccontext       compile context passed to pcre2_compile_16()
   mdata          match data block used for every match

   Returns 1 on success, or when pattern[i] is NULL (nothing to test), and
   0 when compilation fails or a match result differs from the expectation. */
static int run_invalid_utf16_test(const struct invalid_utf16_regression_test_case *current,
	int pattern_index, int i, pcre2_compile_context_16 *ccontext, pcre2_match_data_16 *mdata)
{
	pcre2_code_16 *code;
	int result, errorcode;
	int passed = 0;
	PCRE2_SIZE length, erroroffset;
	const PCRE2_UCHAR16 *input;
	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_16(mdata);

	if (current->pattern[i] == NULL)
		return 1;

	code = pcre2_compile_16(current->pattern[i], PCRE2_ZERO_TERMINATED,
		current->compile_options, &errorcode, &erroroffset, ccontext);

	if (!code) {
		/* Report the real pattern slot, not a hard-coded 0. */
		printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
		return 0;
	}

	if (pcre2_jit_compile_16(code, current->jit_compile_options) != 0) {
		printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
		goto done;
	}

	/* Count the code units of the zero terminated input, then remove the
	   skipped units at both ends to get the subject length. */
	length = 0;
	for (input = current->input; *input != 0; input++)
		length++;

	length -= current->skip_left + current->skip_right;

	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
		result = pcre2_jit_match_16(code, (current->input + current->skip_left),
			length, current->start_offset - current->skip_left, 0, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector))
			goto done;
	}

	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
		result = pcre2_jit_match_16(code, (current->input + current->skip_left),
			length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector))
			goto done;
	}

	passed = 1;

done:
	/* Single exit: the compiled pattern is released on every path. */
	pcre2_code_free_16(code);
	return passed;
}
|
||||||
|
|
||||||
|
static int invalid_utf16_regression_tests(void)
|
||||||
|
{
|
||||||
|
const struct invalid_utf16_regression_test_case *current;
|
||||||
|
pcre2_compile_context_16 *ccontext;
|
||||||
|
pcre2_match_data_16 *mdata;
|
||||||
|
int total = 0, successful = 0;
|
||||||
|
int result;
|
||||||
|
|
||||||
|
printf("\nRunning invalid-utf16 JIT regression tests\n");
|
||||||
|
|
||||||
|
ccontext = pcre2_compile_context_create_16(NULL);
|
||||||
|
pcre2_set_newline_16(ccontext, PCRE2_NEWLINE_ANY);
|
||||||
|
mdata = pcre2_match_data_create_16(4, NULL);
|
||||||
|
|
||||||
|
for (current = invalid_utf16_regression_test_cases; current->pattern[0]; current++) {
|
||||||
|
/* printf("\nPattern: %s :\n", current->pattern); */
|
||||||
|
total++;
|
||||||
|
|
||||||
|
result = 1;
|
||||||
|
if (!run_invalid_utf16_test(current, total - 1, 0, ccontext, mdata))
|
||||||
|
result = 0;
|
||||||
|
if (!run_invalid_utf16_test(current, total - 1, 1, ccontext, mdata))
|
||||||
|
result = 0;
|
||||||
|
|
||||||
|
if (result) {
|
||||||
|
successful++;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf(".");
|
||||||
|
if ((total % 60) == 0)
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((total % 60) != 0)
|
||||||
|
printf("\n");
|
||||||
|
|
||||||
|
pcre2_match_data_free_16(mdata);
|
||||||
|
pcre2_compile_context_free_16(ccontext);
|
||||||
|
|
||||||
|
if (total == successful) {
|
||||||
|
printf("\nAll invalid UTF16 JIT regression tests are successfully passed.\n");
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
printf("\nInvalid UTF16 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_16 */
|
||||||
|
|
||||||
|
/* Fallback used when Unicode or 16-bit support is compiled out: there is
nothing to run, so report success. */
static int invalid_utf16_regression_tests(void)
{
	const int failed = 0;

	return failed;
}
|
||||||
|
|
||||||
|
#endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_16 */
|
||||||
|
|
||||||
|
#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_32
|
||||||
|
|
||||||
|
#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
|
||||||
|
#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
|
||||||
|
#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
|
||||||
|
|
||||||
|
struct invalid_utf32_regression_test_case {
|
||||||
|
uint32_t compile_options;
|
||||||
|
int jit_compile_options;
|
||||||
|
int start_offset;
|
||||||
|
int skip_left;
|
||||||
|
int skip_right;
|
||||||
|
int match_start;
|
||||||
|
int match_end;
|
||||||
|
const PCRE2_UCHAR32 *pattern[2];
|
||||||
|
const PCRE2_UCHAR32 *input;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Zero-terminated UTF-32 patterns used by the invalid UTF-32 regression
table. They are only ever read (through const pointers), so they are const. */
static const PCRE2_UCHAR32 allany32[] = { '.', 0 };
static const PCRE2_UCHAR32 non_word_boundary32[] = { '\\', 'B', 0 };
static const PCRE2_UCHAR32 word_boundary32[] = { '\\', 'b', 0 };
static const PCRE2_UCHAR32 backreference32[] = { '(', '.', ')', '\\', '1', 0 };
static const PCRE2_UCHAR32 grapheme32[] = { '\\', 'X', 0 };
static const PCRE2_UCHAR32 nothashmark32[] = { '[', '^', '#', ']', 0 };
static const PCRE2_UCHAR32 afternl32[] = { '^', '\\', 'W', 0 };

/* Zero-terminated subjects. Values above 0x10ffff and values in the
0xd800-0xdfff surrogate range are not valid UTF-32 code points. */
static const PCRE2_UCHAR32 test32_1[] = { 0x10ffff, 0x10ffff, 0x110000, 0x110000, 0x10ffff, 0 };
static const PCRE2_UCHAR32 test32_2[] = { 0xd7ff, 0xe000, 0xd800, 0xdfff, 0xe000, 0xdfff, 0xd800, 0 };
static const PCRE2_UCHAR32 test32_3[] = { 'a', 'A', 0x110000, 0 };
static const PCRE2_UCHAR32 test32_4[] = { '#', 0x10ffff, 0x110000, 0 };
static const PCRE2_UCHAR32 test32_5[] = { ' ', 0x2028, '#', 0 };
static const PCRE2_UCHAR32 test32_6[] = { ' ', 0x110000, 0x2028, '#', 0 };
|
||||||
|
|
||||||
|
static const struct invalid_utf32_regression_test_case invalid_utf32_regression_test_cases[] = {
|
||||||
|
{ UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_1 },
|
||||||
|
{ UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_1 },
|
||||||
|
{ UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_2 },
|
||||||
|
{ UDA, CI, 1, 0, 0, 1, 2, { allany32, NULL }, test32_2 },
|
||||||
|
{ UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_2 },
|
||||||
|
{ UDA, CI, 3, 0, 0, -1, -1, { allany32, NULL }, test32_2 },
|
||||||
|
|
||||||
|
{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_1 },
|
||||||
|
{ UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_1 },
|
||||||
|
{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_2 },
|
||||||
|
{ UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },
|
||||||
|
{ UDA, CPI, 6, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },
|
||||||
|
|
||||||
|
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference32, NULL }, test32_3 },
|
||||||
|
{ UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference32, NULL }, test32_3 },
|
||||||
|
|
||||||
|
{ UDA, CPI, 0, 0, 0, 0, 1, { grapheme32, NULL }, test32_1 },
|
||||||
|
{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_1 },
|
||||||
|
{ UDA, CPI, 1, 0, 0, 1, 2, { grapheme32, NULL }, test32_2 },
|
||||||
|
{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_2 },
|
||||||
|
{ UDA, CPI, 3, 0, 0, -1, -1, { grapheme32, NULL }, test32_2 },
|
||||||
|
{ UDA, CPI, 4, 0, 0, 4, 5, { grapheme32, NULL }, test32_2 },
|
||||||
|
|
||||||
|
{ UDA, CPI, 0, 0, 0, -1, -1, { nothashmark32, NULL }, test32_4 },
|
||||||
|
{ UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_4 },
|
||||||
|
{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_4 },
|
||||||
|
{ UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_2 },
|
||||||
|
{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_2 },
|
||||||
|
|
||||||
|
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl32, NULL }, test32_5 },
|
||||||
|
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { afternl32, NULL }, test32_6 },
|
||||||
|
|
||||||
|
{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
|
||||||
|
};
|
||||||
|
|
||||||
|
#undef UDA
|
||||||
|
#undef CI
|
||||||
|
#undef CPI
|
||||||
|
|
||||||
|
/* Compiles pattern slot i of one invalid UTF-32 test case, JIT-compiles it
and runs the matches selected by the case's JIT options, checking each result
with check_invalid_utf_result().

Arguments:
  current        the test case
  pattern_index  index of the test case, used in diagnostics only
  i              which of the two pattern slots to run
  ccontext       compile context used for pcre2_compile_32()
  mdata          match data block that receives the results

Returns 1 if the slot is empty (NULL pattern) or every requested match gave
the expected result, 0 otherwise. */
static int run_invalid_utf32_test(const struct invalid_utf32_regression_test_case *current,
	int pattern_index, int i, pcre2_compile_context_32 *ccontext, pcre2_match_data_32 *mdata)
{
	pcre2_code_32 *code;
	int result, errorcode;
	int passed = 0;
	PCRE2_SIZE length, erroroffset;
	const PCRE2_UCHAR32 *input;
	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_32(mdata);

	if (current->pattern[i] == NULL)
		return 1;

	code = pcre2_compile_32(current->pattern[i], PCRE2_ZERO_TERMINATED,
		current->compile_options, &errorcode, &erroroffset, ccontext);

	if (!code) {
		/* Report the slot that actually failed, not always slot 0. */
		printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
		return 0;
	}

	if (pcre2_jit_compile_32(code, current->jit_compile_options) != 0) {
		printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
		goto done;
	}

	/* The input is zero terminated: count its code units, then drop the
	   parts excluded on either side. */
	length = 0;
	for (input = current->input; *input != 0; input++)
		length++;

	length -= current->skip_left + current->skip_right;

	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
		result = pcre2_jit_match_32(code, (current->input + current->skip_left),
			length, current->start_offset - current->skip_left, 0, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector))
			goto done;
	}

	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
		result = pcre2_jit_match_32(code, (current->input + current->skip_left),
			length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector))
			goto done;
	}

	passed = 1;

done:
	/* Single release point for the compiled pattern. */
	pcre2_code_free_32(code);
	return passed;
}
|
||||||
|
|
||||||
|
static int invalid_utf32_regression_tests(void)
|
||||||
|
{
|
||||||
|
const struct invalid_utf32_regression_test_case *current;
|
||||||
|
pcre2_compile_context_32 *ccontext;
|
||||||
|
pcre2_match_data_32 *mdata;
|
||||||
|
int total = 0, successful = 0;
|
||||||
|
int result;
|
||||||
|
|
||||||
|
printf("\nRunning invalid-utf32 JIT regression tests\n");
|
||||||
|
|
||||||
|
ccontext = pcre2_compile_context_create_32(NULL);
|
||||||
|
pcre2_set_newline_32(ccontext, PCRE2_NEWLINE_ANY);
|
||||||
|
mdata = pcre2_match_data_create_32(4, NULL);
|
||||||
|
|
||||||
|
for (current = invalid_utf32_regression_test_cases; current->pattern[0]; current++) {
|
||||||
|
/* printf("\nPattern: %s :\n", current->pattern); */
|
||||||
|
total++;
|
||||||
|
|
||||||
|
result = 1;
|
||||||
|
if (!run_invalid_utf32_test(current, total - 1, 0, ccontext, mdata))
|
||||||
|
result = 0;
|
||||||
|
if (!run_invalid_utf32_test(current, total - 1, 1, ccontext, mdata))
|
||||||
|
result = 0;
|
||||||
|
|
||||||
|
if (result) {
|
||||||
|
successful++;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf(".");
|
||||||
|
if ((total % 60) == 0)
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((total % 60) != 0)
|
||||||
|
printf("\n");
|
||||||
|
|
||||||
|
pcre2_match_data_free_32(mdata);
|
||||||
|
pcre2_compile_context_free_32(ccontext);
|
||||||
|
|
||||||
|
if (total == successful) {
|
||||||
|
printf("\nAll invalid UTF32 JIT regression tests are successfully passed.\n");
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
printf("\nInvalid UTF32 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_32 */
|
||||||
|
|
||||||
|
/* Fallback used when Unicode or 32-bit support is compiled out: there is
nothing to run, so report success. */
static int invalid_utf32_regression_tests(void)
{
	const int failed = 0;

	return failed;
}
|
||||||
|
|
||||||
|
#endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_32 */
|
||||||
|
|
||||||
|
/* End of pcre2_jit_test.c */
|
||||||
165
3rd/pcre2/src/pcre2_maketables.c
Normal file
165
3rd/pcre2/src/pcre2_maketables.c
Normal file
@@ -0,0 +1,165 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module contains the external function pcre2_maketables(), which builds
|
||||||
|
character tables for PCRE2 in the current locale. The file is compiled on its
|
||||||
|
own as part of the PCRE2 library. It is also included in the compilation of
|
||||||
|
pcre2_dftables.c as a freestanding program, in which case the macro
|
||||||
|
PCRE2_DFTABLES is defined. */
|
||||||
|
|
||||||
|
#ifndef PCRE2_DFTABLES /* Compiling the library */
|
||||||
|
# ifdef HAVE_CONFIG_H
|
||||||
|
# include "config.h"
|
||||||
|
# endif
|
||||||
|
# include "pcre2_internal.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Create PCRE2 character tables *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function builds a set of character tables for use by PCRE2 and returns
|
||||||
|
a pointer to them. They are built using the ctype functions, and consequently
|
||||||
|
their contents will depend upon the current locale setting. When compiled as
|
||||||
|
part of the library, the store is obtained via a general context malloc, if
|
||||||
|
supplied, but when PCRE2_DFTABLES is defined (when compiling the pcre2_dftables
|
||||||
|
freestanding auxiliary program) malloc() is used, and the function has a
|
||||||
|
different name so as not to clash with the prototype in pcre2.h.
|
||||||
|
|
||||||
|
Arguments: none when PCRE2_DFTABLES is defined
|
||||||
|
else a PCRE2 general context or NULL
|
||||||
|
Returns: pointer to the contiguous block of data
|
||||||
|
else NULL if memory allocation failed
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef PCRE2_DFTABLES /* Included in freestanding pcre2_dftables program */
|
||||||
|
static const uint8_t *maketables(void)
|
||||||
|
{
|
||||||
|
uint8_t *yield = (uint8_t *)malloc(TABLES_LENGTH);
|
||||||
|
|
||||||
|
#else /* Not PCRE2_DFTABLES, that is, compiling the library */
|
||||||
|
PCRE2_EXP_DEFN const uint8_t * PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_maketables(pcre2_general_context *gcontext)
|
||||||
|
{
|
||||||
|
uint8_t *yield = (uint8_t *)((gcontext != NULL)?
|
||||||
|
gcontext->memctl.malloc(TABLES_LENGTH, gcontext->memctl.memory_data) :
|
||||||
|
malloc(TABLES_LENGTH));
|
||||||
|
#endif /* PCRE2_DFTABLES */
|
||||||
|
|
||||||
|
int i;
|
||||||
|
uint8_t *p;
|
||||||
|
|
||||||
|
if (yield == NULL) return NULL;
|
||||||
|
p = yield;
|
||||||
|
|
||||||
|
/* First comes the lower casing table */
|
||||||
|
|
||||||
|
for (i = 0; i < 256; i++) *p++ = tolower(i);
|
||||||
|
|
||||||
|
/* Next the case-flipping table */
|
||||||
|
|
||||||
|
for (i = 0; i < 256; i++)
|
||||||
|
{
|
||||||
|
int c = islower(i)? toupper(i) : tolower(i);
|
||||||
|
*p++ = (c < 256)? c : i;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Then the character class tables. Don't try to be clever and save effort on
|
||||||
|
exclusive ones - in some locales things may be different.
|
||||||
|
|
||||||
|
Note that the table for "space" includes everything "isspace" gives, including
|
||||||
|
VT in the default locale. This makes it work for the POSIX class [:space:].
|
||||||
|
From PCRE1 release 8.34 and for all PCRE2 releases it is also correct for Perl
|
||||||
|
space, because Perl added VT at release 5.18.
|
||||||
|
|
||||||
|
Note also that it is possible for a character to be alnum or alpha without
|
||||||
|
being lower or upper, such as "male and female ordinals" (\xAA and \xBA) in the
|
||||||
|
fr_FR locale (at least under Debian Linux's locales as of 12/2005). So we must
|
||||||
|
test for alnum specially. */
|
||||||
|
|
||||||
|
memset(p, 0, cbit_length);
|
||||||
|
for (i = 0; i < 256; i++)
|
||||||
|
{
|
||||||
|
if (isdigit(i)) p[cbit_digit + i/8] |= 1u << (i&7);
|
||||||
|
if (isupper(i)) p[cbit_upper + i/8] |= 1u << (i&7);
|
||||||
|
if (islower(i)) p[cbit_lower + i/8] |= 1u << (i&7);
|
||||||
|
if (isalnum(i)) p[cbit_word + i/8] |= 1u << (i&7);
|
||||||
|
if (i == '_') p[cbit_word + i/8] |= 1u << (i&7);
|
||||||
|
if (isspace(i)) p[cbit_space + i/8] |= 1u << (i&7);
|
||||||
|
if (isxdigit(i)) p[cbit_xdigit + i/8] |= 1u << (i&7);
|
||||||
|
if (isgraph(i)) p[cbit_graph + i/8] |= 1u << (i&7);
|
||||||
|
if (isprint(i)) p[cbit_print + i/8] |= 1u << (i&7);
|
||||||
|
if (ispunct(i)) p[cbit_punct + i/8] |= 1u << (i&7);
|
||||||
|
if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1u << (i&7);
|
||||||
|
}
|
||||||
|
p += cbit_length;
|
||||||
|
|
||||||
|
/* Finally, the character type table. In this, we used to exclude VT from the
|
||||||
|
white space chars, because Perl didn't recognize it as such for \s and for
|
||||||
|
comments within regexes. However, Perl changed at release 5.18, so PCRE1
|
||||||
|
changed at release 8.34 and it's always been this way for PCRE2. */
|
||||||
|
|
||||||
|
for (i = 0; i < 256; i++)
|
||||||
|
{
|
||||||
|
int x = 0;
|
||||||
|
if (isspace(i)) x += ctype_space;
|
||||||
|
if (isalpha(i)) x += ctype_letter;
|
||||||
|
if (islower(i)) x += ctype_lcletter;
|
||||||
|
if (isdigit(i)) x += ctype_digit;
|
||||||
|
if (isalnum(i) || i == '_') x += ctype_word;
|
||||||
|
*p++ = x;
|
||||||
|
}
|
||||||
|
|
||||||
|
return yield;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifndef PCRE2_DFTABLES /* Compiling the library */
|
||||||
|
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_maketables_free(pcre2_general_context *gcontext, const uint8_t *tables)
|
||||||
|
{
|
||||||
|
if (gcontext != NULL)
|
||||||
|
gcontext->memctl.free((void *)tables, gcontext->memctl.memory_data);
|
||||||
|
else
|
||||||
|
free((void *)tables);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* End of pcre2_maketables.c */
|
||||||
8080
3rd/pcre2/src/pcre2_match.c
Normal file
8080
3rd/pcre2/src/pcre2_match.c
Normal file
File diff suppressed because it is too large
Load Diff
187
3rd/pcre2/src/pcre2_match_data.c
Normal file
187
3rd/pcre2/src/pcre2_match_data.c
Normal file
@@ -0,0 +1,187 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
New API code Copyright (c) 2016-2024 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
#include "config.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "pcre2_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Create a match data block given ovector size *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* A minimum of 1 is imposed on the number of ovector pairs. A maximum is also
|
||||||
|
imposed because the oveccount field in a match data block is uint16_t. */
|
||||||
|
|
||||||
|
/* Allocates a match data block with room for oveccount ovector pairs, after
clamping the count to the range 1..UINT16_MAX, and initializes its
bookkeeping fields.

Arguments:
  oveccount   requested number of ovector pairs
  gcontext    general context handed to the allocator as a pcre2_memctl

Returns:      the new block, or NULL if the allocation failed
*/

PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext)
{
pcre2_match_data *yield;
size_t block_size;

if (oveccount < 1)
  oveccount = 1;
else if (oveccount > UINT16_MAX)
  oveccount = UINT16_MAX;

/* Fixed header followed by two PCRE2_SIZE values per pair. */
block_size = offsetof(pcre2_match_data, ovector) +
  2*oveccount*sizeof(PCRE2_SIZE);

yield = PRIV(memctl_malloc)(block_size, (pcre2_memctl *)gcontext);
if (yield != NULL)
  {
  yield->oveccount = oveccount;
  yield->flags = 0;
  yield->heapframes = NULL;
  yield->heapframes_size = 0;
  }
return yield;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Create a match data block using pattern data *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* If no context is supplied, use the memory allocator from the code. This code
|
||||||
|
assumes that a general context contains nothing other than a memory allocator.
|
||||||
|
If that ever changes, this code will need fixing. */
|
||||||
|
|
||||||
|
/* Creates a match data block sized from a compiled pattern: one ovector pair
more than the pattern's top_bracket value. If no general context is supplied,
the compiled pattern itself is passed in its place.

Arguments:
  code       the compiled pattern
  gcontext   a general context, or NULL

Returns:     whatever pcre2_match_data_create() returns
*/

PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create_from_pattern(const pcre2_code *code,
  pcre2_general_context *gcontext)
{
const pcre2_real_code *re = (const pcre2_real_code *)code;
pcre2_general_context *allocator =
  (gcontext != NULL)? gcontext : (pcre2_general_context *)code;

return pcre2_match_data_create(re->top_bracket + 1, allocator);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Free a match data block *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_match_data_free(pcre2_match_data *match_data)
|
||||||
|
{
|
||||||
|
if (match_data != NULL)
|
||||||
|
{
|
||||||
|
if (match_data->heapframes != NULL)
|
||||||
|
match_data->memctl.free(match_data->heapframes,
|
||||||
|
match_data->memctl.memory_data);
|
||||||
|
if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
|
||||||
|
match_data->memctl.free((void *)match_data->subject,
|
||||||
|
match_data->memctl.memory_data);
|
||||||
|
match_data->memctl.free(match_data, match_data->memctl.memory_data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Get last mark in match *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN PCRE2_SPTR PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_get_mark(pcre2_match_data *match_data)
|
||||||
|
{
|
||||||
|
return match_data->mark;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Get pointer to ovector *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* Returns the address of the ovector held inside the match data block.

Argument:   the match data block
Returns:    pointer to its ovector
*/

PCRE2_EXP_DEFN PCRE2_SIZE * PCRE2_CALL_CONVENTION
pcre2_get_ovector_pointer(pcre2_match_data *match_data)
{
PCRE2_SIZE *ovector = match_data->ovector;
return ovector;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Get number of ovector slots *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN uint32_t PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_get_ovector_count(pcre2_match_data *match_data)
|
||||||
|
{
|
||||||
|
return match_data->oveccount;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Get starting code unit in match *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_get_startchar(pcre2_match_data *match_data)
|
||||||
|
{
|
||||||
|
return match_data->startchar;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Get size of match data block *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_get_match_data_size(pcre2_match_data *match_data)
|
||||||
|
{
|
||||||
|
return offsetof(pcre2_match_data, ovector) +
|
||||||
|
2 * (match_data->oveccount) * sizeof(PCRE2_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Get heapframes size *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_get_match_data_heapframes_size(pcre2_match_data *match_data)
|
||||||
|
{
|
||||||
|
return match_data->heapframes_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre2_match_data.c */
|
||||||
243
3rd/pcre2/src/pcre2_newline.c
Normal file
243
3rd/pcre2/src/pcre2_newline.c
Normal file
@@ -0,0 +1,243 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
New API code Copyright (c) 2016 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This module contains internal functions for testing newlines when more than
|
||||||
|
one kind of newline is to be recognized. When a newline is found, its length is
|
||||||
|
returned. In principle, we could implement several newline "types", each
|
||||||
|
referring to a different set of newline characters. At present, PCRE2 supports
|
||||||
|
only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
|
||||||
|
and NLTYPE_ANY. The full list of Unicode newline characters is taken from
|
||||||
|
http://unicode.org/unicode/reports/tr18/. */
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
#include "config.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "pcre2_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Check for newline at given position *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function is called only via the IS_NEWLINE macro, which does so only
|
||||||
|
when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
|
||||||
|
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the code unit
|
||||||
|
pointed to by ptr is less than the end of the string.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
ptr pointer to possible newline
|
||||||
|
type the newline type
|
||||||
|
endptr pointer to the end of the string
|
||||||
|
lenptr where to return the length
|
||||||
|
utf TRUE if in utf mode
|
||||||
|
|
||||||
|
Returns: TRUE or FALSE
|
||||||
|
*/
|
||||||
|
|
||||||
|
BOOL
|
||||||
|
PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr,
|
||||||
|
uint32_t *lenptr, BOOL utf)
|
||||||
|
{
|
||||||
|
uint32_t c;
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (utf) { GETCHAR(c, ptr); } else c = *ptr;
|
||||||
|
#else
|
||||||
|
(void)utf;
|
||||||
|
c = *ptr;
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||||
|
{
|
||||||
|
case CHAR_LF:
|
||||||
|
*lenptr = 1;
|
||||||
|
return TRUE;
|
||||||
|
|
||||||
|
case CHAR_CR:
|
||||||
|
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
|
||||||
|
return TRUE;
|
||||||
|
|
||||||
|
default:
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* NLTYPE_ANY */
|
||||||
|
|
||||||
|
else switch(c)
|
||||||
|
{
|
||||||
|
#ifdef EBCDIC
|
||||||
|
case CHAR_NEL:
|
||||||
|
#endif
|
||||||
|
case CHAR_LF:
|
||||||
|
case CHAR_VT:
|
||||||
|
case CHAR_FF:
|
||||||
|
*lenptr = 1;
|
||||||
|
return TRUE;
|
||||||
|
|
||||||
|
case CHAR_CR:
|
||||||
|
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
|
||||||
|
return TRUE;
|
||||||
|
|
||||||
|
#ifndef EBCDIC
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
case CHAR_NEL:
|
||||||
|
*lenptr = utf? 2 : 1;
|
||||||
|
return TRUE;
|
||||||
|
|
||||||
|
case 0x2028: /* LS */
|
||||||
|
case 0x2029: /* PS */
|
||||||
|
*lenptr = 3;
|
||||||
|
return TRUE;
|
||||||
|
|
||||||
|
#else /* 16-bit or 32-bit code units */
|
||||||
|
case CHAR_NEL:
|
||||||
|
case 0x2028: /* LS */
|
||||||
|
case 0x2029: /* PS */
|
||||||
|
*lenptr = 1;
|
||||||
|
return TRUE;
|
||||||
|
#endif
|
||||||
|
#endif /* Not EBCDIC */
|
||||||
|
|
||||||
|
default:
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Check for newline at previous position *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function is called only via the WAS_NEWLINE macro, which does so only
|
||||||
|
when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
|
||||||
|
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the initial
|
||||||
|
value of ptr is greater than the start of the string that is being processed.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
ptr pointer to possible newline
|
||||||
|
type the newline type
|
||||||
|
startptr pointer to the start of the string
|
||||||
|
lenptr where to return the length
|
||||||
|
utf TRUE if in utf mode
|
||||||
|
|
||||||
|
Returns: TRUE or FALSE
|
||||||
|
*/
|
||||||
|
|
||||||
|
BOOL
|
||||||
|
PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr,
|
||||||
|
uint32_t *lenptr, BOOL utf)
|
||||||
|
{
|
||||||
|
uint32_t c;
|
||||||
|
ptr--;
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (utf)
|
||||||
|
{
|
||||||
|
BACKCHAR(ptr);
|
||||||
|
GETCHAR(c, ptr);
|
||||||
|
}
|
||||||
|
else c = *ptr;
|
||||||
|
#else
|
||||||
|
(void)utf;
|
||||||
|
c = *ptr;
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||||
|
{
|
||||||
|
case CHAR_LF:
|
||||||
|
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
|
||||||
|
return TRUE;
|
||||||
|
|
||||||
|
case CHAR_CR:
|
||||||
|
*lenptr = 1;
|
||||||
|
return TRUE;
|
||||||
|
|
||||||
|
default:
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* NLTYPE_ANY */
|
||||||
|
|
||||||
|
else switch(c)
|
||||||
|
{
|
||||||
|
case CHAR_LF:
|
||||||
|
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
|
||||||
|
return TRUE;
|
||||||
|
|
||||||
|
#ifdef EBCDIC
|
||||||
|
case CHAR_NEL:
|
||||||
|
#endif
|
||||||
|
case CHAR_VT:
|
||||||
|
case CHAR_FF:
|
||||||
|
case CHAR_CR:
|
||||||
|
*lenptr = 1;
|
||||||
|
return TRUE;
|
||||||
|
|
||||||
|
#ifndef EBCDIC
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
case CHAR_NEL:
|
||||||
|
*lenptr = utf? 2 : 1;
|
||||||
|
return TRUE;
|
||||||
|
|
||||||
|
case 0x2028: /* LS */
|
||||||
|
case 0x2029: /* PS */
|
||||||
|
*lenptr = 3;
|
||||||
|
return TRUE;
|
||||||
|
|
||||||
|
#else /* 16-bit or 32-bit code units */
|
||||||
|
case CHAR_NEL:
|
||||||
|
case 0x2028: /* LS */
|
||||||
|
case 0x2029: /* PS */
|
||||||
|
*lenptr = 1;
|
||||||
|
return TRUE;
|
||||||
|
#endif
|
||||||
|
#endif /* Not EBCDIC */
|
||||||
|
|
||||||
|
default:
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of pcre2_newline.c */
|
||||||
120
3rd/pcre2/src/pcre2_ord2utf.c
Normal file
120
3rd/pcre2/src/pcre2_ord2utf.c
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
/*************************************************
|
||||||
|
* Perl-Compatible Regular Expressions *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
New API code Copyright (c) 2016 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the University of Cambridge nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* This file contains a function that converts a Unicode character code point
|
||||||
|
into a UTF string. The behaviour is different for each code unit width. */
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
#include "config.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "pcre2_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
/* If SUPPORT_UNICODE is not defined, this function will never be called.
|
||||||
|
Supply a dummy function because some compilers do not like empty source
|
||||||
|
modules. */
|
||||||
|
|
||||||
|
#ifndef SUPPORT_UNICODE
|
||||||
|
unsigned int
|
||||||
|
PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
|
||||||
|
{
|
||||||
|
(void)(cvalue);
|
||||||
|
(void)(buffer);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#else /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Convert code point to UTF *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/*
|
||||||
|
Arguments:
|
||||||
|
cvalue the character value
|
||||||
|
buffer pointer to buffer for result
|
||||||
|
|
||||||
|
Returns: number of code units placed in the buffer
|
||||||
|
*/
|
||||||
|
|
||||||
|
unsigned int
|
||||||
|
PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
|
||||||
|
{
|
||||||
|
/* Convert to UTF-8 */
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
int i, j;
|
||||||
|
for (i = 0; i < PRIV(utf8_table1_size); i++)
|
||||||
|
if ((int)cvalue <= PRIV(utf8_table1)[i]) break;
|
||||||
|
buffer += i;
|
||||||
|
for (j = i; j > 0; j--)
|
||||||
|
{
|
||||||
|
*buffer-- = 0x80 | (cvalue & 0x3f);
|
||||||
|
cvalue >>= 6;
|
||||||
|
}
|
||||||
|
*buffer = PRIV(utf8_table2)[i] | cvalue;
|
||||||
|
return i + 1;
|
||||||
|
|
||||||
|
/* Convert to UTF-16 */
|
||||||
|
|
||||||
|
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||||
|
if (cvalue <= 0xffff)
|
||||||
|
{
|
||||||
|
*buffer = (PCRE2_UCHAR)cvalue;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
cvalue -= 0x10000;
|
||||||
|
*buffer++ = 0xd800 | (cvalue >> 10);
|
||||||
|
*buffer = 0xdc00 | (cvalue & 0x3ff);
|
||||||
|
return 2;
|
||||||
|
|
||||||
|
/* Convert to UTF-32 */
|
||||||
|
|
||||||
|
#else
|
||||||
|
*buffer = (PCRE2_UCHAR)cvalue;
|
||||||
|
return 1;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
/* End of pcre2_ord2utf.c */
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user